├── .dockerignore ├── .env ├── .github ├── dependabot.yml └── workflows │ ├── build-and-deploy.yml │ ├── check-docs.yml │ ├── deploy-documentation.yml │ ├── generative-tests.yml │ ├── main.yml │ ├── pages-deployment.yml │ ├── update-dependencies.yml │ ├── update-external-studies.yml │ ├── update-pledge.yml │ └── update-tpp-schema.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .prettierignore ├── .python-version ├── .vscode └── launch.json ├── DEVELOPERS.md ├── Dockerfile ├── GLOSSARY.md ├── LICENSE ├── README.md ├── bin ├── LICENSE ├── cosmopolitan-release-url.txt └── pledge ├── build-dependencies.txt ├── dependencies.txt ├── docs ├── explanation │ ├── backend-tables.md │ ├── index.md │ ├── measures.md │ ├── output-formats.md │ ├── running-ehrql.md │ ├── selecting-populations-for-study.md │ ├── using-ehrql-in-opensafely-projects.md │ ├── vscode-extension.md │ ├── vscode_extension_ehrql_debug.png │ ├── vscode_extension_menu_bar_button.png │ ├── vscode_extension_run_button.png │ ├── vscode_extension_run_button_dropdown.png │ ├── vscode_extension_search.png │ ├── vscode_extensions_icon.png │ └── vscode_extensions_icon_updates.png ├── how-to │ ├── assign-multiple-columns.md │ ├── codelists.md │ ├── define-population.md │ ├── dummy-data.md │ ├── dummy-measures-data.md │ ├── errors.md │ ├── examples.md │ ├── index.md │ ├── opensafely_exec_create_dummy_tables.png │ ├── opensafely_exec_dummy_data_file.png │ ├── opensafely_exec_dummy_measures_data_file.png │ └── test-dataset-definition.md ├── includes │ └── generated_docs │ │ ├── backends.md │ │ ├── cli.md │ │ ├── language__codelists.md │ │ ├── language__dataset.md │ │ ├── language__date_arithmetic.md │ │ ├── language__frames.md │ │ ├── language__functions.md │ │ ├── language__measures.md │ │ ├── language__series.md │ │ ├── schemas.md │ │ ├── schemas │ │ ├── core.md │ │ ├── emis.md │ │ ├── raw.core.md │ │ ├── raw.emis.md │ │ ├── raw.tpp.md │ │ ├── smoketest.md │ │ └── tpp.md │ │ └── specs.md ├── index.md ├── reference │ 
├── backends.md │ ├── cheatsheet.md │ ├── cli.md │ ├── features.md │ ├── index.md │ ├── language.md │ ├── schemas │ ├── schemas.md │ └── upgrading-ehrql-from-v0-to-v1.md ├── sandbox │ ├── medications.csv │ └── patients.csv ├── stylesheets │ └── extra.css └── tutorial │ ├── building-a-dataset │ └── index.md │ ├── index.md │ ├── more-complex-transformations │ └── index.md │ ├── quiz │ ├── index.md │ ├── play-button-drop-down.png │ └── play-button.png │ ├── setting-up │ ├── building-codespace.png │ ├── enhanced-tracking-protection.png │ ├── green-buttons.png │ ├── index.md │ ├── new-codespace-screen.png │ ├── run-button.png │ └── successful-run.png │ ├── simple-transformations │ ├── autocomplete-example.png │ └── index.md │ ├── using-ehrql-as-part-of-a-study │ ├── index.md │ └── terminal.png │ ├── working-with-data-with-ehrql │ └── index.md │ └── xkcd-2582.png ├── ehrql ├── VERSION ├── __init__.py ├── __main__.py ├── assurance.py ├── backends │ ├── __init__.py │ ├── base.py │ ├── emis.py │ └── tpp.py ├── codes.py ├── debugger.py ├── docs │ ├── __init__.py │ ├── __main__.py │ ├── backends.py │ ├── cli.py │ ├── common.py │ ├── language.py │ ├── render_includes │ │ ├── __init__.py │ │ ├── backends.py │ │ ├── cli.py │ │ ├── language.py │ │ ├── schemas.py │ │ └── specs.py │ ├── schemas.py │ └── specs.py ├── dummy_data │ ├── __init__.py │ ├── generator.py │ ├── measures.py │ └── query_info.py ├── dummy_data_nextgen │ ├── __init__.py │ ├── generator.py │ ├── measures.py │ └── query_info.py ├── example-data │ ├── clinical_events.csv │ ├── medications.csv │ ├── ons_deaths.csv │ ├── patients.csv │ └── practice_registrations.csv ├── exceptions.py ├── file_formats │ ├── __init__.py │ ├── arrow.py │ ├── base.py │ ├── console.py │ ├── csv.py │ ├── main.py │ └── validation.py ├── loaders.py ├── main.py ├── measures │ ├── __init__.py │ ├── calculate.py │ ├── disclosure_control.py │ └── measures.py ├── query_engines │ ├── __init__.py │ ├── base.py │ ├── base_sql.py │ ├── debug.py │ 
├── in_memory.py │ ├── in_memory_database.py │ ├── local_file.py │ ├── mssql.py │ ├── mssql_dialect.py │ ├── sqlite.py │ ├── sqlite_dialect.py │ ├── trino.py │ └── trino_dialect.py ├── query_language.py ├── query_model │ ├── __init__.py │ ├── column_specs.py │ ├── graphs.py │ ├── introspection.py │ ├── nodes.py │ ├── population_validation.py │ ├── query_graph_rewriter.py │ ├── table_schema.py │ └── transforms.py ├── quiz.py ├── renderers.py ├── serializer.py ├── sqlalchemy_types.py ├── tables │ ├── __init__.py │ ├── core.py │ ├── emis.py │ ├── raw │ │ ├── __init__.py │ │ ├── core.py │ │ ├── emis.py │ │ └── tpp.py │ ├── smoketest.py │ └── tpp.py └── utils │ ├── __init__.py │ ├── date_utils.py │ ├── docs_utils.py │ ├── functools_utils.py │ ├── itertools_utils.py │ ├── log_utils.py │ ├── math_utils.py │ ├── module_utils.py │ ├── mssql_log_utils.py │ ├── regex_utils.py │ ├── sequence_utils.py │ ├── sqlalchemy_exec_utils.py │ ├── sqlalchemy_query_utils.py │ ├── string_utils.py │ ├── traceback_utils.py │ └── typing_utils.py ├── hooks ├── __init__.py └── parent_snippets.py ├── justfile ├── mkdocs.yml ├── pyproject.minimal.toml ├── pyproject.toml ├── requirements.dev.in ├── requirements.dev.txt ├── requirements.prod.in ├── requirements.prod.txt ├── scripts ├── .gitignore ├── generate_example_data.py ├── generate_quiz_from_answers.py └── run-debug.sh └── tests ├── __init__.py ├── acceptance ├── __init__.py ├── external_studies │ ├── mainroute_cancer │ │ ├── analysis │ │ │ ├── codelists.py │ │ │ ├── dataset_definition.py │ │ │ ├── define_static_dataset.py │ │ │ └── measures_demo.py │ │ └── codelists │ │ │ ├── opensafely-ethnicity-snomed-0removed.csv │ │ │ ├── phc-2ww-referral-colorectal.csv │ │ │ ├── phc-colorectal-cancer-icd10.csv │ │ │ ├── phc-fit-test.csv │ │ │ ├── phc-phc-colorectal-cancer-ctv3.csv │ │ │ ├── phc-phc-colorectal-cancer-snomed.csv │ │ │ ├── phc-symptom-colorectal-cibh.csv │ │ │ ├── phc-symptom-colorectal-ida.csv │ │ │ ├── 
phc-symptom-colorectal-pr-bleeding.csv │ │ │ ├── phc-symptom-colorectal-wl.csv │ │ │ ├── phc-symptom-lowergi-abdo-mass.csv │ │ │ ├── phc-symptom-lowergi-abdo-pain.csv │ │ │ ├── phc-symptom-lowergi-anaemia.csv │ │ │ └── phc-symptoms-colorectal-cancer.csv │ ├── qof-diabetes │ │ ├── analysis │ │ │ ├── codelists.py │ │ │ ├── dataset_definition_dm017.py │ │ │ ├── dataset_definition_dm020.py │ │ │ ├── dataset_definition_dm021.py │ │ │ ├── dm_dataset.py │ │ │ └── variable_lib_helper.py │ │ └── codelists │ │ │ ├── nhsd-primary-care-domain-refsets-bldtestdec_cod.csv │ │ │ ├── nhsd-primary-care-domain-refsets-dm_cod.csv │ │ │ ├── nhsd-primary-care-domain-refsets-dminvite_cod.csv │ │ │ ├── nhsd-primary-care-domain-refsets-dmmax_cod.csv │ │ │ ├── nhsd-primary-care-domain-refsets-dmpcadec_cod.csv │ │ │ ├── nhsd-primary-care-domain-refsets-dmpcapu_cod.csv │ │ │ ├── nhsd-primary-care-domain-refsets-dmres_cod.csv │ │ │ ├── nhsd-primary-care-domain-refsets-ifcchbam_cod.csv │ │ │ ├── nhsd-primary-care-domain-refsets-mildfrail_cod.csv │ │ │ ├── nhsd-primary-care-domain-refsets-modfrail_cod.csv │ │ │ ├── nhsd-primary-care-domain-refsets-serfruc_cod.csv │ │ │ └── nhsd-primary-care-domain-refsets-sevfrail_cod.csv │ ├── test-age-distribution │ │ └── analysis │ │ │ └── dataset_definition.py │ └── waiting-list │ │ ├── analysis │ │ ├── codelists.py │ │ ├── dataset_definition_clockstops.py │ │ ├── measures_checks.py │ │ └── measures_opioid.py │ │ └── codelists │ │ ├── ons-depression-and-generalised-anxiety-disorder-diagnoses-and-symptoms.csv │ │ ├── opensafely-anxiety-disorders.csv │ │ ├── opensafely-cancer-excluding-lung-and-haematological-snomed.csv │ │ ├── opensafely-chronic-cardiac-disease.csv │ │ ├── opensafely-chronic-kidney-disease-snomed.csv │ │ ├── opensafely-chronic-liver-disease.csv │ │ ├── opensafely-chronic-respiratory-disease.csv │ │ ├── opensafely-depression.csv │ │ ├── opensafely-diabetes.csv │ │ ├── opensafely-ethnicity-snomed-0removed.csv │ │ ├── 
opensafely-haematological-cancer-snomed.csv │ │ ├── opensafely-high-dose-long-acting-opioids-openprescribing-dmd.csv │ │ ├── opensafely-hypertension.csv │ │ ├── opensafely-lung-cancer-snomed.csv │ │ ├── opensafely-nsaids-oral.csv │ │ ├── opensafely-osteoarthritis.csv │ │ ├── opensafely-rheumatoid-arthritis.csv │ │ ├── opensafely-strongopioidsCW-dmd.csv │ │ ├── opensafely-symptoms-anxiety.csv │ │ ├── opensafely-symptoms-depression.csv │ │ ├── opensafely-symptoms-pain.csv │ │ ├── primis-covid19-vacc-uptake-old-sev_mental_cod.csv │ │ ├── user-anschaf-antidepressants-dmd.csv │ │ ├── user-anschaf-codeine-for-pain-dmd.csv │ │ ├── user-anschaf-gabapentinoids-dmd.csv │ │ ├── user-anschaf-long-acting-opioids-dmd.csv │ │ ├── user-anschaf-opioids-for-analgesia-dmd.csv │ │ ├── user-anschaf-weak-opioids-dmd.csv │ │ ├── user-hjforbes-opioid-dependency-clinical-diagnosis.csv │ │ ├── user-speed-vm-antidepressants-for-pain-indication-dmd.csv │ │ └── user-speed-vm-nsaids-dmd.csv ├── test_external_studies.py └── update_external_studies.py ├── autocomplete ├── __init__.py ├── autocomplete_definition.py ├── language_server.py └── test_autocomplete.py ├── conftest.py ├── docker ├── __init__.py ├── test_cli.py └── test_drivers.py ├── docs ├── __init__.py ├── test_complete_examples.py ├── test_find_docs_examples.py └── test_run_generate_dataset_example.py ├── fixtures ├── bad_definition_files │ ├── bad_import.py │ ├── bad_syntax.py │ ├── bad_types.py │ ├── empty_measures.py │ ├── no_dataset.py │ ├── no_measures.py │ ├── no_population.py │ ├── not_a_dataset.py │ ├── not_measures_instance.py │ └── operator_error.py ├── codelist_csvs │ ├── categories.csv │ ├── custom_col.csv │ ├── default_col.csv │ ├── extra_whitespace.csv │ └── long_csv.csv ├── csv_date_merging │ ├── measure_test_2021-01-01.csv │ ├── measure_test_2021-02-01.csv │ ├── measure_test_2021-03-01.csv │ ├── measure_test_20210908.csv │ ├── measure_test_code_2021-03-01.csv │ ├── measure_test_code_2021-04-01.csv │ ├── 
measure_test_error_2021-01-01.csv │ ├── measure_test_error_2021-02-01.csv │ └── measure_test_event.csv ├── debug │ └── patients.csv ├── dummy_data │ ├── dummy-data.csv │ ├── dummy-data.txt │ ├── extra-column.csv │ ├── invalid-bool.csv │ ├── invalid-date.csv │ ├── invalid-patient-id.csv │ ├── long_covid_dummy_data.csv │ ├── missing-column.csv │ └── zero-date.csv ├── good_definition_files │ ├── assurance.py │ ├── chatty_dataset_definition.py │ ├── dataset_definition.py │ ├── dataset_definition_with_print.py │ ├── debug_definition.py │ └── measure_definitions.py ├── local_file_engine │ ├── events.csv │ └── patients.csv └── quiz-example-data │ ├── addresses.csv │ ├── clinical_events.csv │ ├── medications.csv │ ├── ons_deaths.csv │ ├── patients.csv │ └── practice_registrations.csv ├── functional ├── __init__.py ├── test_assure.py ├── test_create_dummy_tables.py ├── test_debug.py ├── test_dump_dataset_sql.py ├── test_dump_example_data.py ├── test_entrypoint.py ├── test_generate_dataset.py ├── test_generate_measures.py ├── test_graph_query.py ├── test_isolation_report.py ├── test_serialize_definition.py └── test_test_connection.py ├── generative ├── README.md ├── __init__.py ├── conftest.py ├── data_setup.py ├── data_strategies.py ├── example.py ├── generic_strategies.py ├── ignored_errors.py ├── recording.py ├── test_data_setup.py ├── test_query_model.py └── variable_strategies.py ├── integration ├── __init__.py ├── backends │ ├── __init__.py │ ├── conftest.py │ ├── helpers.py │ ├── test_base.py │ ├── test_emis.py │ └── test_tpp.py ├── file_formats │ ├── __init__.py │ ├── test_arrow.py │ ├── test_csv.py │ └── test_main.py ├── measures │ ├── __init__.py │ └── test_calculate.py ├── query_engines │ ├── __init__.py │ ├── test_dialects.py │ ├── test_local_file.py │ ├── test_mssql.py │ ├── test_mssql_dialect.py │ └── test_trino_dialect.py ├── query_model │ ├── __init__.py │ └── test_transforms.py ├── tables │ ├── __init__.py │ ├── test_core.py │ ├── test_emis.py │ └── 
test_tpp.py ├── test_query_engines.py └── utils │ ├── __init__.py │ ├── test_mssql_log_utils.py │ ├── test_sqlalchemy_exec_utils.py │ ├── test_sqlalchemy_query_utils.py │ └── test_traceback_utils.py ├── lib ├── __init__.py ├── create_tpp_test_db.py ├── databases.py ├── docker.py ├── emis_schema.py ├── file_utils.py ├── gentest_example_simplify.py ├── inspect_utils.py ├── orm_utils.py ├── query_model_utils.py ├── test_gentest_example_simplify.py ├── tpp_categorical_columns.csv ├── tpp_data_dictionary.csv ├── tpp_decision_support_reference.csv ├── tpp_schema.csv ├── tpp_schema.py └── update_tpp_schema.py ├── spec ├── README.md ├── __init__.py ├── aggregate_frame │ ├── __init__.py │ ├── test_count_for_patient.py │ └── test_exists_for_patient.py ├── aggregate_series │ ├── __init__.py │ ├── test_count_distinct_for_patient.py │ ├── test_mean_for_patient.py │ ├── test_minimum_and_maximum_for_patient.py │ └── test_sum_for_patient.py ├── bool_series_ops │ ├── __init__.py │ ├── test_conversion.py │ └── test_logical_ops.py ├── case_expressions │ ├── __init__.py │ ├── test_case.py │ └── test_when.py ├── code_series_ops │ ├── __init__.py │ ├── test_containment.py │ └── test_map_codes_to_categories.py ├── combine_series │ ├── __init__.py │ ├── test_event_series_and_event_series.py │ ├── test_event_series_and_patient_series.py │ ├── test_event_series_and_value.py │ ├── test_patient_series_and_patient_series.py │ └── test_patient_series_and_value.py ├── conftest.py ├── date_series_ops │ ├── __init__.py │ ├── test_date_aggregations.py │ ├── test_date_comparison_types.py │ ├── test_date_comparisons.py │ └── test_date_series_ops.py ├── dummy │ ├── __init__.py │ └── test_dummy.py ├── filter │ ├── __init__.py │ ├── test_except_where.py │ └── test_where.py ├── float_series_ops │ ├── __init__.py │ ├── test_arithmetic_ops.py │ ├── test_comparison_ops.py │ ├── test_conversion.py │ └── test_division_ops.py ├── int_series_ops │ ├── __init__.py │ ├── test_arithmetic_ops.py │ ├── 
test_comparison_ops.py │ ├── test_conversion.py │ └── test_division_ops.py ├── multi_code_string_series_ops │ ├── __init__.py │ └── test_containment.py ├── population │ ├── __init__.py │ └── test_population.py ├── series_ops │ ├── __init__.py │ ├── test_containment.py │ ├── test_containment_with_series.py │ ├── test_equality.py │ ├── test_map_values.py │ ├── test_maximum_of_and_minimum_of_event_series.py │ ├── test_maximum_of_and_minimum_of_patient_series.py │ └── test_when_null_then.py ├── sort_and_pick │ ├── __init__.py │ ├── test_sort_by_column_and_pick.py │ ├── test_sort_by_column_with_nulls_and_pick.py │ ├── test_sort_by_interleaved_with_where.py │ ├── test_sort_by_multiple_columns_and_pick.py │ └── test_sort_extends_to_all_columns_when_underspecified.py ├── str_series_ops │ ├── __init__.py │ └── test_contains.py ├── table_from_rows │ ├── __init__.py │ └── test_table_from_rows.py ├── tables.py ├── test_conftest.py ├── test_specs.py └── toc.py ├── support ├── mssql │ ├── entrypoint.sh │ └── setup.sql └── trino │ ├── entrypoint.sh │ └── etc │ ├── catalog │ └── trino.properties │ ├── config.properties │ ├── jvm.config │ ├── log.properties │ └── node.properties └── unit ├── __init__.py ├── backends ├── __init__.py ├── test_base.py ├── test_emis.py └── test_tpp.py ├── docs ├── __init__.py ├── test_common.py ├── test_language.py └── test_schemas.py ├── dummy_data ├── __init__.py ├── test_dependencies.py ├── test_generator.py └── test_query_info.py ├── dummy_data_nextgen ├── __init__.py ├── test_edge_cases_for_coverage.py ├── test_generator.py ├── test_measures.py ├── test_query_info.py └── test_specific_datasets.py ├── file_formats ├── __init__.py ├── test_arrow.py ├── test_base.py ├── test_console.py ├── test_csv.py └── test_main.py ├── measures ├── __init__.py ├── test_disclosure_control.py ├── test_dummy_data.py └── test_measures.py ├── query_engines ├── __init__.py ├── test_in_memory.py ├── test_in_memory_database.py └── test_mssql_dialect.py ├── query_model ├── 
__init__.py ├── test_column_specs.py ├── test_constraints.py ├── test_graphs.py ├── test_nodes.py ├── test_population_validation.py ├── test_query_graph_rewriter.py ├── test_table_schema.py └── test_transforms.py ├── test___main__.py ├── test_assurance.py ├── test_codes.py ├── test_debugger.py ├── test_docs.py ├── test_example_data.py ├── test_loaders.py ├── test_main.py ├── test_pyproject_minimal.py ├── test_query_language.py ├── test_quiz.py ├── test_renderers.py ├── test_serializer.py ├── test_sqlalchemy_types.py ├── test_tables.py └── utils ├── __init__.py ├── test_date_utils.py ├── test_functools_utils.py ├── test_itertools_utils.py ├── test_log_utils.py ├── test_math_utils.py ├── test_mssql_log_utils.py ├── test_regex_utils.py ├── test_sequence_utils.py ├── test_sqlalchemy_exec_utils.py ├── test_sqlalchemy_query_utils.py ├── test_string_utils.py ├── test_traceback_utils.py └── test_typing_utils.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .git/ 2 | 3 | **/*~ 4 | **/.#* 5 | **/*# 6 | **/htmlcov 7 | **/__pycache__ 8 | **/*.pyc 9 | **/.python-version 10 | **/.env 11 | **/.venv 12 | **/venv 13 | **/.coverage 14 | **/*.egg-info/ 15 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | # This file defines environment variables we want to be set in development 2 | # environments. The Just command runner and VSCode's Python extension (but not 3 | # the terminal) should pick these up automatically, see: 4 | # https://github.com/casey/just#dotenv-load 5 | # https://code.visualstudio.com/docs/python/environments#_environment-variables 6 | # 7 | # You can load these manually in bash using something like: 8 | # 9 | # set -o allexport; source .env; set +o allexport 10 | # 11 | 12 | # Disable hash randomisation. 
The kinds of DoS attacks hash seed randomisation 13 | # is designed to protect against don't apply to ehrQL, and having consistent 14 | # output makes debugging much easier 15 | PYTHONHASHSEED=0 16 | 17 | # Enable event level queries for testing purposes, but not yet in production 18 | EHRQL_ENABLE_EVENT_LEVEL_QUERIES=True 19 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | updates: 4 | 5 | - package-ecosystem: "github-actions" 6 | directory: "/" 7 | schedule: 8 | interval: "weekly" 9 | commit-message: 10 | prefix: "chore: " 11 | -------------------------------------------------------------------------------- /.github/workflows/check-docs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Check documentation 3 | 4 | on: 5 | workflow_dispatch: 6 | push: 7 | branches: 8 | - main 9 | pull_request: 10 | 11 | jobs: 12 | documentation: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Checkout repo 17 | uses: actions/checkout@v4 18 | - uses: opensafely-core/setup-action@v1 19 | with: 20 | install-just: true 21 | python-version: "3.11" 22 | cache-dependency-path: requirements.*.txt 23 | 24 | - name: Check generated docs are up-to-date 25 | run: just docs-check-generated-docs-are-current 26 | 27 | # This check becomes somewhat redundant if we fix up the Cloudflare Pages preview 28 | # to work with Dependabot, because the deployment will also do the build. 29 | # See https://github.com/opensafely/documentation/issues/930 which documents this problem. 30 | # 31 | # However, for any PR, Cloudflare Pages previews sometimes fail for mysterious reasons, 32 | # and this requires logging into Cloudflare Pages to inspect. 33 | # So it is perhaps useful to distinguish a Cloudflare failure from an actual issue.
34 | - name: Check docs build 35 | run: just docs-build 36 | -------------------------------------------------------------------------------- /.github/workflows/deploy-documentation.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: "Trigger a deploy of opensafely documentation site" 3 | 4 | on: 5 | workflow_run: 6 | workflows: 7 | - CI 8 | branches: 9 | - main 10 | types: 11 | - completed 12 | 13 | jobs: 14 | build-docs: 15 | runs-on: ubuntu-latest 16 | if: ${{ github.event.workflow_run.conclusion == 'success' }} 17 | 18 | steps: 19 | - name: Trigger documentation deploy 20 | uses: actions/github-script@v7 21 | with: 22 | github-token: ${{ secrets.DOCS_WRITE_TOKEN }} 23 | script: | 24 | github.rest.actions.createWorkflowDispatch({ 25 | owner: 'opensafely', 26 | repo: 'documentation', 27 | workflow_id: 'pages-deployment.yml', 28 | ref: 'main' 29 | }); 30 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: CI 3 | 4 | on: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | 10 | jobs: 11 | check: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: opensafely-core/setup-action@v1 17 | with: 18 | install-just: true 19 | python-version: "3.11" 20 | - name: Set up development environment 21 | run: just devenv 22 | - name: Check formatting and linting rules 23 | run: just check 24 | 25 | test: 26 | runs-on: ubuntu-latest 27 | 28 | steps: 29 | - uses: actions/checkout@v4 30 | - uses: opensafely-core/setup-action@v1 31 | with: 32 | install-just: true 33 | python-version: "3.11" 34 | - name: Set up development environment 35 | run: just devenv 36 | - name: Run tests 37 | run: | 38 | just test-all 39 | 40 | tag-new-version: 41 | # This uses `conventional commits` to generate tags. 
A full list 42 | # of valid prefixes is here: 43 | # https://github.com/commitizen/conventional-commit-types/blob/master/index.json 44 | # 45 | # fix, perf -> patch release 46 | # feat -> minor release 47 | # BREAKING CHANGE in footer -> major release 48 | # 49 | # anything else (docs, refactor, etc) does not create a release 50 | if: github.ref == 'refs/heads/main' && github.event_name != 'pull_request' 51 | needs: [check, test] 52 | runs-on: ubuntu-latest 53 | outputs: 54 | tag: ${{ steps.tag.outputs.new_version }} 55 | steps: 56 | - uses: actions/checkout@v4 57 | with: 58 | fetch-depth: 0 59 | - name: Bump version and push tag 60 | id: tag 61 | uses: mathieudutour/github-tag-action@a22cf08638b34d5badda920f9daf6e72c477b07b #v6.2 62 | with: 63 | github_token: ${{ secrets.GITHUB_TOKEN }} 64 | default_bump: false 65 | release_branches: main 66 | -------------------------------------------------------------------------------- /.github/workflows/pages-deployment.yml: -------------------------------------------------------------------------------- 1 | on: [push] 2 | 3 | jobs: 4 | deploy: 5 | 6 | permissions: 7 | contents: read 8 | deployments: write 9 | 10 | runs-on: ubuntu-latest 11 | 12 | name: Deploy to Cloudflare Pages 13 | steps: 14 | - name: Checkout repo 15 | uses: actions/checkout@v4 16 | with: 17 | submodules: true 18 | 19 | - name: Install Python and just 20 | uses: opensafely-core/setup-action@v1 21 | with: 22 | install-just: true 23 | python-version: "3.11" 24 | 25 | - name: Check docs are current 26 | run: just docs-check-generated-docs-are-current 27 | 28 | - name: Build site 29 | run: just docs-build 30 | 31 | - name: Add a version file 32 | run: echo ${{ github.sha }} > site/version.html 33 | 34 | - name: Publish 35 | if: ${{ github.actor != 'dependabot[bot]' }} 36 | uses: cloudflare/pages-action@f0a1cd58cd66095dee69bfa18fa5efd1dde93bca # v1.5.0 37 | with: 38 | accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} 39 | apiToken: ${{ 
secrets.CLOUDFLARE_DIRECT_UPLOAD_API_TOKEN }} 40 | directory: "site" 41 | gitHubToken: ${{ secrets.GITHUB_TOKEN }} 42 | projectName: "databuilder-docs" 43 | -------------------------------------------------------------------------------- /.github/workflows/update-dependencies.yml: -------------------------------------------------------------------------------- 1 | name: Update python dependencies 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: "0 4 * * WED" 7 | 8 | jobs: 9 | update-dependencies: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | - uses: "opensafely-core/setup-action@v1" 14 | with: 15 | python-version: "3.11" 16 | install-just: true 17 | 18 | - uses: actions/create-github-app-token@v2 19 | id: generate-token 20 | with: 21 | app-id: 1031449 # opensafely-core Create PR app 22 | private-key: ${{ secrets.CREATE_PR_APP_PRIVATE_KEY }} 23 | 24 | - uses: bennettoxford/update-dependencies-action@v1 25 | id: update 26 | with: 27 | token: ${{ steps.generate-token.outputs.token }} 28 | 29 | - name: Notify slack of PR 30 | if: ${{ steps.update.outputs.pull-request-operation != 'none' }} 31 | uses: slackapi/slack-github-action@b0fa283ad8fea605de13dc3f449259339835fc52 # v2.1.0 32 | with: 33 | method: chat.postMessage 34 | token: ${{ secrets.BENNETTBOT_SLACK_BOT_TOKEN }} 35 | payload: | 36 | channel: "C080S7W2ZPX" 37 | text: "Update dependencies\n${{ steps.update.outputs.pull-request-url }}" 38 | -------------------------------------------------------------------------------- /.github/workflows/update-pledge.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: "Create PR to update `bin/pledge`" 3 | 4 | on: 5 | workflow_dispatch: 6 | schedule: 7 | - cron: "33 2 * * *" 8 | 9 | jobs: 10 | create_pr_to_update_pledge: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v4 15 | - uses: opensafely-core/setup-action@v1 16 | with: 17 | install-just: true 18 | 
python-version: "3.11" 19 | 20 | - name: "Ensure `bin/pledge` is at latest version" 21 | run: just update-pledge 22 | 23 | - name: Generate app token 24 | uses: actions/create-github-app-token@v2 25 | id: generate-token 26 | with: 27 | app-id: 1031449 # opensafely-core Create PR app 28 | private-key: ${{ secrets.CREATE_PR_APP_PRIVATE_KEY }} 29 | 30 | - name: "Create a Pull Request if there are any changes" 31 | id: create_pr 32 | uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8 33 | with: 34 | add-paths: bin/* 35 | branch: bot/update-pledge 36 | base: main 37 | author: "opensafely-github-bot " 38 | committer: "opensafely-github-bot " 39 | commit-message: "fix: Update `bin/pledge`" 40 | title: "Update `bin/pledge`" 41 | token: ${{ steps.generate-token.outputs.token }} 42 | 43 | # The PR will still require manual approval, this just reduces it to a one-click process 44 | - name: Enable automerge 45 | if: steps.create_pr.outputs.pull-request-operation == 'created' 46 | run: gh pr merge --auto --squash ${{ steps.create_pr.outputs.pull-request-number }} 47 | env: 48 | GH_TOKEN: ${{ steps.generate-token.outputs.token }} 49 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3.11 3 | 4 | exclude: tests/acceptance/external_studies/ 5 | 6 | repos: 7 | - repo: local 8 | hooks: 9 | - id: check 10 | name: check 11 | entry: just check 12 | language: system 13 | types: [python] 14 | require_serial: true 15 | pass_filenames: false 16 | 17 | - repo: https://github.com/pre-commit/pre-commit-hooks 18 | rev: v4.3.0 19 | hooks: 20 | - id: trailing-whitespace 21 | - id: end-of-file-fixer 22 | - id: debug-statements 23 | - id: check-ast 24 | - id: check-json 25 | - id: check-toml 26 | - id: check-yaml 27 | # --unsafe is a workaround for the use of !! in mkdocs.yml. 
28 | args: [--unsafe] 29 | - id: detect-private-key 30 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | ** 2 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.11 2 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Debug: Current test file", 6 | "type": "debugpy", 7 | "request": "launch", 8 | "args": ["${file}"], 9 | "module": "pytest", 10 | "console": "integratedTerminal" 11 | }, 12 | { 13 | "name": "Debug: Generate docs", 14 | "type": "debugpy", 15 | "request": "launch", 16 | "args": ["docs/includes/generated_docs"], 17 | "module": "ehrql.docs", 18 | "console": "integratedTerminal" 19 | } 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | OpenSAFELY ehrQL 2 | Copyright (C) University of Oxford 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenSAFELY ehrQL 2 | 3 | ehrQL is a Python-based query language for electronic health record (EHR) data. 4 | It has been designed for use with the OpenSAFELY platform. 5 | 6 | Documentation is at the [OpenSAFELY documentation site](https://docs.opensafely.org/ehrql). 7 | 8 | # For developers 9 | 10 | See [DEVELOPERS.md](DEVELOPERS.md). 11 | 12 | There is also [a glossary](GLOSSARY.md) of terms used in the codebase. 13 | 14 | # About the OpenSAFELY framework 15 | 16 | The OpenSAFELY framework is a Trusted Research Environment (TRE) for electronic 17 | health records research in the NHS, with a focus on public accountability and 18 | research quality. 19 | 20 | Read more at [OpenSAFELY.org](https://opensafely.org). 21 | -------------------------------------------------------------------------------- /bin/LICENSE: -------------------------------------------------------------------------------- 1 | The accompanying binary `pledge` is included under the license below. 2 | 3 | --- 4 | 5 | ISC License 6 | 7 | Copyright 2020 Justine Alexandra Roberts Tunney 8 | 9 | Permission to use, copy, modify, and/or distribute this software for 10 | any purpose with or without fee is hereby granted, provided that the 11 | above copyright notice and this permission notice appear in all copies. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL 14 | WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE 16 | AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL 17 | DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR 18 | PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 19 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 20 | PERFORMANCE OF THIS SOFTWARE. 21 | -------------------------------------------------------------------------------- /bin/cosmopolitan-release-url.txt: -------------------------------------------------------------------------------- 1 | https://github.com/jart/cosmopolitan/releases/download/4.0.2/cosmos-4.0.2.zip 2 | -------------------------------------------------------------------------------- /bin/pledge: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/bin/pledge -------------------------------------------------------------------------------- /build-dependencies.txt: -------------------------------------------------------------------------------- 1 | # list ubuntu packages needed to build dependencies, one per line 2 | python3.11-dev 3 | build-essential 4 | -------------------------------------------------------------------------------- /dependencies.txt: -------------------------------------------------------------------------------- 1 | # list ubuntu packages needed in production, one per line 2 | # run time dependencies 3 | # ensure fully working base python3 installation 4 | # see: https://gist.github.com/tiran/2dec9e03c6f901814f6d1e8dad09528e 5 | python3.11 6 | python3.11-venv 7 | python3.11-distutils 8 | 9 | # from packages.microsoft.com 10 | mssql-tools 11 | -------------------------------------------------------------------------------- /docs/explanation/index.md: -------------------------------------------------------------------------------- 1 | These explanations provide background knowledge for learning ehrQL.
2 | 3 | * [ehrQL backend tables](backend-tables.md) 4 | * [ehrQL output formats](output-formats.md) 5 | * [Using ehrQL in OpenSAFELY projects](using-ehrql-in-opensafely-projects.md) 6 | * [Running ehrQL](running-ehrql.md) 7 | * [Using the measures framework](measures.md) 8 | * [Selecting populations for study](selecting-populations-for-study.md) 9 | * [The OpenSAFELY VS Code extension](vscode-extension.md) 10 | -------------------------------------------------------------------------------- /docs/explanation/selecting-populations-for-study.md: -------------------------------------------------------------------------------- 1 | This page is aimed at researchers working with NHS England patient record data 2 | as provided by the OpenSAFELY backends. 3 | 4 | ## Continuity of patient data 5 | 6 | Within the NHS in England, it is usually assumed that 7 | a patient's current primary care record is complete: 8 | when a patient moves practice to another practice in England, 9 | their record moves with them. 10 | The electronic health records of patients transferring between practices in England 11 | should automatically get transferred via the GP2GP system. 12 | 13 | Known caveats are that: 14 | 15 | * not all data may be transferred; for example, appointment data 16 | * not all data may be available at once; for example, information on repeat prescriptions 17 | 18 | Refer to the [GP2GP site](https://digital.nhs.uk/services/gp2gp) 19 | and the [GP2GP Key Activities documentation (PDF)](https://digital.nhs.uk/binaries/content/assets/website-assets/services/gp2gp/gp2gp_key_activities_2017_v0_4.pdf) 20 | for further details of this transfer process. 21 | 22 | !!! note 23 | Researchers using OpenSAFELY may wish to select patients 24 | with a continuous registration. 25 | "Continuous registration" here means that 26 | a patient did not change practice during a time period of interest. 
27 | 28 | For TPP, 29 | there is a [method to select patients with a continuous registration](../reference/schemas/tpp.md#practice_registrations.spanning). 30 | -------------------------------------------------------------------------------- /docs/explanation/vscode_extension_ehrql_debug.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/explanation/vscode_extension_ehrql_debug.png -------------------------------------------------------------------------------- /docs/explanation/vscode_extension_menu_bar_button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/explanation/vscode_extension_menu_bar_button.png -------------------------------------------------------------------------------- /docs/explanation/vscode_extension_run_button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/explanation/vscode_extension_run_button.png -------------------------------------------------------------------------------- /docs/explanation/vscode_extension_run_button_dropdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/explanation/vscode_extension_run_button_dropdown.png -------------------------------------------------------------------------------- /docs/explanation/vscode_extension_search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/explanation/vscode_extension_search.png 
-------------------------------------------------------------------------------- /docs/explanation/vscode_extensions_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/explanation/vscode_extensions_icon.png -------------------------------------------------------------------------------- /docs/explanation/vscode_extensions_icon_updates.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/explanation/vscode_extensions_icon_updates.png -------------------------------------------------------------------------------- /docs/how-to/index.md: -------------------------------------------------------------------------------- 1 | The how-to guides provide practical steps for working with ehrQL in your project. 2 | 3 | * [Using ehrQL to answer specific questions](examples.md) 4 | * [How to include and exclude patients from your study population](define-population.md) 5 | * [Resolving ehrQL errors](errors.md) 6 | * [How to use dummy data in an ehrQL dataset definition](dummy-data.md) 7 | * [How to use dummy data in an ehrQL measures definition](dummy-measures-data.md) 8 | * [How to assign multiple columns to a dataset programmatically](assign-multiple-columns.md) 9 | * [How to work with codelists](codelists.md) 10 | * [How to test your dataset definition](test-dataset-definition.md) 11 | -------------------------------------------------------------------------------- /docs/how-to/opensafely_exec_create_dummy_tables.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/how-to/opensafely_exec_create_dummy_tables.png 
-------------------------------------------------------------------------------- /docs/how-to/opensafely_exec_dummy_data_file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/how-to/opensafely_exec_dummy_data_file.png -------------------------------------------------------------------------------- /docs/how-to/opensafely_exec_dummy_measures_data_file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/how-to/opensafely_exec_dummy_measures_data_file.png -------------------------------------------------------------------------------- /docs/includes/generated_docs/language__codelists.md: -------------------------------------------------------------------------------- 1 | 2 |

3 | codelist_from_csv(filename, column, category_column=None) 4 |

5 |
6 | Read a codelist from a CSV file as either a list or a dictionary (for categorised 7 | codelists). 8 | 9 | _filename_
10 | Path to the file on disk, relative to the root of your repository. (Remember to use 11 | UNIX/style/forward-slashes not Windows\style\backslashes.) 12 | 13 | _column_
14 | Name of the column in the CSV file which contains the codes. 15 | 16 | _category_column_
17 | Optional name of a column in the CSV file which contains categories to which each 18 | code should be mapped. If this argument is passed then the resulting codelist will 19 | be a dictionary mapping each code to its corresponding category. This can be passed 20 | to the [`to_category()`](#CodePatientSeries.to_category) method to map a series of 21 | codes to a series of categories. 22 | 23 | For more detail see the [how-to guide](../how-to/examples.md/#using-codelists-with-category-columns). 24 |
25 | -------------------------------------------------------------------------------- /docs/includes/generated_docs/schemas/smoketest.md: -------------------------------------------------------------------------------- 1 | 2 | # smoketest schema 3 | 4 | Available on backends: [**TPP**](../backends.md#tpp), [**EMIS**](../backends.md#emis) 5 | 6 | This tiny schema is used to write a [minimal dataset definition][smoketest_repo] that 7 | can function as a basic end-to-end test (or "smoke test") of the OpenSAFELY platform 8 | across all available backends. 9 | 10 | [smoketest_repo]: https://github.com/opensafely/test-age-distribution 11 | 12 | ``` {.python .copy title='To use this schema in an ehrQL file:'} 13 | from ehrql.tables.smoketest import ( 14 | patients, 15 | ) 16 | ``` 17 | 18 |

one row per patient

19 | ## patients 20 | 21 | 22 |
23 |
Columns
24 |
25 |
26 |
27 | date_of_birth 28 | 🔗 29 | date 30 |
31 |
32 | Patient's year and month of birth, provided in format YYYY-MM-01. The day will always be the first of the month. 33 | 34 | * Always the first day of a month 35 | * Never `NULL` 36 |
37 |
38 | 39 |
40 |
41 | -------------------------------------------------------------------------------- /docs/reference/backends.md: -------------------------------------------------------------------------------- 1 | Dataset definitions written in ehrQL can be run inside different secure 2 | environments, managed by different providers of EHR data. 3 | 4 | For each such secure environment, there is a corresponding "backend" 5 | defined in ehrQL. Each ehrQL backend: 6 | 7 | * specifies the datasets available inside each secure environment 8 | * does the necessary translation work to allow the same 9 | dataset definition to run against data modelled in different ways and 10 | stored in different systems 11 | 12 | When writing a dataset definition you don't need to explicitly reference 13 | any particular backend. But, as not every dataset is available in every 14 | backend, the [table schema](schemas.md) you use to write your dataset 15 | definition will determine which backends it can be run against. 16 | 17 | Below are the backends currently supported in ehrQL, together with the 18 | list of [table schemas](schemas.md) each one supports. 19 | 20 | 21 | ---8<-- 'includes/generated_docs/backends.md' 22 | -------------------------------------------------------------------------------- /docs/reference/cli.md: -------------------------------------------------------------------------------- 1 | ---8<-- 'includes/generated_docs/cli.md' 2 | -------------------------------------------------------------------------------- /docs/reference/features.md: -------------------------------------------------------------------------------- 1 | This reference is structured as a series of examples. 2 | 3 | The intended audience is primarily: 4 | 5 | * researchers 6 | * software developers 7 | 8 | who already have some understanding of how ehrQL works. 9 | 10 | !!! 
info 11 | Please refer to the introduction and tutorial documentation sections 12 | if you need more explanation of the underlying concepts behind ehrQL. 13 | 14 | ## How the examples work 15 | 16 | Each individual example demonstrates a specific ehrQL feature in isolation. 17 | 18 | Every example here consists of: 19 | 20 | 1. Headings and subheadings that summarise the feature being demonstrated. 21 | 2. A small example data input table containing entirely fictitious variables and values. 22 | * The table has a single-letter name referred to throughout the example 23 | * `e` for event-level table 24 | * `p` for patient-level table. 25 | * The columns of input tables use a name constructed from a single letter with a number 26 | to create an identifier — for example, `i1`. 27 | The single letter in the identifier refers to the column's data type: 28 | * a `b` column contains Boolean values 29 | * a `c` column contains electronic health record codes 30 | (the codes used in this reference are fictitious, for example: `abc`) 31 | * a `d` column contains dates 32 | * an `i` column contains integers 33 | * an `s` column contains strings 34 | * Both table and column names are written with code formatting throughout this reference. 35 | 3. An ehrQL query that extracts some data from the example table. 36 | Like the table names, ehrQL queries are displayed here with code formatting. 37 | 4. The resulting output from the ehrQL query, 38 | displayed as another table, 39 | to demonstrate the query's effect 40 | 41 | !!! note 42 | The examples here are automatically generated from [ehrQL's specification tests](https://github.com/opensafely-core/ehrql/tree/main/tests/spec). 43 | 44 | ---8<-- 'includes/generated_docs/specs.md' 45 | 46 | !!! 
parent_snippet:'includes/glossary.md' 47 | -------------------------------------------------------------------------------- /docs/reference/index.md: -------------------------------------------------------------------------------- 1 | The reference provides background knowledge for working with ehrQL in your project. 2 | 3 | * [Language reference](language.md) 4 | * [Language features](features.md) 5 | * [Backends](backends.md) 6 | * [Table schemas](schemas.md) 7 | * [Command line interface](cli.md) 8 | * [Cheatsheet](cheatsheet.md) 9 | * [Upgrading ehrQL from v0 to v1](upgrading-ehrql-from-v0-to-v1.md) 10 | -------------------------------------------------------------------------------- /docs/reference/schemas: -------------------------------------------------------------------------------- 1 | ../includes/generated_docs/schemas -------------------------------------------------------------------------------- /docs/reference/schemas.md: -------------------------------------------------------------------------------- 1 | Table schemas define the tables and columns available to query in a 2 | dataset definition. The schema a dataset definition is written against 3 | determines which [backends](backends.md) it can be run inside. 4 | 5 | Below is a list of all table schemas available in ehrQL, together with 6 | the backends that support them. 
7 | 8 | ---8<-- 'includes/generated_docs/schemas.md' 9 | -------------------------------------------------------------------------------- /docs/sandbox/medications.csv: -------------------------------------------------------------------------------- 1 | patient_id,date,dmd_code 2 | 1,2023-01-01,9207411000001106 3 | 2,2023-02-02,39695411000001103 4 | 2,2023-03-03,39695411000001103 5 | 4,2023-04-04,9207411000001106 6 | 4,2023-05-05,39695411000001103 7 | 4,2023-06-06,9207411000001106 8 | -------------------------------------------------------------------------------- /docs/sandbox/patients.csv: -------------------------------------------------------------------------------- 1 | patient_id,date_of_birth,sex,date_of_death 2 | 1,1980-01-01,F, 3 | 2,1990-02-01,M, 4 | 3,2000-03-01,F, 5 | 4,2010-04-01,M, 6 | -------------------------------------------------------------------------------- /docs/tutorial/index.md: -------------------------------------------------------------------------------- 1 | This tutorial provides practical steps for learning ehrQL. 2 | 3 | ![An XKCD cartoon](xkcd-2582.png) 4 | 5 | You will work through using ehrQL to: 6 | 7 | * identify patients who should be on the QOF register for diabetes, and 8 | * categorise patients on the register according to various QOF business rules. 9 | 10 | ??? tip "What's QOF?" 11 | QOF is the [Quality and Outcomes Framework][1]. 12 | It is an incentive programme for GP practices. 13 | To receive payment, GPs must keep registers of patients with various conditions, and demonstrate that they are providing care according to business rules. 14 | 15 | An example from the diabetes business rules (found [here][2]) would be that patients with a new diagnosis of diabetes should be referred to a structured education programme within nine months of diagnosis. 16 | 17 | The tutorial is interactive. 
18 | You will get the most out of it if you run the code yourself, and do experiments by changing the code and trying to explain what you see. 19 | 20 | At the end of the tutorial there will be a quiz. 21 | 22 | You can find a tutorial for writing an OpenSAFELY study in our [Getting started][3] guide. 23 | 24 | Get started with [Setting up](./setting-up/index.md). 25 | 26 | --- 27 | 28 | There is also a [video walkthrough](https://www.youtube.com/watch?v=hjBShGRgsWs) of this tutorial available on YouTube. 29 | 30 | [![The thumbnail for the ehrQL video tutorial](https://img.youtube.com/vi/hjBShGRgsWs/0.jpg)](https://www.youtube.com/watch?v=hjBShGRgsWs) 31 | 32 | --- 33 | 34 | Cartoon from [xkcd.com/2582](https://xkcd.com/2582). 35 | 36 | [1]: https://qof.digital.nhs.uk/ 37 | [2]: https://digital.nhs.uk/data-and-information/data-collections-and-data-sets/data-collections/quality-and-outcomes-framework-qof/business-rules/quality-and-outcomes-framework-qof-business-rules-v49-2024-25 38 | [3]: https://docs.opensafely.org/getting-started/ 39 | -------------------------------------------------------------------------------- /docs/tutorial/quiz/play-button-drop-down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/quiz/play-button-drop-down.png -------------------------------------------------------------------------------- /docs/tutorial/quiz/play-button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/quiz/play-button.png -------------------------------------------------------------------------------- /docs/tutorial/setting-up/building-codespace.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/setting-up/building-codespace.png -------------------------------------------------------------------------------- /docs/tutorial/setting-up/enhanced-tracking-protection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/setting-up/enhanced-tracking-protection.png -------------------------------------------------------------------------------- /docs/tutorial/setting-up/green-buttons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/setting-up/green-buttons.png -------------------------------------------------------------------------------- /docs/tutorial/setting-up/new-codespace-screen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/setting-up/new-codespace-screen.png -------------------------------------------------------------------------------- /docs/tutorial/setting-up/run-button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/setting-up/run-button.png -------------------------------------------------------------------------------- /docs/tutorial/setting-up/successful-run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/setting-up/successful-run.png 
-------------------------------------------------------------------------------- /docs/tutorial/simple-transformations/autocomplete-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/simple-transformations/autocomplete-example.png -------------------------------------------------------------------------------- /docs/tutorial/using-ehrql-as-part-of-a-study/index.md: -------------------------------------------------------------------------------- 1 | The last piece in the puzzle is to demonstrate how to use a dataset definition in an OpenSAFELY study. 2 | An OpenSAFELY study consists of a set of actions. 3 | At least one action must be an ehrQL action, to extract a dataset from an OpenSAFELY backend. 4 | 5 | You can run a single action using [`opensafely exec`][1]. 6 | 7 | In your Codespace, open a terminal by pressing `Ctrl+J`, and run: 8 | 9 | ``` 10 | opensafely exec ehrql:v1 generate-dataset dataset_definition.py --dummy-tables dummy_tables 11 | ``` 12 | 13 | You should see the terminal fill with a table of data in CSV format. 14 | Scroll up to see the column headers, and notice the two columns from your dataset definition (`prt_or_mal` and `ace_or_arb`). 15 | 16 | ![A screenshot of the terminal in a Codespace](terminal.png) 17 | 18 | > Question: what happens if you rename the `dataset` variable and run the `opensafely exec` command again? 19 | 20 | ??? tip "The anatomy of an OpenSAFELY command" 21 | What do the parts of the OpenSAFELY command 22 | `opensafely exec ehrql:v1 generate-dataset dataset_definition.py` 23 | do? 
24 | 25 | * `opensafely exec` executes an OpenSAFELY action independently of other OpenSAFELY actions 26 | * `ehrql` is the OpenSAFELY action to execute 27 | * `v1` is the major version of the ehrQL action 28 | * `generate-dataset` is the ehrQL command to generate a dataset from a dataset definition 29 | * `dataset_definition.py` is the dataset definition 30 | * `--dummy-tables dummy_tables` gives the path to the dummy data 31 | 32 | Note: [the main OpenSAFELY tutorial][2] documents how you can describe the actions of your study in a file called `project.yaml`. 33 | 34 | Next: [Test your ehrQL knowledge with a quiz!](../quiz/index.md) 35 | 36 | !!! abstract "Feedback" 37 | Don't fancy the quiz? That's not a problem, but if you could fill in this very short [feedback form][3]{:target="_blank"} we'd really appreciate it. 38 | 39 | 40 | [1]: https://docs.opensafely.org/opensafely-cli/#exec-interactive-development 41 | [2]: https://docs.opensafely.org/getting-started/tutorial/run-the-project-pipeline/ 42 | [3]: https://docs.google.com/forms/d/e/1FAIpQLSeouuTXPnwShAjBllyln4tl2Q52PMG_aUhpma4odpE2MmCngg/viewform 43 | -------------------------------------------------------------------------------- /docs/tutorial/using-ehrql-as-part-of-a-study/terminal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/using-ehrql-as-part-of-a-study/terminal.png -------------------------------------------------------------------------------- /docs/tutorial/xkcd-2582.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/xkcd-2582.png -------------------------------------------------------------------------------- /ehrql/VERSION: -------------------------------------------------------------------------------- 
1 | dev 2 | -------------------------------------------------------------------------------- /ehrql/__init__.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from ehrql.codes import codelist_from_csv 4 | from ehrql.debugger import show 5 | from ehrql.measures import INTERVAL, Measures, create_measures 6 | from ehrql.query_language import ( 7 | Dataset, 8 | Error, 9 | case, 10 | create_dataset, 11 | days, 12 | maximum_of, 13 | minimum_of, 14 | months, 15 | weeks, 16 | when, 17 | years, 18 | ) 19 | from ehrql.utils.log_utils import init_logging 20 | 21 | 22 | __version__ = Path(__file__).parent.joinpath("VERSION").read_text().strip() 23 | 24 | 25 | __all__ = [ 26 | "codelist_from_csv", 27 | "INTERVAL", 28 | "Measures", 29 | "Dataset", 30 | "Error", 31 | "case", 32 | "create_dataset", 33 | "create_measures", 34 | "days", 35 | "show", 36 | "maximum_of", 37 | "minimum_of", 38 | "months", 39 | "weeks", 40 | "when", 41 | "years", 42 | ] 43 | 44 | init_logging() 45 | -------------------------------------------------------------------------------- /ehrql/backends/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/ehrql/backends/__init__.py -------------------------------------------------------------------------------- /ehrql/docs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/ehrql/docs/__init__.py -------------------------------------------------------------------------------- /ehrql/docs/backends.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | import ehrql 5 | import ehrql.tables 6 | from ehrql.utils.module_utils import 
# Backends named here are listed first, in this order; all others follow.
SORT_ORDER = {name: position for position, name in enumerate(("TPP", "EMIS"))}


def build_backends():
    """Return a sorted list of metadata dicts, one per SQL backend class.

    Backend classes are whatever ``get_sibling_subclasses(SQLBackend)``
    returns. Each dict carries the keys ``name``, ``dotted_path``,
    ``file_path``, ``docstring`` and ``implements``; the last is the list of
    schema module names with the ``ehrql.tables.`` prefix stripped. The
    result is ordered by ``sort_key``.
    """
    tables_prefix = ehrql.tables.__name__ + "."

    described = []
    for backend_cls in get_sibling_subclasses(SQLBackend):
        schema_names = [
            schema_module.__name__.removeprefix(tables_prefix)
            for schema_module in backend_cls.implements
        ]
        entry = {
            "name": backend_cls.display_name,
            "dotted_path": f"{backend_cls.__module__}.{backend_cls.__qualname__}",
            "file_path": relative_file_path(backend_cls.__module__),
            "docstring": get_docstring(backend_cls),
            "implements": schema_names,
        }
        described.append(entry)

    return sorted(described, key=sort_key)


def relative_file_path(module_dotted_path):
    """Return the source path of an imported module relative to the directory
    that contains the ``ehrql`` package.

    The module is looked up in ``sys.modules``, so it must already have been
    imported (otherwise ``KeyError`` is raised).
    """
    loaded_module = sys.modules[module_dotted_path]
    source_path = Path(loaded_module.__file__)
    # parents[0] is the package directory itself; parents[1] is its container
    containing_dir = Path(ehrql.__file__).parents[1]
    return str(source_path.relative_to(containing_dir))


def sort_key(obj):
    """Sort key for a backend dict: explicit ``SORT_ORDER`` rank, then name.

    Names missing from ``SORT_ORDER`` rank as +infinity, so they come after
    the listed ones and are ordered alphabetically among themselves.
    """
    name = obj["name"]
    rank = SORT_ORDER[name] if name in SORT_ORDER else float("+inf")
    return (rank, name)
def render_backends(backend_data):
    """Render the Markdown reference section for every backend.

    Each item of ``backend_data`` is a dict whose keys fill the placeholders
    of ``BACKEND_TEMPLATE``; its ``implements`` list is turned into a bullet
    list of links to the per-schema pages. The rendered sections are joined
    with a single newline.
    """
    sections = []
    for backend in backend_data:
        bullets = [
            f" * [{schema}](schemas/{schema}.md)" for schema in backend["implements"]
        ]
        sections.append(
            BACKEND_TEMPLATE.format(**backend, schema_list="\n".join(bullets))
        )
    return "\n".join(sections)
class EHRQLException(Exception):
    """Base exception for ehrQL errors of all sorts.

    NOTE: this is not yet reliably used everywhere it should be, so catching
    it does not guarantee that every ehrQL-specific error is caught.
    """


class DummyDataException(EHRQLException):
    """Base class for errors raised while generating dummy data."""


class CannotGenerate(DummyDataException):
    """Raised when a population definition cannot be satisfied.

    This may be because the definition is logically impossible to satisfy,
    or because it is logically possible but we were unable to satisfy it.
    """
def write_rows_console(rows, column_specs):
    """Write a single table of rows to standard output as CSV lines."""
    write_rows_csv_lines(sys.stdout, rows, column_specs)


def write_tables_console(tables, table_specs):
    """Write several tables to standard output, one after another.

    ``tables`` is an iterable of row iterables, paired positionally with the
    ``(table_name, column_specs)`` items of ``table_specs``. Consecutive
    tables are separated by two newlines, and each table is preceded by its
    name only when ``table_specs`` holds more than one table.
    """
    show_names = len(table_specs) > 1
    paired = zip(tables, table_specs.items())
    for position, (rows, (table_name, column_specs)) in enumerate(paired):
        if position > 0:
            # Add whitespace between tables
            sys.stdout.write("\n\n")
        if show_names:
            sys.stdout.write(f"{table_name}\n")
        write_rows_console(rows, column_specs)
SUPPRESSION_THRESHOLD = 7
ROUNDING_MULTIPLE = 5


def apply_sdc(value):
    """
    Apply statistical disclosure control to a single non-negative integer count.

    Counts less than or equal to SUPPRESSION_THRESHOLD are suppressed to zero; any
    other count is rounded to the nearest multiple of ROUNDING_MULTIPLE.

    Raises AssertionError if `value` is not an int or is negative.
    """
    # Check the type before comparing so that a non-numeric value (e.g. None) fails
    # with the intended assertion rather than a TypeError raised by `>=`
    assert isinstance(value, int)
    assert value >= 0
    value = 0 if value <= SUPPRESSION_THRESHOLD else value
    value = int(ROUNDING_MULTIPLE * round(value / ROUNDING_MULTIPLE, ndigits=0))
    return value
class SQLiteDialect(SQLiteDialect_pysqlite):
    """
    pysqlite dialect variant which takes its DBAPI module from `sqlean` and switches
    on case-sensitive LIKE for each new connection.
    """

    supports_statement_cache = False

    @classmethod
    def import_dbapi(cls):
        # Use sqlean rather than the system version, enabling its "math" extension
        # before handing back the DBAPI module
        sqlean.extensions.enable("math")
        dbapi_module = sqlean.dbapi2
        return dbapi_module

    def on_connect(self):
        # `on_connect` must return a callable to be executed
        connect_hook = self.do_on_connect
        return connect_hook

    def do_on_connect(self, connection):
        # Set the per-connection flag which makes LIKE queries case-sensitive
        pragma = "PRAGMA case_sensitive_like = 1;"
        connection.execute(pragma)
class TrinoDialect(BaseTrinoDialect):
    """
    Trino dialect configured to use the customised DDL and type compilers
    (`TrinoDDLCompiler` and `TrinoTypeCompiler`).
    """

    # Compiler classes SQLAlchemy should use for this dialect
    ddl_compiler = TrinoDDLCompiler
    type_compiler = TrinoTypeCompiler

    supports_statement_cache = True

    # Tell SQLAlchemy it can use batched insert options for faster test setup
    supports_multivalues_insert = True
    use_insertmanyvalues = True
    use_insertmanyvalues_wo_returning = True
def gather_unique_nodes(node, found):
    """
    Add `node`, and every node reachable from it via `get_input_nodes`, to `found`.

    `found` is a set which is updated in place; nothing is returned. `node` itself is
    always added and expanded; input nodes which are already in `found` are treated
    as visited and are not expanded again.

    The traversal uses an explicit stack rather than recursion so that deeply nested
    graphs cannot exhaust Python's recursion limit.
    """
    found.add(node)
    pending = [node]
    while pending:
        current = pending.pop()
        for subnode in get_input_nodes(current):
            if subnode not in found:
                # Mark as found at the point of queueing so that each node is
                # expanded at most once, however many parents reference it
                found.add(subnode)
                pending.append(subnode)
def type_from_python_type(type_):
    """
    Return the SQLAlchemy type for a given Python type.

    If `type_` has a `_primitive_type` attribute it is called and its result is used
    for the lookup in TYPE_MAP; otherwise `type_` itself is looked up.

    Raises TypeError if TYPE_MAP has no entry for the lookup type.
    """
    if hasattr(type_, "_primitive_type"):
        lookup_type = type_._primitive_type()
    else:
        lookup_type = type_
    try:
        return TYPE_MAP[lookup_type]
    except KeyError:
        # The KeyError is an implementation detail of the lookup: suppress it so the
        # traceback shows only the meaningful TypeError
        raise TypeError(f"Unsupported column type: {type_}") from None
9 | """ 10 | 11 | from ehrql.tables.raw.core import ons_deaths 12 | 13 | 14 | __all__ = [ 15 | "ons_deaths", 16 | ] 17 | -------------------------------------------------------------------------------- /ehrql/tables/smoketest.py: -------------------------------------------------------------------------------- 1 | """ 2 | This tiny schema is used to write a [minimal dataset definition][smoketest_repo] that 3 | can function as a basic end-to-end test (or "smoke test") of the OpenSAFELY platform 4 | across all available backends. 5 | 6 | [smoketest_repo]: https://github.com/opensafely/test-age-distribution 7 | """ 8 | 9 | import datetime 10 | 11 | from ehrql.tables import Constraint, PatientFrame, Series, table 12 | 13 | 14 | __all__ = [ 15 | "patients", 16 | ] 17 | 18 | 19 | @table 20 | class patients(PatientFrame): 21 | date_of_birth = Series( 22 | datetime.date, 23 | description=( 24 | "Patient's year and month of birth, provided in format YYYY-MM-01. " 25 | "The day will always be the first of the month." 
class EHRQLFormatter(logging.Formatter):
    """
    Formatter which attaches a lower-cased copy of the level name to each record as
    `levelname_lower`, so that format strings can refer to it.
    """

    def format(self, record):
        lowered = record.levelname.lower()
        record.levelname_lower = lowered
        return super().format(record)
def get_grouping_level_as_int(all_groups, group_subset):
    """
    Return the grouping level of `group_subset` within `all_groups` as an integer.

    The level is calculated in the same way as the grouping ID in SQL Server: build a
    string of 0s and 1s with one digit per column in `all_groups`, in order, where a
    1 indicates that the column is NOT a grouping column (i.e. it is not in
    `group_subset`), and read that string as a base-2 integer. Returns 0 when
    `all_groups` is empty.
    https://learn.microsoft.com/en-us/sql/t-sql/functions/grouping-id-transact-sql?view=sql-server-ver16
    """
    if not all_groups:
        return 0
    digits = []
    for group in all_groups:
        digits.append("0" if group in group_subset else "1")
    return int("".join(digits), 2)
def get_all_subclasses(cls):
    """
    Yield every direct and indirect subclass of `cls`, depth-first.

    Each subclass is yielded before its own subclasses. A class which is reachable
    via more than one inheritance path is yielded once for each path.
    """
    # Stack of classes still to visit; reversed so that popping from the end visits
    # them in the order `__subclasses__()` reported them
    pending = list(reversed(cls.__subclasses__()))
    while pending:
        current = pending.pop()
        yield current
        pending.extend(reversed(current.__subclasses__()))
def on_page_markdown(markdown, page, **kwargs):
    """
    parent_snippet markers are snippets that are intended to be replaced in the parent
    site with appropriate snippet notation. The snippets themselves do not live in
    this repo.

    on_page_* methods are called for each Page in a mkdocs site and can modify the
    markdown they are given as input. We're using this method to look for the
    parent_snippet markers and replace them with a note box that indicates in the
    built docs that this snippet will be replaced in the full docs build.

    For example:
        !!! parent_snippet:'includes/glossary.md'

    will be replaced with:
        !!! note "TO BE REPLACED IN FULL DOCS BUILD"
            This snippet will be replaced in the main docs with the parent file 'includes/glossary.md'

    This allows docs imported from other repos (e.g. ehrql) to reference snippets
    in the parent docs, such as the glossary.

    Returns the markdown with every parent_snippet marker line replaced; `page` and
    any extra keyword arguments are accepted but not used.
    """
    marker = "!!! parent_snippet:"
    parent_snippets = set(re.findall(r"!!! parent_snippet:.+\n", markdown))
    for parent_snippet in parent_snippets:
        # Every match starts with the marker, so drop exactly that prefix. (Using
        # `lstrip` here would strip a *set of characters* and so eat the start of
        # any path made up of those characters, e.g. "snippets/...".)
        target = parent_snippet.removeprefix(marker)
        markdown = markdown.replace(
            parent_snippet,
            '\n\n!!! note "TO BE REPLACED IN FULL DOCS BUILD"\n\n\tThis snippet will be replaced in the main docs '
            f"with the parent file {target}",
        )
    return markdown
For more information, see: 27 | # https://github.com/jazzband/pip-tools/issues/2176 28 | pip==25.0.1 29 | -------------------------------------------------------------------------------- /requirements.prod.in: -------------------------------------------------------------------------------- 1 | pyarrow 2 | sqlalchemy 3 | 4 | # Database driver for MS-SQL 5 | pymssql 6 | 7 | # Trino python client and database driver 8 | trino 9 | 10 | # Gives us isolation from the system version of SQLite and means we don't 11 | # need to worry about e.g. some versions of SQLite missing the `FLOOR` 12 | # function. 13 | sqlean.py 14 | 15 | # For graphing query graphs 16 | networkx 17 | pydot 18 | -------------------------------------------------------------------------------- /scripts/.gitignore: -------------------------------------------------------------------------------- 1 | /environ.env 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/__init__.py -------------------------------------------------------------------------------- /tests/acceptance/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/acceptance/__init__.py -------------------------------------------------------------------------------- /tests/acceptance/external_studies/mainroute_cancer/analysis/codelists.py: -------------------------------------------------------------------------------- 1 | from ehrql import codelist_from_csv 2 | 3 | colorectal_symptom_codes = codelist_from_csv( 4 | "codelists/phc-symptoms-colorectal-cancer.csv", column="code" 5 | ) 6 | 7 | colorectal_diagnosis_codes_snomed = codelist_from_csv( 8 | 
"codelists/phc-phc-colorectal-cancer-snomed.csv", column="code" 9 | ) 10 | 11 | colorectal_referral_codes = codelist_from_csv( 12 | "codelists/phc-2ww-referral-colorectal.csv", column="code" 13 | ) 14 | 15 | ida_codes = codelist_from_csv( 16 | "codelists/phc-symptom-colorectal-ida.csv", column="code" 17 | ) 18 | 19 | cibh_codes = codelist_from_csv( 20 | "codelists/phc-symptom-colorectal-cibh.csv", column="code" 21 | ) 22 | 23 | prbleeding_codes = codelist_from_csv( 24 | "codelists/phc-symptom-colorectal-pr-bleeding.csv", column="code" 25 | ) 26 | 27 | wl_codes = codelist_from_csv( 28 | "codelists/phc-symptom-colorectal-wl.csv", column="code" 29 | ) 30 | 31 | abdomass_codes = codelist_from_csv( 32 | "codelists/phc-symptom-lowergi-abdo-mass.csv", column="code" 33 | ) 34 | 35 | abdopain_codes = codelist_from_csv( 36 | "codelists/phc-symptom-lowergi-abdo-pain.csv", column="code" 37 | ) 38 | 39 | anaemia_codes = codelist_from_csv( 40 | "codelists/phc-symptom-lowergi-anaemia.csv", column="code" 41 | ) 42 | 43 | fit_codes = codelist_from_csv( 44 | "codelists/phc-fit-test.csv", column="code" 45 | ) 46 | 47 | ethnicity_codes_16 = codelist_from_csv( 48 | "codelists/opensafely-ethnicity-snomed-0removed.csv", 49 | column="snomedcode", 50 | category_column="Grouping_16", 51 | ) 52 | 53 | ethnicity_codes_6 = codelist_from_csv( 54 | "codelists/opensafely-ethnicity-snomed-0removed.csv", 55 | column="snomedcode", 56 | category_column="Grouping_6", 57 | ) 58 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/mainroute_cancer/codelists/phc-2ww-referral-colorectal.csv: -------------------------------------------------------------------------------- 1 | code,term 2 | 276401000000108,Fast track referral for suspected colorectal cancer 3 | 276411000000105,Urgent cancer referral - colorectal 4 | 276421000000104,Urgent cancer referral - colorectal 5 | 
-------------------------------------------------------------------------------- /tests/acceptance/external_studies/mainroute_cancer/codelists/phc-colorectal-cancer-icd10.csv: -------------------------------------------------------------------------------- 1 | code,term 2 | C18,Malignant neoplasm of colon 3 | C180,Malignant neoplasm: Caecum 4 | C181,Malignant neoplasm: Appendix 5 | C182,Malignant neoplasm: Ascending colon 6 | C183,Malignant neoplasm: Hepatic flexure 7 | C184,Malignant neoplasm: Transverse colon 8 | C185,Malignant neoplasm: Splenic flexure 9 | C186,Malignant neoplasm: Descending colon 10 | C187,Malignant neoplasm: Sigmoid colon 11 | C188,Malignant neoplasm: Overlapping lesion of colon 12 | C189,"Malignant neoplasm: Colon, unspecified" 13 | C19,Malignant neoplasm of rectosigmoid junction 14 | C20,Malignant neoplasm of rectum 15 | C21,Malignant neoplasm of anus and anal canal 16 | C210,"Malignant neoplasm: Anus, unspecified" 17 | C211,Malignant neoplasm: Anal canal 18 | C212,Malignant neoplasm: Cloacogenic zone 19 | C218,"Malignant neoplasm: Overlapping lesion of rectum, anus and anal canal" 20 | C785,Secondary malignant neoplasm of large intestine and rectum 21 | D374,Neoplasm of uncertain or unknown behaviour: Colon 22 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/mainroute_cancer/codelists/phc-fit-test.csv: -------------------------------------------------------------------------------- 1 | code,term 2 | 1015401000000102,Faecal occult blood test 3 | 1049361000000101,Quantitative faecal immunochemical test 4 | 1049371000000108,Quantitative faecal immunochemical test 5 | 389076003,Fecal occult blood: trace 6 | 59614000,Occult blood in stools 7 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/mainroute_cancer/codelists/phc-symptom-colorectal-ida.csv: 
-------------------------------------------------------------------------------- 1 | code,term 2 | 191127009,Anaemia due to chronic blood loss: [iron deficiency] or [normocytic] 3 | 191128004,Iron deficiency anemia due to dietary causes 4 | 191135007,Chlorotic anemia 5 | 191408005,[X]Other iron deficiency anemias 6 | 234351006,Iron deficiency anaemia due to chronic blood loss 7 | 371315009,Iron deficiency anemia secondary to inadequate dietary iron intake 8 | 397761000000103,[X]Other iron deficiency anaemias 9 | 413533008,Anemia due to chronic blood loss 10 | 42626004,Iron deficiency anemia secondary to chronic blood loss 11 | 44252001,Blood loss anemia 12 | 598461000000107,Iron deficiency anemia NOS 13 | 610661000000100,Other specified iron deficiency anemia NOS 14 | 610671000000107,Unspecified iron deficiency anemia 15 | 661301000000100,Other specified iron deficiency anemia 16 | 717948004,Acquired iron deficiency anemia due to increased iron requirement 17 | 722005000,Iron-refractory iron deficiency anemia 18 | 724556004,Iron deficiency anemia due to blood loss 19 | 724557008,Acquired iron deficiency anemia due to decreased absorption 20 | 80126007,Plummer-Vinson syndrome 21 | 87522002,Iron deficiency anemia 22 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/mainroute_cancer/codelists/phc-symptom-colorectal-pr-bleeding.csv: -------------------------------------------------------------------------------- 1 | code,term 2 | 1085171000119108,Rectal hemorrhage due to chronic ulcerative pancolitis 3 | 1085221000119103,Rectal hemorrhage due to chronic ulcerative proctitis 4 | 1085271000119102,Rectal hemorrhage due to chronic ulcerative rectosigmoiditis 5 | 1085431000119101,Rectal hemorrhage due to inflammatory polyps of colon 6 | 1085791000119105,Rectal hemorrhage due to Crohn's disease of large intestine 7 | 1085841000119108,Rectal hemorrhage due to Crohn's disease of small and large intestines 8 | 
1085891000119100,Rectal hemorrhage due to Crohn's disease of small intestine 9 | 1085941000119104,Rectal hemorrhage due to Crohn's disease 10 | 1092881000119105,Rectal hemorrhage due to ulcerative colitis 11 | 12063002,Rectal hemorrhage 12 | 164451000000109,Painful rectal bleeding 13 | 164461000000107,Painless rectal bleeding 14 | 171731000000100,Painful rectal bleeding 15 | 171741000000109,Painless rectal bleeding 16 | 266464001,Hemorrhage of rectum and anus 17 | 414991007,Painful rectal bleeding 18 | 414992000,Painless rectal bleeding 19 | 571611000000105,Hemorrhage of rectum and anus NOS 20 | 721690003,Acute hemorrhagic ulcer of rectum 21 | 981008,Hemorrhagic proctitis 22 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/mainroute_cancer/codelists/phc-symptom-colorectal-wl.csv: -------------------------------------------------------------------------------- 1 | code,term 2 | 139089007,Weight decreasing 3 | 139091004,Weight loss (& abnormal) 4 | 158271000,[D]Abnormal loss of weight 5 | 161832001,Weight decreasing 6 | 161834000,Abnormal weight loss (& [symptom]) 7 | 198511000000103,Complaining of weight loss 8 | 206919000,[D]Abnormal loss of weight 9 | 213791000000109,Complaining of weight loss 10 | 213801000000108,Complaining of weight loss 11 | 23712001,Abnormal decrease in weight 12 | 267024001,Abnormal weight loss 13 | 267158006,Weight loss (& abnormal) 14 | 422868009,Unexplained weight loss 15 | 448765001,Unintentional weight loss 16 | 496901000000107,[D]Abnormal loss of weight 17 | 511461000000103,Unexplained weight loss 18 | 699205002,Involuntary weight loss 19 | 768571000000103,Unintentional weight loss 20 | 768581000000101,Unintentional weight loss 21 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/qof-diabetes/analysis/dataset_definition_dm017.py: 
def first_matching_event(events, codelist, where=True):
    """
    Filter `events` by the `where` condition and by `snomedct_code` membership of
    `codelist`, sort the result by `date`, and return the first row for each patient.
    """
    candidates = events.where(where)
    matching = candidates.where(events.snomedct_code.is_in(codelist))
    ordered = matching.sort_by(events.date)
    return ordered.first_for_patient()
20 | events.where(where) 21 | .where(events.snomedct_code.is_in(codelist)) 22 | .sort_by(events.date) 23 | .last_for_patient() 24 | ) 25 | 26 | 27 | def age_as_of(date): 28 | return (date - schema.patients.date_of_birth).years 29 | 30 | 31 | # TODO this is not exactly the same as died_from_any_cause(). 32 | # Note that this function only checks the patient table 33 | def died_as_of(date): 34 | return schema.patients.date_of_death.is_not_null() & ( 35 | schema.patients.date_of_death < date 36 | ) 37 | 38 | 39 | def _registrations_overlapping_period(start_date, end_date): 40 | regs = schema.practice_registrations 41 | return regs.where( 42 | regs.start_date.is_on_or_before(start_date) 43 | & (regs.end_date.is_after(end_date) | regs.end_date.is_null()) 44 | ) 45 | 46 | 47 | def practice_registration_as_of(date): 48 | regs = _registrations_overlapping_period(date, date) 49 | return regs.sort_by(regs.start_date, regs.end_date).first_for_patient() 50 | 51 | 52 | def get_events_on_or_between(events, codelist, start_date, end_date, where=True): 53 | return ( 54 | events.where(where) 55 | .where(events.snomedct_code.is_in(codelist)) 56 | .where(events.date.is_on_or_between(start_date, end_date)) 57 | ) 58 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-bldtestdec_cod.csv: -------------------------------------------------------------------------------- 1 | code,term 2 | 116471000119100,Blood test declined 3 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-dminvite_cod.csv: -------------------------------------------------------------------------------- 1 | code,term 2 | 1066911000000100,Diabetes monitoring short message service text message first invitation 3 | 1066921000000106,Diabetes monitoring short message service text 
message second invitation 4 | 1066931000000108,Diabetes monitoring short message service text message third invitation 5 | 1083111000000108,Diabetes monitoring invitation email 6 | 1109921000000106,Quality and Outcomes Framework quality indicator-related care invitation 7 | 1110921000000100,Quality and Outcomes Framework diabetes mellitus quality indicator-related care invitation 8 | 143401000000102,Quality and Outcomes Framework diabetes mellitus quality indicator-related care invitation using preferred method of communication 9 | 185756006,Diabetes monitoring first letter 10 | 185757002,Diabetes monitoring second letter 11 | 185758007,Diabetes monitoring third letter 12 | 185759004,Diabetes monitoring verbal invite 13 | 185760009,Diabetes monitoring telephone invite 14 | 310425007,Diabetes monitoring invitation 15 | 705072004,Diabetes monitoring invitation by short message service text messaging 16 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-dmmax_cod.csv: -------------------------------------------------------------------------------- 1 | code,term 2 | 407569005,Patient on maximal tolerated therapy for diabetes 3 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-dmpcadec_cod.csv: -------------------------------------------------------------------------------- 1 | code,term 2 | 716031000000106,Excepted from diabetes quality indicators - informed dissent 3 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-dmpcapu_cod.csv: -------------------------------------------------------------------------------- 1 | code,term 2 | 717421000000100,Excepted from diabetes quality indicators - patient 
unsuitable 3 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-dmres_cod.csv: -------------------------------------------------------------------------------- 1 | code,term 2 | 315051004,Diabetes resolved 3 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-ifcchbam_cod.csv: -------------------------------------------------------------------------------- 1 | code,term 2 | 1049301000000100,Haemoglobin A1c level (diagnostic reference range) - International Federation of Clinical Chemistry and Laboratory Medicine standardised 3 | 1049321000000109,Haemoglobin A1c level (monitoring ranges) - International Federation of Clinical Chemistry and Laboratory Medicine standardised 4 | 999791000000106,Haemoglobin A1c level - International Federation of Clinical Chemistry and Laboratory Medicine standardised 5 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-mildfrail_cod.csv: -------------------------------------------------------------------------------- 1 | code,term 2 | 925791000000100,Mild frailty 3 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-modfrail_cod.csv: -------------------------------------------------------------------------------- 1 | code,term 2 | 925831000000107,Moderate frailty 3 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-serfruc_cod.csv: -------------------------------------------------------------------------------- 1 | code,term 2 | 
1006751000000102,Serum fructosamine level 3 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-sevfrail_cod.csv: -------------------------------------------------------------------------------- 1 | code,term 2 | 925861000000102,Severe frailty 3 | -------------------------------------------------------------------------------- /tests/acceptance/external_studies/test-age-distribution/analysis/dataset_definition.py: -------------------------------------------------------------------------------- 1 | from ehrql import create_dataset 2 | from ehrql.tables.smoketest import patients 3 | 4 | index_year = 2022 5 | min_age = 18 6 | max_age = 80 7 | 8 | year_of_birth = patients.date_of_birth.year 9 | age = index_year - year_of_birth 10 | 11 | dataset = create_dataset() 12 | dataset.define_population((age >= min_age) & (age <= max_age)) 13 | dataset.age = age 14 | -------------------------------------------------------------------------------- /tests/autocomplete/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/autocomplete/__init__.py -------------------------------------------------------------------------------- /tests/docker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/docker/__init__.py -------------------------------------------------------------------------------- /tests/docker/test_drivers.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | def test_driver_in_container(call_cli_docker, engine): 5 | # This test doesn't make sense for these in-memory databases 6 | if engine.name in 
{"in_memory", "sqlite"}: 7 | pytest.skip() 8 | 9 | backends = { 10 | "mssql": "ehrql.backends.tpp.TPPBackend", 11 | "trino": "ehrql.backends.emis.EMISBackend", 12 | } 13 | 14 | if engine.name not in backends: 15 | assert False, f"no backend for database: {engine.name}" 16 | 17 | backend = backends[engine.name] 18 | url = engine.database.container_url() 19 | 20 | call_cli_docker( 21 | "test-connection", 22 | "--backend", 23 | backend, 24 | "--url", 25 | url, 26 | ) 27 | -------------------------------------------------------------------------------- /tests/docs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/docs/__init__.py -------------------------------------------------------------------------------- /tests/fixtures/bad_definition_files/bad_import.py: -------------------------------------------------------------------------------- 1 | # noqa: INP001 2 | from ehrql.tables.smoketest import no_such_table # noqa: F401 3 | -------------------------------------------------------------------------------- /tests/fixtures/bad_definition_files/bad_syntax.py: -------------------------------------------------------------------------------- 1 | what even is a Python 2 | -------------------------------------------------------------------------------- /tests/fixtures/bad_definition_files/bad_types.py: -------------------------------------------------------------------------------- 1 | # noqa: INP001 2 | from ehrql.tables.core import patients 3 | 4 | 5 | patients.date_of_birth == patients.sex 6 | -------------------------------------------------------------------------------- /tests/fixtures/bad_definition_files/empty_measures.py: -------------------------------------------------------------------------------- 1 | from ehrql import Measures 2 | 3 | measures = Measures() 4 | 
-------------------------------------------------------------------------------- /tests/fixtures/bad_definition_files/no_dataset.py: -------------------------------------------------------------------------------- 1 | datasat = {} 2 | -------------------------------------------------------------------------------- /tests/fixtures/bad_definition_files/no_measures.py: -------------------------------------------------------------------------------- 1 | measuuuuures = [] 2 | -------------------------------------------------------------------------------- /tests/fixtures/bad_definition_files/no_population.py: -------------------------------------------------------------------------------- 1 | from ehrql import Dataset 2 | 3 | dataset = Dataset() 4 | -------------------------------------------------------------------------------- /tests/fixtures/bad_definition_files/not_a_dataset.py: -------------------------------------------------------------------------------- 1 | dataset = object() 2 | -------------------------------------------------------------------------------- /tests/fixtures/bad_definition_files/not_measures_instance.py: -------------------------------------------------------------------------------- 1 | measures = object() 2 | -------------------------------------------------------------------------------- /tests/fixtures/bad_definition_files/operator_error.py: -------------------------------------------------------------------------------- 1 | # noqa: INP001 2 | from ehrql.tables.core import patients 3 | 4 | 5 | patients.date_of_birth == "2000-01-01" | patients.date_of_birth == "1990-01-01" 6 | -------------------------------------------------------------------------------- /tests/fixtures/codelist_csvs/categories.csv: -------------------------------------------------------------------------------- 1 | code,description,category 2 | 123A,Asthma,respiratory 3 | 123B,Severe asthma,respiratory 4 | 234C,Hypertension,other 5 | 345D,Hyperthyroidism,other 6 | 
-------------------------------------------------------------------------------- /tests/fixtures/codelist_csvs/custom_col.csv: -------------------------------------------------------------------------------- 1 | 123Codes,description 2 | 123-A,Asthma 3 | 123-B,Severe asthma 4 | 123-C,Hypertension 5 | 123-D,Hyperthyroidism 6 | -------------------------------------------------------------------------------- /tests/fixtures/codelist_csvs/default_col.csv: -------------------------------------------------------------------------------- 1 | code,description 2 | 123A,Asthma 3 | 123B,Severe asthma 4 | 234C,Hypertension 5 | 345D,Hyperthyroidism 6 | -------------------------------------------------------------------------------- /tests/fixtures/codelist_csvs/extra_whitespace.csv: -------------------------------------------------------------------------------- 1 | code,description 2 | W123 ,Asthma 3 | W234, Severe asthma 4 | W345, Hypertension 5 | W456,Hyperthyroidism 6 | -------------------------------------------------------------------------------- /tests/fixtures/csv_date_merging/measure_test_2021-01-01.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | 1,2,3 3 | -------------------------------------------------------------------------------- /tests/fixtures/csv_date_merging/measure_test_2021-02-01.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | 4,5,6 3 | -------------------------------------------------------------------------------- /tests/fixtures/csv_date_merging/measure_test_2021-03-01.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | 7,8,9 3 | -------------------------------------------------------------------------------- /tests/fixtures/csv_date_merging/measure_test_20210908.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | 10,11,12 3 | 
-------------------------------------------------------------------------------- /tests/fixtures/csv_date_merging/measure_test_code_2021-03-01.csv: -------------------------------------------------------------------------------- 1 | d,e,f 2 | 1,2,3 3 | -------------------------------------------------------------------------------- /tests/fixtures/csv_date_merging/measure_test_code_2021-04-01.csv: -------------------------------------------------------------------------------- 1 | d,e,f 2 | 4,5,6 3 | -------------------------------------------------------------------------------- /tests/fixtures/csv_date_merging/measure_test_error_2021-01-01.csv: -------------------------------------------------------------------------------- 1 | a,b,d 2 | 1,2,3 3 | -------------------------------------------------------------------------------- /tests/fixtures/csv_date_merging/measure_test_error_2021-02-01.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | 1,2,3 3 | -------------------------------------------------------------------------------- /tests/fixtures/csv_date_merging/measure_test_event.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | 0,0,0 3 | -------------------------------------------------------------------------------- /tests/fixtures/debug/patients.csv: -------------------------------------------------------------------------------- 1 | patient_id,date_of_birth,sex,date_of_death 2 | 1,1980-01-01,female, 3 | 2,1990-02-01,male, 4 | 3,2000-03-01,female, 5 | 4,2010-04-01,male, 6 | -------------------------------------------------------------------------------- /tests/fixtures/dummy_data/dummy-data.csv: -------------------------------------------------------------------------------- 1 | patient_id,sex,has_event,event_date,event_count 2 | 11,F,1,2021-01-01,1 3 | 22,M,0,, 4 | -------------------------------------------------------------------------------- 
/tests/fixtures/dummy_data/dummy-data.txt: -------------------------------------------------------------------------------- 1 | patient_id,sex,has_event,event_date,event_count 2 | 11,F,1,2021-01-01,1 3 | 22,M,0,, 4 | -------------------------------------------------------------------------------- /tests/fixtures/dummy_data/extra-column.csv: -------------------------------------------------------------------------------- 1 | patient_id,sex,has_event,event_date,event_count,extra_col 2 | 11,F,1,2021-01-01,1, 3 | 22,M,0,,, 4 | -------------------------------------------------------------------------------- /tests/fixtures/dummy_data/invalid-bool.csv: -------------------------------------------------------------------------------- 1 | patient_id,sex,has_event,event_date,event_count 2 | 11,F,X,2021-01-01,1 3 | 22,M,0,, 4 | -------------------------------------------------------------------------------- /tests/fixtures/dummy_data/invalid-date.csv: -------------------------------------------------------------------------------- 1 | patient_id,sex,has_event,event_date,event_count 2 | 11,F,1,2021-021-021,1 3 | 22,M,0,, 4 | -------------------------------------------------------------------------------- /tests/fixtures/dummy_data/invalid-patient-id.csv: -------------------------------------------------------------------------------- 1 | patient_id,sex,has_event,event_date,event_count 2 | Eleven,F,1,2021-01-01,1 3 | 22,M,0,, 4 | -------------------------------------------------------------------------------- /tests/fixtures/dummy_data/missing-column.csv: -------------------------------------------------------------------------------- 1 | patient_id,sex,has_event,event_count 2 | 11,F,1,1 3 | 22,M,0, 4 | -------------------------------------------------------------------------------- /tests/fixtures/dummy_data/zero-date.csv: -------------------------------------------------------------------------------- 1 | patient_id,sex,has_event,event_date,event_count 2 | 11,F,1,2021-10-01,1 
3 | 22,M,0,, 4 | 33,M,,0, 5 | -------------------------------------------------------------------------------- /tests/fixtures/good_definition_files/assurance.py: -------------------------------------------------------------------------------- 1 | # noqa: INP001 2 | from datetime import date 3 | 4 | from ehrql import Dataset 5 | from ehrql.tables.core import patients 6 | 7 | 8 | dataset = Dataset() 9 | dataset.define_population(patients.date_of_birth.is_on_or_after("2000-01-01")) 10 | 11 | test_data = { 12 | # Correctly not expected in population 13 | 1: { 14 | "patients": {"date_of_birth": date(1999, 12, 1)}, 15 | "expected_in_population": False, 16 | }, 17 | } 18 | -------------------------------------------------------------------------------- /tests/fixtures/good_definition_files/chatty_dataset_definition.py: -------------------------------------------------------------------------------- 1 | # noqa: INP001 2 | import sys 3 | 4 | from ehrql import create_dataset 5 | from ehrql.tables.core import patients 6 | 7 | 8 | print("I am a bit chatty", file=sys.stderr) 9 | 10 | dataset = create_dataset() 11 | dataset.year_of_birth = patients.date_of_birth.year 12 | dataset.define_population(patients.exists_for_patient()) 13 | -------------------------------------------------------------------------------- /tests/fixtures/good_definition_files/dataset_definition.py: -------------------------------------------------------------------------------- 1 | # noqa: INP001 2 | from ehrql import create_dataset 3 | from ehrql.tables.core import patients 4 | 5 | 6 | dataset = create_dataset() 7 | dataset.year_of_birth = patients.date_of_birth.year 8 | dataset.sex = patients.sex 9 | dataset.define_population(patients.date_of_birth.is_on_or_after("2000-01-01")) 10 | -------------------------------------------------------------------------------- /tests/fixtures/good_definition_files/dataset_definition_with_print.py: 
-------------------------------------------------------------------------------- 1 | # noqa: INP001 2 | from ehrql import create_dataset 3 | from ehrql.tables.core import patients 4 | 5 | 6 | dataset = create_dataset() 7 | dataset.year_of_birth = patients.date_of_birth.year 8 | dataset.sex = patients.sex 9 | dataset.define_population(patients.date_of_birth.is_on_or_after("2000-01-01")) 10 | print("user stdout") 11 | -------------------------------------------------------------------------------- /tests/fixtures/good_definition_files/debug_definition.py: -------------------------------------------------------------------------------- 1 | # noqa: INP001 2 | from ehrql import create_dataset, show 3 | from ehrql.tables.core import patients 4 | 5 | 6 | dataset = create_dataset() 7 | show(dataset) 8 | dataset.sex = patients.sex 9 | dataset.define_population(patients.date_of_birth.is_on_or_after("2000-01-01")) 10 | dataset.year_of_birth = patients.date_of_birth.year 11 | -------------------------------------------------------------------------------- /tests/fixtures/good_definition_files/measure_definitions.py: -------------------------------------------------------------------------------- 1 | # noqa: INP001 2 | from ehrql import INTERVAL, Measures, years 3 | from ehrql.tables.core import patients 4 | 5 | 6 | measures = Measures() 7 | 8 | measures.define_measure( 9 | "births", 10 | numerator=patients.date_of_birth.is_during(INTERVAL), 11 | denominator=patients.exists_for_patient(), 12 | group_by={"sex": patients.sex}, 13 | intervals=years(2).starting_on("2020-01-01"), 14 | ) 15 | -------------------------------------------------------------------------------- /tests/fixtures/local_file_engine/events.csv: -------------------------------------------------------------------------------- 1 | patient_id,score 2 | 1,2 3 | 1,3 4 | 1,4 5 | 2,5 6 | 2,10 7 | -------------------------------------------------------------------------------- 
/tests/fixtures/local_file_engine/patients.csv: -------------------------------------------------------------------------------- 1 | patient_id,sex,ignored_column 2 | 1,M,a 3 | 2,F,b 4 | 3,, 5 | -------------------------------------------------------------------------------- /tests/fixtures/quiz-example-data/addresses.csv: -------------------------------------------------------------------------------- 1 | patient_id,address_id,start_date,end_date,rural_urban_classification,imd_rounded,msoa_code 2 | 2,1001,1993-01-30,2015-08-13,2,19600,E02008618 3 | 2,1002,2015-08-13,,4,11700,E02002421 4 | 4,3001,2007-06-01,,7,28600,E02000623 5 | 5,4001,1981-03-12,2018-05-23,5,21200,E02000692 6 | 6,5001,2020-07-13,,2,5300,E02003054 7 | 7,6001,2013-11-19,,3,27700,E02007050 8 | 8,7001,2018-12-03,,2,16800,E02006760 9 | 9,8001,1999-04-29,2017-11-10,1,26800,E02003208 10 | 9,8002,2017-11-10,,2,21400,E02009132 11 | 10,9001,2005-03-28,,1,6900,E02001319 12 | 10,9002,2015-04-18,,1,6400,E02001792 13 | -------------------------------------------------------------------------------- /tests/fixtures/quiz-example-data/clinical_events.csv: -------------------------------------------------------------------------------- 1 | patient_id,date,snomedct_code,ctv3_code,numeric_value 2 | 1,2014-01-11,195967001,H33.., 3 | 1,2014-04-10,60621009,X76C0,25.8 4 | 2,2015-08-06,195967001,H33.., 5 | 2,2017-04-12,60621009,X76C2,18.4 6 | 2,2018-05-26,60621009,X76C1,23.1 7 | 2,2020-05-17,73211009,C10.., 8 | 3,2017-05-11,60621009,X76C3,29.5 9 | 4,2019-05-16,60621009,X76C4,34.3 10 | 4,2022-11-09,73211009,C10.., 11 | 5,2017-05-11,195967001,H33.., 12 | 5,2017-05-23,60621009,X76C5,22.3 13 | 5,2017-08-01,60621009,X76C6,19.9 14 | 6,2017-07-11,73211009,C10.., 15 | 6,2018-08-16,60621009,X76C7,22.8 16 | 6,2019-07-06,195967001,H33.., 17 | 7,2018-01-06,60621009,X76C8,35.2 18 | 8,2021-01-27,73211009,C10.., 19 | 8,2022-10-25,60621009,X76C9,16.3 20 | 9,2015-07-12,195967001,H33.., 21 | 10,2015-03-14,73211009,C10.., 22 | 
-------------------------------------------------------------------------------- /tests/fixtures/quiz-example-data/medications.csv: -------------------------------------------------------------------------------- 1 | patient_id,date,dmd_code 2 | 1,2014-01-11,39113611000001102 3 | 2,2015-08-06,39113611000001102 4 | 2,2018-09-21,39113311000001107 5 | 2,2020-05-17,22777311000001105 6 | 4,2022-11-09,22777311000001105 7 | 5,2017-05-11,39113611000001102 8 | 6,2017-07-11,3484711000001105 9 | 6,2019-07-06,39113611000001102 10 | 8,2021-01-27,3484711000001105 11 | 10,2015-03-14,3484711000001105 12 | -------------------------------------------------------------------------------- /tests/fixtures/quiz-example-data/ons_deaths.csv: -------------------------------------------------------------------------------- 1 | "patient_id","date","place","underlying_cause_of_death","cause_of_death_01","cause_of_death_02","cause_of_death_03","cause_of_death_04","cause_of_death_05","cause_of_death_06","cause_of_death_07","cause_of_death_08","cause_of_death_09","cause_of_death_10","cause_of_death_11","cause_of_death_12","cause_of_death_13","cause_of_death_14","cause_of_death_15" 2 | 1,2015-09-14,"Hospital","C91.1",,,,,,,,,,,,,,, 3 | 5,2018-05-23,"Home","I21.0",,,,,,,,,,,,,,, 4 | 9,2017-11-10,"Care Home","I69.4",,,,,,,,,,,,,,, 5 | -------------------------------------------------------------------------------- /tests/fixtures/quiz-example-data/patients.csv: -------------------------------------------------------------------------------- 1 | patient_id,date_of_birth,sex,date_of_death 2 | 1,1973-07-01,female,2015-09-14 3 | 2,1948-03-01,male, 4 | 3,2003-04-01,male, 5 | 4,2007-06-01,female, 6 | 5,1938-10-01,male,2018-05-23 7 | 6,1994-04-01,female, 8 | 7,1953-05-01,male, 9 | 8,1992-08-01,female, 10 | 9,1931-10-01,female,2017-11-10 11 | 10,1979-04-01,male, 12 | -------------------------------------------------------------------------------- 
/tests/fixtures/quiz-example-data/practice_registrations.csv: -------------------------------------------------------------------------------- 1 | patient_id,start_date,end_date,practice_pseudo_id,practice_stp,practice_nuts1_region_name 2 | 1,2012-09-23,2015-09-14,7055,E54000048,North West 3 | 2,1993-02-04,,975,E54000009,North East 4 | 3,2004-12-03,,6669,E54000019,West Midlands 5 | 4,2007-09-13,2019-03-04,7199,E54000026,East 6 | 4,2019-01-02,,7199,E54000026,East 7 | 5,2007-07-21,2018-05-23,8595,E54000024,East 8 | 6,2020-09-14,,5045,E54000017,West Midlands 9 | 7,2013-12-05,,7588,E54000050,Yorkshire and The Humber 10 | 7,2017-05-12,,5121,E54000050,Yorkshire and The Humber 11 | 8,2020-01-19,,1858,E54000012,West Midlands 12 | 9,1995-03-01,2017-11-10,8189,E54000023,East 13 | 10,2001-10-24,,7783,E54000037,South West 14 | -------------------------------------------------------------------------------- /tests/functional/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/functional/__init__.py -------------------------------------------------------------------------------- /tests/functional/test_assure.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | 4 | FIXTURES_PATH = Path(__file__).parents[1] / "fixtures" / "good_definition_files" 5 | 6 | 7 | def test_assure(call_cli): 8 | captured = call_cli("assure", FIXTURES_PATH / "assurance.py") 9 | assert "All OK" in captured.out 10 | -------------------------------------------------------------------------------- /tests/functional/test_dump_example_data.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | 3 | 4 | def test_dump_example_data(call_cli, tmp_path): 5 | with contextlib.chdir(tmp_path): 6 | call_cli("dump-example-data") 7 | filenames = [path.name 
for path in (tmp_path / "example-data").iterdir()] 8 | assert "patients.csv" in filenames 9 | -------------------------------------------------------------------------------- /tests/functional/test_entrypoint.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import subprocess 3 | import sys 4 | 5 | 6 | def test_entrypoint(): 7 | # Include the Python executable directory on the path so that even if the virtualenv 8 | # isn't activated we can still find the `ehrql` executable. 9 | path = os.pathsep.join( 10 | [os.path.dirname(sys.executable), os.environ.get("PATH", "")] 11 | ) 12 | 13 | result = subprocess.run( 14 | ["ehrql", "--help"], 15 | capture_output=True, 16 | text=True, 17 | check=True, 18 | env={"PATH": path}, 19 | ) 20 | assert "usage: ehrql [-h]" in result.stdout 21 | -------------------------------------------------------------------------------- /tests/functional/test_graph_query.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from pathlib import Path 3 | 4 | import pytest 5 | 6 | 7 | FIXTURES_PATH = Path(__file__).parents[1] / "fixtures" / "good_definition_files" 8 | 9 | 10 | @pytest.mark.skipif( 11 | shutil.which("dot") is None, 12 | reason="Graphing requires Graphviz library", 13 | ) 14 | def test_graph_query(call_cli, tmp_path): # pragma: no cover 15 | output_file = tmp_path / "query.svg" 16 | call_cli( 17 | "graph-query", 18 | FIXTURES_PATH / "dataset_definition.py", 19 | "--output", 20 | output_file, 21 | ) 22 | assert output_file.exists() 23 | -------------------------------------------------------------------------------- /tests/functional/test_isolation_report.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | 4 | import pytest 5 | 6 | 7 | @pytest.mark.skipif( 8 | not sys.platform.startswith("linux"), 9 | reason="Subprocess isolation only works on Linux", 10 | ) 
11 | def test_isolation_report(call_cli): 12 | captured = call_cli("isolation-report") 13 | assert json.loads(captured.out) 14 | -------------------------------------------------------------------------------- /tests/functional/test_serialize_definition.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | import pytest 5 | 6 | 7 | FIXTURES_PATH = Path(__file__).parents[1] / "fixtures" / "good_definition_files" 8 | 9 | 10 | @pytest.mark.parametrize( 11 | "definition_type,definition_file", 12 | [ 13 | ("dataset", FIXTURES_PATH / "dataset_definition.py"), 14 | ("measures", FIXTURES_PATH / "measure_definitions.py"), 15 | ("test", FIXTURES_PATH / "assurance.py"), 16 | ], 17 | ) 18 | def test_serialize_definition(definition_type, definition_file, call_cli): 19 | captured = call_cli( 20 | "serialize-definition", 21 | "--definition-type", 22 | definition_type, 23 | definition_file, 24 | ) 25 | # We rely on tests elsewhere to ensure that the serialization is working correctly; 26 | # here we just want to check that we return valid JSON 27 | assert json.loads(captured.out) 28 | # We shouldn't be producing any warnings or any other output 29 | assert captured.err == "" 30 | -------------------------------------------------------------------------------- /tests/functional/test_test_connection.py: -------------------------------------------------------------------------------- 1 | def test_test_connection(mssql_database, call_cli): 2 | env = { 3 | "BACKEND": "ehrql.backends.tpp.TPPBackend", 4 | "DATABASE_URL": mssql_database.host_url(), 5 | } 6 | captured = call_cli("test-connection", environ=env) 7 | assert "SUCCESS" in captured.out 8 | -------------------------------------------------------------------------------- /tests/generative/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/generative/__init__.py -------------------------------------------------------------------------------- /tests/generative/conftest.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from .recording import recorder 4 | 5 | 6 | __all__ = ["recorder"] 7 | 8 | 9 | class BrokenDatabaseError(KeyboardInterrupt): 10 | def __init__(self, database): # pragma: no cover 11 | self.database = database 12 | 13 | 14 | def pytest_keyboard_interrupt(excinfo): # pragma: no cover 15 | if isinstance(excinfo.value, BrokenDatabaseError): 16 | print(f"Unrecoverably broken {excinfo.value.database} database") 17 | sys.exit(6) 18 | -------------------------------------------------------------------------------- /tests/generative/data_setup.py: -------------------------------------------------------------------------------- 1 | from ehrql.query_model.nodes import ( 2 | AggregateByPatient, 3 | Function, 4 | SelectPatientTable, 5 | SelectTable, 6 | ) 7 | from tests.lib.orm_utils import orm_classes_from_tables 8 | 9 | 10 | def setup(schema, num_patient_tables, num_event_tables): 11 | patient_tables = [ 12 | SelectPatientTable(f"p{i}", schema=schema) for i in range(num_patient_tables) 13 | ] 14 | event_tables = [ 15 | SelectTable(f"e{i}", schema=schema) for i in range(num_event_tables) 16 | ] 17 | all_tables = patient_tables + event_tables 18 | 19 | orm_classes = orm_classes_from_tables(all_tables) 20 | _add_classes_to_module_namespace(orm_classes) 21 | 22 | patient_classes = [orm_classes[table.name] for table in patient_tables] 23 | event_classes = [orm_classes[table.name] for table in event_tables] 24 | 25 | all_patients_query = _build_query(all_tables) 26 | 27 | # We arbitrarily choose the first patient class, but all the ORM classes share the 28 | # same MetaData 29 | metadata = patient_classes[0].metadata 30 | 31 | return ( 32 | 
patient_classes, 33 | event_classes, 34 | all_patients_query, 35 | metadata, 36 | ) 37 | 38 | 39 | def _add_classes_to_module_namespace(orm_classes): 40 | # It's helpful to have the classes available as module properties so that we can 41 | # copy-paste failing test cases from Hypothesis. These classes naturally believe 42 | # that they belong to the `orm_utils` module which created them, so we have to 43 | # re-parent them here. We use only the final component of the module name as that's 44 | # how we import it in `test_query_model`. 45 | for class_ in orm_classes.values(): 46 | class_.__module__ = __name__.rpartition(".")[2] 47 | globals()[class_.__name__] = class_ 48 | 49 | 50 | def _build_query(tables): 51 | clauses = [AggregateByPatient.Exists(source=table) for table in tables] 52 | return _join_with_or(clauses) 53 | 54 | 55 | def _join_with_or(clauses): 56 | query = clauses[0] 57 | for clause in clauses[1:]: 58 | query = Function.Or(query, clause) 59 | return query 60 | -------------------------------------------------------------------------------- /tests/generative/example.py: -------------------------------------------------------------------------------- 1 | # A tiny example of a generative test case defined in a standalone file so that we can 2 | # check that the `test_query_model_example_file` function works correctly 3 | from ehrql.query_model.nodes import ( 4 | AggregateByPatient, 5 | Column, 6 | Dataset, 7 | SelectColumn, 8 | SelectPatientTable, 9 | TableSchema, 10 | ) 11 | 12 | 13 | p0 = SelectPatientTable( 14 | "p0", 15 | TableSchema( 16 | i1=Column(int), 17 | ), 18 | ) 19 | 20 | dataset = Dataset( 21 | population=AggregateByPatient.Exists(p0), 22 | variables={"v": SelectColumn(p0, "i1")}, 23 | events={}, 24 | measures=None, 25 | ) 26 | data = [] 27 | -------------------------------------------------------------------------------- /tests/generative/generic_strategies.py: 
-------------------------------------------------------------------------------- 1 | from hypothesis import strategies as st 2 | 3 | 4 | # This is a variable that will normally be set to True, but which shrinking is 5 | # allowed to make False. 6 | # 7 | # Examples of where this is useful: 8 | # 9 | # * Turning off expensive test operations that you want to check but don't care 10 | # about running if they turn out not to be relevant to the error you're 11 | # seeing. 12 | # * Giving the shrinker a place to terminate generation in places where you've 13 | # decided to do it yourself because e.g. the data was getting too large. 14 | usually = st.integers(0, 255).map(lambda n: n > 0) 15 | 16 | 17 | @st.composite 18 | def usually_all_of(draw, options, min_size=1): 19 | """Generates a list of distinct elements drawn from `options`, of size at least 20 | `min_size`. In the normal course of things, this will usually be all of `options`, 21 | but the shrinker is allowed to remove elements from it, which can speed up 22 | test execution during shrinking significantly.""" 23 | flags = draw(st.lists(usually, min_size=len(options), max_size=len(options))) 24 | 25 | # In order to make sure enough of these are set, we set some 26 | # of the flags to true. We do this unconditionally on whether enough 27 | # flags are already set so that when shrinking we don't start to generate 28 | # more data when some of the flags are shrunk to false. 
29 | extra_flags = draw( 30 | st.lists( 31 | st.integers(0, len(options) - 1), 32 | min_size=min_size, 33 | max_size=min_size, 34 | unique=True, 35 | ) 36 | ) 37 | n_set = flags.count(True) 38 | for i in extra_flags[: max(min_size - n_set, 0)]: 39 | flags[i] = True 40 | return [option for option, include in zip(options, flags) if include] 41 | -------------------------------------------------------------------------------- /tests/generative/test_data_setup.py: -------------------------------------------------------------------------------- 1 | import hypothesis as hyp 2 | from hypothesis.vendor.pretty import pretty 3 | 4 | from . import test_query_model 5 | 6 | 7 | # We just need a single non-empty example to check, and we want to keep the test 8 | # deterministic 9 | @hyp.given(example=test_query_model.data_strategy) 10 | @hyp.settings(max_examples=1, derandomize=True) 11 | def test_data_strategy_examples_round_trip(example): 12 | """ 13 | Examples produced by `data_strategy` contain references to classes dynamically 14 | generated in `data_setup` and we need to do some underhand stuff to make sure they 15 | can be copy/pasted back into `@hypothesis.example()` and evaluate correctly. 16 | 17 | We've broken this properly once without realising so this test ensures we don't do so 18 | again. 
19 | """ 20 | hyp.assume(len(example) > 0) 21 | # `pretty` is the formatter Hypothesis uses for examples 22 | example_repr = pretty(example) 23 | # Evaluate it in the context of the `test_query_model` module, which is where 24 | # examples will get pasted 25 | evaled = eval(example_repr, globals(), vars(test_query_model)) 26 | assert evaled == example 27 | -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/integration/__init__.py -------------------------------------------------------------------------------- /tests/integration/backends/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/integration/backends/__init__.py -------------------------------------------------------------------------------- /tests/integration/backends/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sqlalchemy 3 | 4 | from ehrql.backends.emis import EMISBackend 5 | from ehrql.backends.tpp import TPPBackend 6 | 7 | 8 | def _get_select_all_query(request, backend): 9 | try: 10 | ql_table = request.function._table 11 | except AttributeError: # pragma: no cover 12 | raise RuntimeError( 13 | f"Function '{request.function.__name__}' needs the " 14 | f"`@register_test_for(table)` decorator applied" 15 | ) 16 | 17 | qm_table = ql_table._qm_node 18 | sql_table = backend.get_table_expression(qm_table.name, qm_table.schema) 19 | columns = [ 20 | # Using `type_coerce(..., None)` like this strips the type information from the 21 | # SQLAlchemy column meaning we get back the type that the column actually is in 22 | # database, not the type we've 
told SQLAlchemy it is. 23 | sqlalchemy.type_coerce(column, None).label(column.key) 24 | for column in sql_table.columns 25 | ] 26 | return sqlalchemy.select(*columns) 27 | 28 | 29 | def _select_all_fn(select_all_query, database): 30 | def _select_all(*input_data): 31 | database.setup(*input_data) 32 | with database.engine().connect() as connection: 33 | results = connection.execute(select_all_query) 34 | return sorted( 35 | [row._asdict() for row in results], key=lambda x: x["patient_id"] 36 | ) 37 | 38 | return _select_all 39 | 40 | 41 | @pytest.fixture 42 | def select_all_emis(request, trino_database): 43 | select_all_query = _get_select_all_query(request, EMISBackend()) 44 | return _select_all_fn(select_all_query, trino_database) 45 | 46 | 47 | @pytest.fixture 48 | def select_all_tpp(request, mssql_database): 49 | backend = TPPBackend(config={"TEMP_DATABASE_NAME": "temp_tables"}) 50 | select_all_query = _get_select_all_query(request, backend) 51 | return _select_all_fn(select_all_query, mssql_database) 52 | -------------------------------------------------------------------------------- /tests/integration/file_formats/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/integration/file_formats/__init__.py -------------------------------------------------------------------------------- /tests/integration/file_formats/test_csv.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | 3 | import pytest 4 | 5 | from ehrql.file_formats import write_rows 6 | from ehrql.query_model.column_specs import ColumnSpec 7 | 8 | 9 | @pytest.mark.parametrize("basename", [None, "file.csv", "file.csv.gz"]) 10 | def test_write_rows_csv(tmp_path, capsys, basename): 11 | if basename is None: 12 | filename = None 13 | else: 14 | filename = tmp_path / "somedir" / basename 15 | 16 | column_specs = 
{ 17 | "patient_id": ColumnSpec(int), 18 | "year_of_birth": ColumnSpec(int), 19 | "sex": ColumnSpec(str), 20 | } 21 | results = [ 22 | (123, 1980, "F"), 23 | (456, None, None), 24 | (789, 1999, "M"), 25 | ] 26 | 27 | write_rows(filename, results, column_specs) 28 | 29 | if basename is None: 30 | output = capsys.readouterr().out 31 | elif basename.endswith(".csv.gz"): 32 | with gzip.open(filename, "rt") as f: 33 | output = f.read() 34 | elif basename.endswith(".csv"): 35 | output = filename.read_text() 36 | else: 37 | assert False 38 | 39 | assert output.splitlines() == [ 40 | "patient_id,year_of_birth,sex", 41 | "123,1980,F", 42 | "456,,", 43 | "789,1999,M", 44 | ] 45 | -------------------------------------------------------------------------------- /tests/integration/measures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/integration/measures/__init__.py -------------------------------------------------------------------------------- /tests/integration/query_engines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/integration/query_engines/__init__.py -------------------------------------------------------------------------------- /tests/integration/query_engines/test_dialects.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sqlalchemy 3 | 4 | 5 | def test_case_statement(engine): 6 | """ 7 | Test a basic CASE statement returning a string value. This exposed a bug in the 8 | string handling of our Spark dialect so it's useful to keep it around. 
9 | """ 10 | if engine.name == "in_memory": 11 | pytest.skip("SQLAlchemy dialect tests do not apply to the in-memory engine") 12 | 13 | case_statement = sqlalchemy.case( 14 | (sqlalchemy.literal(1) == 0, "foo"), 15 | (sqlalchemy.literal(1) == 1, "bar"), 16 | ) 17 | query = sqlalchemy.select(case_statement.label("output")) 18 | with engine.sqlalchemy_engine().connect() as conn: 19 | results = list(conn.execute(query)) 20 | assert results[0].output == "bar" 21 | -------------------------------------------------------------------------------- /tests/integration/query_engines/test_local_file.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from ehrql import Dataset 4 | from ehrql.query_engines.local_file import LocalFileQueryEngine 5 | from ehrql.tables import EventFrame, PatientFrame, Series, table 6 | 7 | 8 | FIXTURES = Path(__file__).parents[2] / "fixtures" / "local_file_engine" 9 | 10 | 11 | @table 12 | class patients(PatientFrame): 13 | sex = Series(str) 14 | 15 | 16 | @table 17 | class events(EventFrame): 18 | score = Series(int) 19 | # Columns in the schema which aren't in the data files should just end up NULL 20 | expected_missing = Series(bool) 21 | 22 | 23 | def test_local_file_query_engine(): 24 | dataset = Dataset() 25 | dataset.sex = patients.sex 26 | dataset.total_score = events.score.sum_for_patient() 27 | # Check that missing columns end up NULL 28 | dataset.missing = events.where( 29 | events.expected_missing.is_null() 30 | ).count_for_patient() 31 | 32 | dataset.define_population(patients.exists_for_patient()) 33 | dataset_qm = dataset._compile() 34 | 35 | query_engine = LocalFileQueryEngine(FIXTURES) 36 | results = query_engine.get_results(dataset_qm) 37 | 38 | assert list(results) == [ 39 | (1, "M", 9, 3), 40 | (2, "F", 15, 2), 41 | (3, None, None, 0), 42 | ] 43 | -------------------------------------------------------------------------------- 
/tests/integration/query_engines/test_mssql_dialect.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pytest 4 | import sqlalchemy 5 | 6 | from ehrql.query_engines.mssql_dialect import MSSQLDialect 7 | 8 | 9 | def test_date_literals_have_correct_type(mssql_engine): 10 | case_statement = sqlalchemy.case( 11 | ( 12 | sqlalchemy.literal(1) == 1, 13 | datetime.date(2000, 10, 5), 14 | ), 15 | ) 16 | query = sqlalchemy.select(case_statement.label("output")) 17 | with mssql_engine.sqlalchemy_engine().connect() as conn: 18 | results = list(conn.execute(query)) 19 | assert results[0].output == datetime.date(2000, 10, 5) 20 | 21 | 22 | def test_enforces_minimum_server_version(mssql_engine, monkeypatch): 23 | monkeypatch.setattr(MSSQLDialect, "minimum_server_version", (999999,)) 24 | with pytest.raises(RuntimeError, match=r"we require at least \(999999,\)"): 25 | mssql_engine.sqlalchemy_engine().connect() 26 | 27 | 28 | def test_float_literals_have_correct_type(mssql_engine): 29 | # When using the `pymssql` driver without special float handling the "0.5" below 30 | # gets typed as a decimal and then the result of SUM gets typed as fixed precision 31 | # decimal. 
32 | sum_literal = sqlalchemy.func.sum(0.5) 33 | # When added to a decimal of greater precision, the result gets rounded and ends up 34 | # being 0.8 35 | query = sqlalchemy.select(sum_literal + 0.25) 36 | with mssql_engine.sqlalchemy_engine().connect() as conn: 37 | results = list(conn.execute(query)) 38 | # By explicitly casting floats in our custom dialect we can get the correct result 39 | assert results[0][0] == 0.75 40 | -------------------------------------------------------------------------------- /tests/integration/query_engines/test_trino_dialect.py: -------------------------------------------------------------------------------- 1 | from ehrql.query_model.nodes import ( 2 | AggregateByPatient, 3 | Column, 4 | Dataset, 5 | Function, 6 | InlinePatientTable, 7 | SelectColumn, 8 | TableSchema, 9 | ) 10 | 11 | 12 | def test_float_precision(trino_engine): 13 | # This tests that Trino uses 64-bit precision for float columns in inline tables. 14 | v1, v2 = 1, 0.001 15 | 16 | schema = TableSchema(f1=Column(float), f2=Column(float)) 17 | t = InlinePatientTable( 18 | ((1, v1, v2),), 19 | schema, 20 | ) 21 | f1 = SelectColumn(t, "f1") 22 | f2 = SelectColumn(t, "f2") 23 | 24 | dataset = Dataset( 25 | population=AggregateByPatient.Exists(t), 26 | variables={"v": Function.Subtract(f1, Function.Add(f1, f2))}, 27 | events={}, 28 | measures=None, 29 | ) 30 | 31 | results = trino_engine.extract(dataset) 32 | assert results[0]["v"] == v1 - (v1 + v2) 33 | -------------------------------------------------------------------------------- /tests/integration/query_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/integration/query_model/__init__.py -------------------------------------------------------------------------------- /tests/integration/query_model/test_transforms.py: 
-------------------------------------------------------------------------------- 1 | from ehrql.query_model.nodes import ( 2 | AggregateByPatient, 3 | Dataset, 4 | PickOneRowPerPatient, 5 | Position, 6 | SelectColumn, 7 | SelectTable, 8 | Sort, 9 | TableSchema, 10 | ) 11 | 12 | 13 | events = SelectTable( 14 | "events", 15 | schema=TableSchema.from_primitives(i=int, b=bool), 16 | ) 17 | 18 | 19 | def test_sort_booleans_null_first(engine): 20 | # The transforms add sorts for unsorted selected columns. Here we're checking the semantics 21 | # of the sort added for boolean columns (which are handled explicitly because some databases 22 | # don't allow sorting on booleans). 23 | # 24 | # The desired sort order is: NULL, False, True. 25 | # 26 | # Each of these patients has two records with different boolean values so we do pairwise 27 | # comparisons. The integer column is there only so we can specify a sort on it in the query 28 | # model. 29 | engine.populate( 30 | { 31 | events: [ 32 | dict(patient_id=0, row_id=0, i=0, b=False), 33 | dict(patient_id=0, row_id=1, i=0, b=True), 34 | dict(patient_id=1, row_id=2, i=0, b=None), 35 | dict(patient_id=1, row_id=3, i=0, b=True), 36 | dict(patient_id=2, row_id=4, i=0, b=None), 37 | dict(patient_id=2, row_id=5, i=0, b=False), 38 | ] 39 | } 40 | ) 41 | 42 | # Sort the events by i and pick the b from the last row. 
43 | by_i = Sort(events, SelectColumn(events, "i")) 44 | variable = SelectColumn( 45 | PickOneRowPerPatient(source=by_i, position=Position.LAST), 46 | "b", 47 | ) 48 | population = AggregateByPatient.Exists(events) 49 | dataset = Dataset( 50 | population=population, variables={"v": variable}, events={}, measures=None 51 | ) 52 | 53 | assert engine.extract(dataset) == [ 54 | dict(patient_id=0, v=True), # True sorts after False 55 | dict(patient_id=1, v=True), # True sorts after NULL 56 | dict(patient_id=2, v=False), # False sorts after NULL 57 | ] 58 | -------------------------------------------------------------------------------- /tests/integration/tables/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/integration/tables/__init__.py -------------------------------------------------------------------------------- /tests/integration/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/integration/utils/__init__.py -------------------------------------------------------------------------------- /tests/integration/utils/test_sqlalchemy_exec_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sqlalchemy 3 | import sqlalchemy.orm 4 | 5 | from ehrql.utils.sqlalchemy_exec_utils import fetch_table_in_batches 6 | 7 | 8 | Base = sqlalchemy.orm.declarative_base() 9 | 10 | 11 | class SomeTable(Base): 12 | __tablename__ = "some_table" 13 | pk = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True, autoincrement=False) 14 | key = sqlalchemy.Column(sqlalchemy.Integer) 15 | foo = sqlalchemy.Column(sqlalchemy.String) 16 | 17 | 18 | def test_fetch_table_in_batches_unique(engine): 19 | if engine.name == "in_memory": 20 | 
pytest.skip("SQL tests do not apply to in-memory engine") 21 | 22 | table_size = 15 23 | batch_size = 6 24 | 25 | table_data = [(i, i, f"foo{i}") for i in range(table_size)] 26 | 27 | engine.setup([SomeTable(pk=row[0], key=row[1], foo=row[2]) for row in table_data]) 28 | 29 | table = SomeTable.__table__ 30 | 31 | with engine.sqlalchemy_engine().connect() as connection: 32 | results = fetch_table_in_batches( 33 | connection.execute, 34 | table, 35 | 0, 36 | key_is_unique=True, 37 | batch_size=batch_size, 38 | ) 39 | results = list(results) 40 | 41 | assert results == table_data 42 | 43 | 44 | def test_fetch_table_in_batches_nonunique(engine): 45 | if engine.name == "in_memory": 46 | pytest.skip("SQL tests do not apply to in-memory engine") 47 | 48 | batch_size = 6 49 | repeats = [1, 2, 3, 4, 5, 0, 5, 4, 3, 2, 1] 50 | keys = [key for key, n in enumerate(repeats) for _ in range(n)] 51 | table_data = [(i, key, f"foo{i}") for i, key in enumerate(keys)] 52 | 53 | engine.setup([SomeTable(pk=row[0], key=row[1], foo=row[2]) for row in table_data]) 54 | 55 | table = SomeTable.__table__ 56 | 57 | with engine.sqlalchemy_engine().connect() as connection: 58 | results = fetch_table_in_batches( 59 | connection.execute, 60 | table, 61 | 0, 62 | key_is_unique=False, 63 | batch_size=batch_size, 64 | ) 65 | results = sorted(results) 66 | 67 | assert results == table_data 68 | -------------------------------------------------------------------------------- /tests/integration/utils/test_sqlalchemy_query_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sqlalchemy 3 | 4 | from ehrql.utils.sqlalchemy_query_utils import InsertMany 5 | 6 | 7 | table = sqlalchemy.Table( 8 | "t", 9 | sqlalchemy.MetaData(), 10 | sqlalchemy.Column("i", sqlalchemy.Integer()), 11 | sqlalchemy.Column("s", sqlalchemy.String()), 12 | ) 13 | 14 | 15 | def test_insert_many(engine): 16 | if engine.name == "in_memory": 17 | pytest.skip("SQL tests do 
not apply to in-memory engine") 18 | 19 | # We need enough rows that we exercise SQLAlchemy's internal batching logic, but not 20 | # so many that we significantly slow down the test 21 | rows = [(i, f"a{i}") for i in range(5000)] 22 | 23 | insert_many = InsertMany( 24 | table, 25 | # Test that we can handle an iterator rather than just a list 26 | iter(rows), 27 | batch_size=2000, 28 | ) 29 | 30 | with engine.sqlalchemy_engine().connect() as connection: 31 | connection.execute(sqlalchemy.schema.CreateTable(table)) 32 | try: 33 | connection.execute(insert_many) 34 | response = connection.execute(sqlalchemy.select(table)) 35 | results = list(response) 36 | finally: 37 | # Explicitly drop the table as it persists in the Trino engine 38 | connection.execute(sqlalchemy.schema.DropTable(table)) 39 | 40 | assert sorted(results) == rows 41 | -------------------------------------------------------------------------------- /tests/integration/utils/test_traceback_utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pathlib import Path 3 | 4 | import pytest 5 | 6 | import ehrql 7 | from ehrql.loaders import DefinitionError, load_module 8 | from ehrql.tables import smoketest 9 | 10 | 11 | FIXTURES = Path(__file__).parents[2] / "fixtures" / "bad_definition_files" 12 | 13 | 14 | def test_traceback_starts_with_user_code(): 15 | filename = FIXTURES / "bad_import.py" 16 | message = f'Traceback (most recent call last):\n File "{filename}"' 17 | with pytest.raises(DefinitionError, match=re.escape(message)): 18 | load_module(filename) 19 | 20 | 21 | def test_traceback_ends_with_user_code(): 22 | filename = FIXTURES / "bad_types.py" 23 | with pytest.raises(DefinitionError) as excinfo: 24 | load_module(filename) 25 | # We shouldn't have any references to ehrql code in the traceback 26 | ehrql_root = str(Path(ehrql.__file__).parent) 27 | assert not re.search(re.escape(ehrql_root), str(excinfo.value)) 28 | 29 | 30 | def 
test_references_to_failed_imports_from_ehrql_are_not_stripped_out(): 31 | filename = FIXTURES / "bad_import.py" 32 | with pytest.raises(DefinitionError) as excinfo: 33 | load_module(filename) 34 | # We tried to import a name from `smoketest` which doesn't exist, though the module 35 | # itself does. Therefore this module should be visible in the traceback. 36 | assert re.search(re.escape(smoketest.__file__), str(excinfo.value)) 37 | 38 | 39 | def test_traceback_filtering_handles_relative_paths(): 40 | relative_filename = (FIXTURES / "bad_import.py").relative_to(Path.cwd()) 41 | message = r'Traceback \(most recent call last\):\n File ".*bad_import\.py"' 42 | with pytest.raises(DefinitionError, match=message): 43 | load_module(relative_filename) 44 | 45 | 46 | def test_traceback_filtering_handles_syntax_errors(): 47 | filename = FIXTURES / "bad_syntax.py" 48 | message = ( 49 | r"^" 50 | f"Error loading file '{filename}':" 51 | r"\s+" 52 | f'File "{filename}", line 1' 53 | r"\s+" 54 | r"what even is a Python" 55 | r"[\s\^]+" 56 | r"SyntaxError: invalid syntax" 57 | r"$" 58 | ) 59 | with pytest.raises(DefinitionError, match=message): 60 | load_module(filename) 61 | -------------------------------------------------------------------------------- /tests/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/lib/__init__.py -------------------------------------------------------------------------------- /tests/lib/create_tpp_test_db.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run this using: 3 | 4 | pytest -o python_functions=create tests/lib/create_tpp_test_db.py 5 | 6 | It will start an MSSQL Docker container, create all the tables in the TPP schema, and 7 | output the connection string needed to talk to this database. 
8 | """ 9 | 10 | from .tpp_schema import Base # pragma: no cover 11 | 12 | 13 | # This is not a test, but we can get pytest to run it as a test so we can re-use all the 14 | # fixture machinery. Because neither this file nor this function are named appropriately 15 | # they avoid being discovered and executed during the normal test run. But we can run it 16 | # by passing the path and function name directly to pytest 17 | def create(request, mssql_database_with_session_scope): # pragma: no cover 18 | db = mssql_database_with_session_scope 19 | db.setup(metadata=Base.metadata) 20 | capturemanager = request.config.pluginmanager.getplugin("capturemanager") 21 | with capturemanager.global_and_fixture_disabled(): 22 | print("\n\n=> Created TPP tables in test database") 23 | print() 24 | print("DSN for ehrQL:") 25 | print(f" DATABASE_URL='{db.host_url()}'") 26 | print() 27 | print("Connection string for VSCode MSSQL Extension:") 28 | print( 29 | f" Server={db.host_from_host},{db.port_from_host};Database={db.db_name};" 30 | f"User Id={db.username};Password={db.password};" 31 | ) 32 | print() 33 | -------------------------------------------------------------------------------- /tests/lib/file_utils.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import gzip 3 | 4 | from pyarrow.feather import read_table 5 | 6 | from ehrql.file_formats import get_file_extension 7 | 8 | 9 | def read_file_as_dicts(filename): 10 | extension = get_file_extension(filename) 11 | if extension == ".csv": 12 | with open(filename, newline="") as f: 13 | return list(csv.DictReader(f)) 14 | elif extension == ".csv.gz": 15 | with gzip.open(filename, "rt", newline="") as f: 16 | return list(csv.DictReader(f)) 17 | elif extension == ".arrow": 18 | return read_table(str(filename)).to_pylist() 19 | else: 20 | assert False, f"Unsupported extension: {filename}" 21 | -------------------------------------------------------------------------------- 
/tests/lib/inspect_utils.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import inspect 3 | import textwrap 4 | 5 | 6 | def function_body_as_string(function): 7 | """ 8 | Return the de-indented source code of the body of a function 9 | 10 | This is useful for being able to specify the contents of test fixtures without 11 | having to make them separate files (which makes the tests harder to follow) or to 12 | declare them as string literals (where you lose the benefits of syntax highlighting 13 | and other tooling). 14 | 15 | Note that one downside of this vs string literals is that you can't use templating 16 | in the same way to dynamically generate the fixture. Instead, you need to find some 17 | way of specifying your placeholder values as valid Python and then call `.replace()` 18 | on the resulting string. 19 | """ 20 | source = textwrap.dedent(inspect.getsource(function)) 21 | parsed = ast.parse(source) 22 | assert isinstance(parsed, ast.Module) 23 | func_def = parsed.body[0] 24 | assert isinstance(func_def, ast.FunctionDef) 25 | first_line = func_def.body[0].lineno 26 | body_lines = source.split("\n")[first_line - 1 :] 27 | return textwrap.dedent("\n".join(body_lines)) 28 | -------------------------------------------------------------------------------- /tests/lib/query_model_utils.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | 3 | from ehrql.query_model import nodes as query_model 4 | 5 | 6 | def get_all_operations(): 7 | "Return every operation defined in the query model" 8 | return [cls for cls in iterate_query_model_namespace() if is_operation(cls)] 9 | 10 | 11 | def is_operation(cls): 12 | "Return whether an arbitrary value is a query model operation class" 13 | # We need to check this first or the `issubclass` check can fail 14 | if not isinstance(cls, type): 15 | return False 16 | # We need to check it's a proper subclass as the 
Node base class isn't itself a 17 | # dataclass so the `fields()` call will fail 18 | if not issubclass(cls, query_model.Node) or cls is query_model.Node: 19 | return False 20 | # If it takes arguments it's an operation, otherwise it's an abstract type 21 | return len(dataclasses.fields(cls)) > 0 22 | 23 | 24 | def iterate_query_model_namespace(): 25 | "Yield every value in the query_model module" 26 | yield from vars(query_model).values() 27 | yield from vars(query_model.Function).values() 28 | yield from vars(query_model.AggregateByPatient).values() 29 | -------------------------------------------------------------------------------- /tests/lib/tpp_decision_support_reference.csv: -------------------------------------------------------------------------------- 1 | AlgorithmType,AlgorithmDescription,AlgorithmVersion,AlgorithmSourceLink 2 | 1,UK Electronic Frailty Index (eFI),1.0,https://academic.oup.com/ageing/article/45/3/353/1739750 3 | -------------------------------------------------------------------------------- /tests/spec/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/spec/__init__.py -------------------------------------------------------------------------------- /tests/spec/aggregate_frame/__init__.py: -------------------------------------------------------------------------------- 1 | title = "Aggregating event and patient frames" 2 | -------------------------------------------------------------------------------- /tests/spec/aggregate_frame/test_count_for_patient.py: -------------------------------------------------------------------------------- 1 | from ..tables import e, p 2 | 3 | 4 | title = "Counting the rows for each patient" 5 | 6 | table_data = { 7 | p: """ 8 | | b1 9 | --+---- 10 | 1 | 11 | 2 | 12 | 3 | 13 | """, 14 | e: """ 15 | | b1 16 | --+---- 17 | 1 | 18 | 1 | 19 | 2 | 20 | """, 21 | } 22 | 
23 | 24 | def test_count_for_patient_on_event_frame(spec_test): 25 | spec_test( 26 | table_data, 27 | e.count_for_patient(), 28 | { 29 | 1: 2, 30 | 2: 1, 31 | 3: 0, 32 | }, 33 | ) 34 | 35 | 36 | def test_count_for_patient_on_patient_frame(spec_test): 37 | spec_test( 38 | table_data, 39 | p.count_for_patient(), 40 | { 41 | 1: 1, 42 | 2: 1, 43 | 3: 1, 44 | }, 45 | ) 46 | -------------------------------------------------------------------------------- /tests/spec/aggregate_frame/test_exists_for_patient.py: -------------------------------------------------------------------------------- 1 | from ..tables import e, p 2 | 3 | 4 | title = "Determining whether a row exists for each patient" 5 | 6 | table_data = { 7 | p: """ 8 | | b1 9 | --+---- 10 | 1 | 11 | 2 | 12 | 3 | 13 | """, 14 | e: """ 15 | | b1 16 | --+---- 17 | 1 | 18 | 1 | 19 | 2 | 20 | """, 21 | } 22 | 23 | 24 | def test_exists_for_patient_on_event_frame(spec_test): 25 | spec_test( 26 | table_data, 27 | e.exists_for_patient(), 28 | { 29 | 1: True, 30 | 2: True, 31 | 3: False, 32 | }, 33 | ) 34 | 35 | 36 | def test_exists_for_patient_on_patient_frame(spec_test): 37 | spec_test( 38 | table_data, 39 | p.exists_for_patient(), 40 | { 41 | 1: True, 42 | 2: True, 43 | 3: True, 44 | }, 45 | ) 46 | -------------------------------------------------------------------------------- /tests/spec/aggregate_series/__init__.py: -------------------------------------------------------------------------------- 1 | title = "Aggregating event series" 2 | -------------------------------------------------------------------------------- /tests/spec/aggregate_series/test_count_distinct_for_patient.py: -------------------------------------------------------------------------------- 1 | from ..tables import e 2 | 3 | 4 | title = "Count distinct aggregation" 5 | 6 | table_data = { 7 | e: """ 8 | | i1 | f1 | s1 | d1 9 | --+-----+-----+----+------------ 10 | 1 | 101 | 1.1 | a | 2020-01-01 11 | 1 | 102 | 1.2 | b | 2020-01-02 12 | 1 | 103 | 1.5 
| c | 2020-01-03 13 | 2 | 201 | 2.1 | a | 2020-02-01 14 | 2 | 201 | 2.1 | a | 2020-02-01 15 | 2 | 203 | 2.5 | b | 2020-02-02 16 | 3 | 301 | 3.1 | a | 2020-03-01 17 | 3 | 301 | 3.1 | a | 2020-03-01 18 | 3 | | | | 19 | 3 | | | | 20 | 4 | | | | 21 | """, 22 | } 23 | 24 | 25 | def test_count_distinct_for_patient_integer(spec_test): 26 | spec_test( 27 | table_data, 28 | e.i1.count_distinct_for_patient(), 29 | { 30 | 1: 3, 31 | 2: 2, 32 | 3: 1, 33 | 4: 0, 34 | }, 35 | ) 36 | 37 | 38 | def test_count_distinct_for_patient_float(spec_test): 39 | spec_test( 40 | table_data, 41 | e.f1.count_distinct_for_patient(), 42 | { 43 | 1: 3, 44 | 2: 2, 45 | 3: 1, 46 | 4: 0, 47 | }, 48 | ) 49 | 50 | 51 | def test_count_distinct_for_patient_string(spec_test): 52 | spec_test( 53 | table_data, 54 | e.s1.count_distinct_for_patient(), 55 | { 56 | 1: 3, 57 | 2: 2, 58 | 3: 1, 59 | 4: 0, 60 | }, 61 | ) 62 | 63 | 64 | def test_count_distinct_for_patient_date(spec_test): 65 | spec_test( 66 | table_data, 67 | e.s1.count_distinct_for_patient(), 68 | { 69 | 1: 3, 70 | 2: 2, 71 | 3: 1, 72 | 4: 0, 73 | }, 74 | ) 75 | -------------------------------------------------------------------------------- /tests/spec/aggregate_series/test_mean_for_patient.py: -------------------------------------------------------------------------------- 1 | from ..tables import e 2 | 3 | 4 | title = "Mean aggregation" 5 | 6 | table_data = { 7 | e: """ 8 | | i1 | f1 9 | --+----+----- 10 | 1 | 1 | 1.1 11 | 1 | 2 | 2.1 12 | 1 | 3 | 3.1 13 | 2 | | 14 | 2 | 2 | 2.1 15 | 2 | 3 | 3.1 16 | 3 | | 17 | """, 18 | } 19 | 20 | 21 | def test_mean_for_patient_integer(spec_test): 22 | spec_test( 23 | table_data, 24 | e.i1.mean_for_patient(), 25 | { 26 | 1: (1 + 2 + 3) / 3, 27 | 2: (2 + 3) / 2, 28 | 3: None, 29 | }, 30 | ) 31 | 32 | 33 | def test_mean_for_patient_float(spec_test): 34 | spec_test( 35 | table_data, 36 | e.f1.mean_for_patient(), 37 | { 38 | 1: (1.1 + 2.1 + 3.1) / 3, 39 | 2: (2.1 + 3.1) / 2, 40 | 3: None, 41 | }, 42 | ) 43 | 
-------------------------------------------------------------------------------- /tests/spec/aggregate_series/test_minimum_and_maximum_for_patient.py: -------------------------------------------------------------------------------- 1 | from ..tables import e 2 | 3 | 4 | title = "Minimum and maximum aggregations" 5 | 6 | table_data = { 7 | e: """ 8 | | i1 9 | --+----- 10 | 1 | 101 11 | 1 | 102 12 | 1 | 103 13 | 2 | 201 14 | 2 | 15 | 3 | 16 | """, 17 | } 18 | 19 | 20 | def test_minimum_for_patient(spec_test): 21 | spec_test( 22 | table_data, 23 | e.i1.minimum_for_patient(), 24 | { 25 | 1: 101, 26 | 2: 201, 27 | 3: None, 28 | }, 29 | ) 30 | 31 | 32 | def test_maximum_for_patient(spec_test): 33 | spec_test( 34 | table_data, 35 | e.i1.maximum_for_patient(), 36 | { 37 | 1: 103, 38 | 2: 201, 39 | 3: None, 40 | }, 41 | ) 42 | -------------------------------------------------------------------------------- /tests/spec/aggregate_series/test_sum_for_patient.py: -------------------------------------------------------------------------------- 1 | from ..tables import e 2 | 3 | 4 | title = "Sum aggregation" 5 | 6 | table_data = { 7 | e: """ 8 | | i1 9 | --+----- 10 | 1 | 101 11 | 1 | 102 12 | 1 | 103 13 | 2 | 201 14 | 2 | 15 | 2 | 203 16 | 3 | 17 | """, 18 | } 19 | 20 | 21 | def test_sum_for_patient(spec_test): 22 | spec_test( 23 | table_data, 24 | e.i1.sum_for_patient(), 25 | { 26 | 1: (101 + 102 + 103), 27 | 2: (201 + 203), 28 | 3: None, 29 | }, 30 | ) 31 | -------------------------------------------------------------------------------- /tests/spec/bool_series_ops/__init__.py: -------------------------------------------------------------------------------- 1 | title = "Operations on boolean series" 2 | -------------------------------------------------------------------------------- /tests/spec/bool_series_ops/test_conversion.py: -------------------------------------------------------------------------------- 1 | from ..tables import p 2 | 3 | 4 | title = "Convert a boolean 
value to an integer" 5 | 6 | table_data = { 7 | p: """ 8 | | b1 9 | --+---- 10 | 1 | T 11 | 2 | 12 | 3 | F 13 | """, 14 | } 15 | 16 | 17 | def test_bool_as_int(spec_test): 18 | """ 19 | Booleans are converted to 0 (False) or 1 (True). 20 | """ 21 | spec_test( 22 | table_data, 23 | p.b1.as_int(), 24 | {1: 1, 2: None, 3: 0}, 25 | ) 26 | -------------------------------------------------------------------------------- /tests/spec/bool_series_ops/test_logical_ops.py: -------------------------------------------------------------------------------- 1 | from ..tables import p 2 | 3 | 4 | title = "Logical operations" 5 | 6 | 7 | def test_not(spec_test): 8 | table_data = { 9 | p: """ 10 | | b1 11 | --+---- 12 | 1 | T 13 | 2 | 14 | 3 | F 15 | """, 16 | } 17 | 18 | spec_test( 19 | table_data, 20 | ~p.b1, 21 | { 22 | 1: False, 23 | 2: None, 24 | 3: True, 25 | }, 26 | ) 27 | 28 | 29 | table_data = { 30 | p: """ 31 | | b1 | b2 32 | --+----+---- 33 | 1 | T | T 34 | 2 | T | 35 | 3 | T | F 36 | 4 | | T 37 | 5 | | 38 | 6 | | F 39 | 7 | F | T 40 | 8 | F | 41 | 9 | F | F 42 | """, 43 | } 44 | 45 | 46 | def test_and(spec_test): 47 | spec_test( 48 | table_data, 49 | p.b1 & p.b2, 50 | { 51 | 1: True, 52 | 2: None, 53 | 3: False, 54 | 4: None, 55 | 5: None, 56 | 6: False, 57 | 7: False, 58 | 8: False, 59 | 9: False, 60 | }, 61 | ) 62 | 63 | 64 | def test_or(spec_test): 65 | spec_test( 66 | table_data, 67 | p.b1 | p.b2, 68 | { 69 | 1: True, 70 | 2: True, 71 | 3: True, 72 | 4: True, 73 | 5: None, 74 | 6: None, 75 | 7: True, 76 | 8: None, 77 | 9: False, 78 | }, 79 | ) 80 | -------------------------------------------------------------------------------- /tests/spec/case_expressions/__init__.py: -------------------------------------------------------------------------------- 1 | title = "Logical case expressions" 2 | -------------------------------------------------------------------------------- /tests/spec/case_expressions/test_when.py: 
-------------------------------------------------------------------------------- 1 | from ehrql import when 2 | 3 | from ..tables import p 4 | 5 | 6 | title = "Case expressions with single condition" 7 | 8 | 9 | def test_when_with_expression(spec_test): 10 | table_data = { 11 | p: """ 12 | | i1 13 | --+---- 14 | 1 | 6 15 | 2 | 7 16 | 3 | 8 17 | 4 | 18 | """, 19 | } 20 | spec_test( 21 | table_data, 22 | when(p.i1 < 8).then(p.i1).otherwise(100), 23 | { 24 | 1: 6, 25 | 2: 7, 26 | 3: 100, 27 | 4: 100, 28 | }, 29 | ) 30 | 31 | 32 | def test_when_with_boolean_column(spec_test): 33 | table_data = { 34 | p: """ 35 | | i1 | b1 36 | --+----+---- 37 | 1 | 6 | T 38 | 2 | 7 | F 39 | 3 | | 40 | """, 41 | } 42 | 43 | spec_test( 44 | table_data, 45 | when(p.b1).then(p.i1).otherwise(100), 46 | { 47 | 1: 6, 48 | 2: 100, 49 | 3: 100, 50 | }, 51 | ) 52 | -------------------------------------------------------------------------------- /tests/spec/code_series_ops/__init__.py: -------------------------------------------------------------------------------- 1 | title = "Operations on all series containing codes" 2 | -------------------------------------------------------------------------------- /tests/spec/code_series_ops/test_containment.py: -------------------------------------------------------------------------------- 1 | from ehrql.codes import SNOMEDCTCode, codelist_from_csv_lines 2 | 3 | from ..tables import p 4 | 5 | 6 | title = "Testing for containment using codes" 7 | 8 | table_data = { 9 | p: """ 10 | | c1 11 | --+-------- 12 | 1 | 123000 13 | 2 | 456000 14 | 3 | 789000 15 | 4 | 16 | """, 17 | } 18 | 19 | 20 | def test_is_in(spec_test): 21 | spec_test( 22 | table_data, 23 | p.c1.is_in([SNOMEDCTCode("123000"), SNOMEDCTCode("789000")]), 24 | { 25 | 1: True, 26 | 2: False, 27 | 3: True, 28 | 4: None, 29 | }, 30 | ) 31 | 32 | 33 | def test_is_not_in(spec_test): 34 | spec_test( 35 | table_data, 36 | p.c1.is_not_in([SNOMEDCTCode("123000"), SNOMEDCTCode("789000")]), 37 | { 38 | 1: 
False, 39 | 2: True, 40 | 3: False, 41 | 4: None, 42 | }, 43 | ) 44 | 45 | 46 | def test_is_in_codelist_csv(spec_test): 47 | codelist = codelist_from_csv_lines( 48 | [ 49 | "code", 50 | "123000", 51 | "789000", 52 | ], 53 | column="code", 54 | ) 55 | 56 | spec_test( 57 | table_data, 58 | p.c1.is_in(codelist), 59 | { 60 | 1: True, 61 | 2: False, 62 | 3: True, 63 | 4: None, 64 | }, 65 | ) 66 | -------------------------------------------------------------------------------- /tests/spec/code_series_ops/test_map_codes_to_categories.py: -------------------------------------------------------------------------------- 1 | from ehrql.codes import codelist_from_csv_lines 2 | 3 | from ..tables import p 4 | 5 | 6 | title = "Test mapping codes to categories using a categorised codelist" 7 | 8 | table_data = { 9 | p: """ 10 | | c1 11 | --+-------- 12 | 1 | 123000 13 | 2 | 456000 14 | 3 | 789000 15 | 4 | 16 | """, 17 | } 18 | 19 | 20 | def test_map_codes_to_categories(spec_test): 21 | codelist = codelist_from_csv_lines( 22 | [ 23 | "code,my_categorisation", 24 | "123000,cat1", 25 | "789000,cat2", 26 | ], 27 | column="code", 28 | category_column="my_categorisation", 29 | ) 30 | 31 | spec_test( 32 | table_data, 33 | p.c1.to_category(codelist), 34 | { 35 | 1: "cat1", 36 | 2: None, 37 | 3: "cat2", 38 | 4: None, 39 | }, 40 | ) 41 | -------------------------------------------------------------------------------- /tests/spec/combine_series/__init__.py: -------------------------------------------------------------------------------- 1 | title = "Combining series" 2 | -------------------------------------------------------------------------------- /tests/spec/combine_series/test_event_series_and_event_series.py: -------------------------------------------------------------------------------- 1 | from ..tables import e 2 | 3 | 4 | title = "Combining two event series" 5 | 6 | table_data = { 7 | e: """ 8 | | i1 | i2 | s1 9 | --+-----+-----+--- 10 | 1 | 101 | 111 | b 11 | 1 | 102 | 112 | a 12 
| 2 | 201 | 211 | b 13 | 2 | 202 | 212 | a 14 | """, 15 | } 16 | 17 | 18 | def test_event_series_and_event_series(spec_test): 19 | spec_test( 20 | table_data, 21 | (e.i1 + e.i2).sum_for_patient(), 22 | { 23 | 1: (101 + 111) + (102 + 112), 24 | 2: (201 + 211) + (202 + 212), 25 | }, 26 | ) 27 | 28 | 29 | def test_event_series_and_sorted_event_series(spec_test): 30 | """ 31 | The sort order of the underlying event series does not affect their combination. 32 | """ 33 | spec_test( 34 | table_data, 35 | (e.i1 + e.sort_by(e.s1).i2).minimum_for_patient(), 36 | { 37 | 1: (101 + 111), 38 | 2: (201 + 211), 39 | }, 40 | ) 41 | -------------------------------------------------------------------------------- /tests/spec/combine_series/test_event_series_and_patient_series.py: -------------------------------------------------------------------------------- 1 | from ..tables import e, p 2 | 3 | 4 | title = "Combining an event series with a patient series" 5 | 6 | table_data = { 7 | p: """ 8 | | i1 9 | --+----- 10 | 1 | 101 11 | 2 | 201 12 | 13 | """, 14 | e: """ 15 | | i1 16 | --+----- 17 | 1 | 111 18 | 1 | 112 19 | 2 | 211 20 | 2 | 212 21 | """, 22 | } 23 | 24 | 25 | def test_event_series_and_patient_series(spec_test): 26 | spec_test( 27 | table_data, 28 | (e.i1 + p.i1).sum_for_patient(), 29 | { 30 | 1: (111 + 101) + (112 + 101), 31 | 2: (211 + 201) + (212 + 201), 32 | }, 33 | ) 34 | 35 | 36 | def test_patient_series_and_event_series(spec_test): 37 | spec_test( 38 | table_data, 39 | (p.i1 + e.i1).sum_for_patient(), 40 | { 41 | 1: (101 + 111) + (101 + 112), 42 | 2: (201 + 211) + (201 + 212), 43 | }, 44 | ) 45 | -------------------------------------------------------------------------------- /tests/spec/combine_series/test_event_series_and_value.py: -------------------------------------------------------------------------------- 1 | from ..tables import e 2 | 3 | 4 | title = "Combining an event series with a value" 5 | 6 | table_data = { 7 | e: """ 8 | | i1 9 | --+----- 10 | 1 | 
101 11 | 1 | 102 12 | 2 | 201 13 | 2 | 202 14 | """, 15 | } 16 | 17 | 18 | def test_event_series_and_value(spec_test): 19 | spec_test( 20 | table_data, 21 | (e.i1 + 1).sum_for_patient(), 22 | { 23 | 1: (101 + 1) + (102 + 1), 24 | 2: (201 + 1) + (202 + 1), 25 | }, 26 | ) 27 | 28 | 29 | def test_value_and_event_series(spec_test): 30 | spec_test( 31 | table_data, 32 | (1 + e.i1).sum_for_patient(), 33 | { 34 | 1: (1 + 101) + (1 + 102), 35 | 2: (1 + 201) + (1 + 202), 36 | }, 37 | ) 38 | -------------------------------------------------------------------------------- /tests/spec/combine_series/test_patient_series_and_patient_series.py: -------------------------------------------------------------------------------- 1 | from ..tables import p 2 | 3 | 4 | title = "Combining two patient series" 5 | 6 | table_data = { 7 | p: """ 8 | | i1 | i2 9 | --+-----+----- 10 | 1 | 101 | 102 11 | 2 | 201 | 202 12 | """, 13 | } 14 | 15 | 16 | def test_patient_series_and_patient_series(spec_test): 17 | spec_test( 18 | table_data, 19 | p.i1 + p.i2, 20 | { 21 | 1: (101 + 102), 22 | 2: (201 + 202), 23 | }, 24 | ) 25 | -------------------------------------------------------------------------------- /tests/spec/combine_series/test_patient_series_and_value.py: -------------------------------------------------------------------------------- 1 | from ..tables import p 2 | 3 | 4 | title = "Combining a patient series with a value" 5 | 6 | table_data = { 7 | p: """ 8 | | i1 9 | --+----- 10 | 1 | 101 11 | 2 | 201 12 | """, 13 | } 14 | 15 | 16 | def test_patient_series_and_value(spec_test): 17 | spec_test( 18 | table_data, 19 | p.i1 + 1, 20 | { 21 | 1: (101 + 1), 22 | 2: (201 + 1), 23 | }, 24 | ) 25 | 26 | 27 | def test_value_and_patient_series(spec_test): 28 | spec_test( 29 | table_data, 30 | 1 + p.i1, 31 | { 32 | 1: (1 + 101), 33 | 2: (1 + 201), 34 | }, 35 | ) 36 | -------------------------------------------------------------------------------- /tests/spec/date_series_ops/__init__.py: 
-------------------------------------------------------------------------------- 1 | title = "Operations on all series containing dates" 2 | -------------------------------------------------------------------------------- /tests/spec/date_series_ops/test_date_aggregations.py: -------------------------------------------------------------------------------- 1 | from ehrql import days 2 | 3 | from ..tables import e 4 | 5 | 6 | title = "Aggregations which apply to all series containing dates" 7 | 8 | 9 | def test_count_episodes(spec_test): 10 | table_data = { 11 | e: """ 12 | | d1 13 | --+------------ 14 | 1 | 2020-01-01 15 | 1 | 2020-01-04 16 | 1 | 2020-01-06 17 | 1 | 2020-01-10 18 | 1 | 2020-01-12 19 | 2 | 2020-01-01 20 | 3 | 21 | 4 | 2020-01-10 22 | 4 | 23 | 4 | 24 | 4 | 2020-01-01 25 | """, 26 | } 27 | 28 | spec_test( 29 | table_data, 30 | e.d1.count_episodes_for_patient(days(3)), 31 | { 32 | 1: 2, 33 | 2: 1, 34 | 3: 0, 35 | 4: 2, 36 | }, 37 | ) 38 | -------------------------------------------------------------------------------- /tests/spec/date_series_ops/test_date_comparison_types.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from ..tables import p 4 | 5 | 6 | title = "Types usable in comparisons involving dates" 7 | 8 | table_data = { 9 | p: """ 10 | | d1 | d2 11 | --+------------+------------ 12 | 1 | 1990-01-01 | 1980-01-01 13 | 2 | 2000-01-01 | 1980-01-01 14 | 3 | 2010-01-01 | 2020-01-01 15 | 4 | | 2020-01-01 16 | """, 17 | } 18 | 19 | 20 | def test_accepts_python_date_object(spec_test): 21 | spec_test( 22 | table_data, 23 | p.d1.is_before(datetime.date(2000, 1, 20)), 24 | { 25 | 1: True, 26 | 2: True, 27 | 3: False, 28 | 4: None, 29 | }, 30 | ) 31 | 32 | 33 | def test_accepts_iso_formated_date_string(spec_test): 34 | spec_test( 35 | table_data, 36 | p.d1.is_before("2000-01-20"), 37 | { 38 | 1: True, 39 | 2: True, 40 | 3: False, 41 | 4: None, 42 | }, 43 | ) 44 | 45 | 46 | def 
test_accepts_another_date_series(spec_test): 47 | spec_test( 48 | table_data, 49 | p.d1.is_before(p.d2), 50 | { 51 | 1: False, 52 | 2: False, 53 | 3: True, 54 | 4: None, 55 | }, 56 | ) 57 | -------------------------------------------------------------------------------- /tests/spec/dummy/__init__.py: -------------------------------------------------------------------------------- 1 | title = "Dummy chapter for testing spec generation" 2 | text = "This chapter should not appear in the table of contents" 3 | -------------------------------------------------------------------------------- /tests/spec/dummy/test_dummy.py: -------------------------------------------------------------------------------- 1 | from ..tables import p 2 | 3 | 4 | title = "Dummy section for testing spec generation" 5 | text = "This section should not appear in the table of contents" 6 | 7 | 8 | table_data = { 9 | p: """ 10 | | b1 | b2 11 | --+----+---- 12 | 1 | T | T 13 | 2 | T | 14 | 3 | T | F 15 | 4 | | T 16 | 5 | | 17 | 6 | | F 18 | 7 | F | T 19 | 8 | F | 20 | 9 | F | F 21 | """, 22 | } 23 | 24 | 25 | def test_function_with_docstring(spec_test): 26 | """this docstring should appear in the spec""" 27 | spec_test( 28 | table_data, 29 | p.b1 & p.b2, 30 | { 31 | 1: True, 32 | 2: None, 33 | 3: False, 34 | 4: None, 35 | 5: None, 36 | 6: False, 37 | 7: False, 38 | 8: False, 39 | 9: False, 40 | }, 41 | ) 42 | 43 | 44 | def test_function_without_docstring(spec_test): 45 | spec_test( 46 | table_data, 47 | p.b1 & p.b2, 48 | { 49 | 1: True, 50 | 2: None, 51 | 3: False, 52 | 4: None, 53 | 5: None, 54 | 6: False, 55 | 7: False, 56 | 8: False, 57 | 9: False, 58 | }, 59 | ) 60 | -------------------------------------------------------------------------------- /tests/spec/filter/__init__.py: -------------------------------------------------------------------------------- 1 | title = "Filtering an event frame" 2 | -------------------------------------------------------------------------------- 
/tests/spec/float_series_ops/__init__.py: -------------------------------------------------------------------------------- 1 | title = "Operations on float series" 2 | -------------------------------------------------------------------------------- /tests/spec/float_series_ops/test_arithmetic_ops.py: -------------------------------------------------------------------------------- 1 | from ..tables import p 2 | 3 | 4 | title = "Arithmetic operations without division" 5 | 6 | table_data = { 7 | p: """ 8 | | f1 | f2 9 | --+-------+------- 10 | 1 | 101.3 | 111.5 11 | 2 | 201.4 | 12 | """, 13 | } 14 | 15 | 16 | def test_negate(spec_test): 17 | spec_test( 18 | table_data, 19 | -p.f2, 20 | {1: -111.5, 2: None}, 21 | ) 22 | 23 | 24 | def test_add(spec_test): 25 | spec_test( 26 | table_data, 27 | p.f1 + p.f2, 28 | {1: 101.3 + 111.5, 2: None}, 29 | ) 30 | 31 | 32 | def test_subtract_with_positive_result(spec_test): 33 | spec_test( 34 | table_data, 35 | p.f2 - p.f1, 36 | {1: 111.5 - 101.3, 2: None}, 37 | ) 38 | 39 | 40 | def test_subtract_with_negative_result(spec_test): 41 | spec_test( 42 | table_data, 43 | p.f1 - p.f2, 44 | {1: 101.3 - 111.5, 2: None}, 45 | ) 46 | 47 | 48 | def test_multiply(spec_test): 49 | spec_test( 50 | table_data, 51 | p.f1 * p.f2, 52 | {1: 101.3 * 111.5, 2: None}, 53 | ) 54 | 55 | 56 | def test_multiply_with_constant(spec_test): 57 | spec_test( 58 | table_data, 59 | 10.0 * p.f2, 60 | {1: 10.0 * 111.5, 2: None}, 61 | ) 62 | -------------------------------------------------------------------------------- /tests/spec/float_series_ops/test_comparison_ops.py: -------------------------------------------------------------------------------- 1 | from ..tables import p 2 | 3 | 4 | title = "Comparison operations" 5 | 6 | table_data = { 7 | p: """ 8 | | f1 | f2 9 | --+-----+----- 10 | 1 | 101.1 | 201.2 11 | 2 | 201.2 | 201.2 12 | 3 | 301.3 | 201.2 13 | 4 | | 201.2 14 | """, 15 | } 16 | 17 | 18 | def test_less_than(spec_test): 19 | spec_test( 20 | table_data, 21 
| p.f1 < p.f2, 22 | {1: True, 2: False, 3: False, 4: None}, 23 | ) 24 | 25 | 26 | def test_less_than_or_equal_to(spec_test): 27 | spec_test( 28 | table_data, 29 | p.f1 <= p.f2, 30 | {1: True, 2: True, 3: False, 4: None}, 31 | ) 32 | 33 | 34 | def test_greater_than(spec_test): 35 | spec_test( 36 | table_data, 37 | p.f1 > p.f2, 38 | {1: False, 2: False, 3: True, 4: None}, 39 | ) 40 | 41 | 42 | def test_greater_than_or_equal_to(spec_test): 43 | spec_test( 44 | table_data, 45 | p.f1 >= p.f2, 46 | {1: False, 2: True, 3: True, 4: None}, 47 | ) 48 | -------------------------------------------------------------------------------- /tests/spec/float_series_ops/test_conversion.py: -------------------------------------------------------------------------------- 1 | from ..tables import p 2 | 3 | 4 | title = "Convert a float value" 5 | 6 | table_data = { 7 | p: """ 8 | | i1 | f1 9 | --+----+---- 10 | 1 | 1 | 1.0 11 | 2 | 42 | 32.3 12 | 3 | 3 | 5.8 13 | 4 | -4 | -6.7 14 | 5 | | -6.2 15 | 6 | | 0.5 16 | 7 | | 17 | """, 18 | } 19 | 20 | 21 | def test_float_as_int(spec_test): 22 | """ 23 | Floats are rounded towards zero. 
24 | """ 25 | spec_test( 26 | table_data, 27 | p.f1.as_int(), 28 | {1: 1, 2: 32, 3: 5, 4: -6, 5: -6, 6: 0, 7: None}, 29 | ) 30 | 31 | 32 | def test_float_as_float(spec_test): 33 | spec_test( 34 | table_data, 35 | p.f1.as_float(), 36 | {1: 1.0, 2: 32.3, 3: 5.8, 4: -6.7, 5: -6.2, 6: 0.5, 7: None}, 37 | ) 38 | 39 | 40 | def test_add_float_to_int(spec_test): 41 | spec_test( 42 | table_data, 43 | p.f1 + p.i1.as_float(), 44 | {1: 2.0, 2: 74.3, 3: 8.8, 4: -10.7, 5: None, 6: None, 7: None}, 45 | ) 46 | -------------------------------------------------------------------------------- /tests/spec/int_series_ops/__init__.py: -------------------------------------------------------------------------------- 1 | title = "Operations on integer series" 2 | -------------------------------------------------------------------------------- /tests/spec/int_series_ops/test_arithmetic_ops.py: -------------------------------------------------------------------------------- 1 | from ..tables import p 2 | 3 | 4 | title = "Arithmetic operations without division" 5 | 6 | table_data = { 7 | p: """ 8 | | i1 | i2 9 | --+-----+----- 10 | 1 | 101 | 111 11 | 2 | 201 | 12 | """, 13 | } 14 | 15 | 16 | def test_negate(spec_test): 17 | spec_test( 18 | table_data, 19 | -p.i2, 20 | {1: -111, 2: None}, 21 | ) 22 | 23 | 24 | def test_add(spec_test): 25 | spec_test( 26 | table_data, 27 | p.i1 + p.i2, 28 | {1: 101 + 111, 2: None}, 29 | ) 30 | 31 | 32 | def test_subtract(spec_test): 33 | spec_test( 34 | table_data, 35 | p.i1 - p.i2, 36 | {1: 101 - 111, 2: None}, 37 | ) 38 | 39 | 40 | def test_multiply(spec_test): 41 | spec_test( 42 | table_data, 43 | p.i1 * p.i2, 44 | {1: 101 * 111, 2: None}, 45 | ) 46 | 47 | 48 | def test_multiply_with_constant(spec_test): 49 | spec_test( 50 | table_data, 51 | 10 * p.i2, 52 | {1: 10 * 111, 2: None}, 53 | ) 54 | -------------------------------------------------------------------------------- /tests/spec/int_series_ops/test_comparison_ops.py: 
-------------------------------------------------------------------------------- 1 | from ..tables import p 2 | 3 | 4 | title = "Comparison operations" 5 | 6 | table_data = { 7 | p: """ 8 | | i1 | i2 9 | --+-----+----- 10 | 1 | 101 | 201 11 | 2 | 201 | 201 12 | 3 | 301 | 201 13 | 4 | | 201 14 | """, 15 | } 16 | 17 | 18 | def test_less_than(spec_test): 19 | spec_test( 20 | table_data, 21 | p.i1 < p.i2, 22 | {1: True, 2: False, 3: False, 4: None}, 23 | ) 24 | 25 | 26 | def test_less_than_or_equal_to(spec_test): 27 | spec_test( 28 | table_data, 29 | p.i1 <= p.i2, 30 | {1: True, 2: True, 3: False, 4: None}, 31 | ) 32 | 33 | 34 | def test_greater_than(spec_test): 35 | spec_test( 36 | table_data, 37 | p.i1 > p.i2, 38 | {1: False, 2: False, 3: True, 4: None}, 39 | ) 40 | 41 | 42 | def test_greater_than_or_equal_to(spec_test): 43 | spec_test( 44 | table_data, 45 | p.i1 >= p.i2, 46 | {1: False, 2: True, 3: True, 4: None}, 47 | ) 48 | -------------------------------------------------------------------------------- /tests/spec/int_series_ops/test_conversion.py: -------------------------------------------------------------------------------- 1 | from ..tables import p 2 | 3 | 4 | title = "Convert an integer value" 5 | 6 | table_data = { 7 | p: """ 8 | | i1 | f1 9 | --+----+---- 10 | 1 | 1 | 1.0 11 | 2 | 32 | 12.4 12 | 3 | 5 | -3.2 13 | 4 | | 2.1 14 | """, 15 | } 16 | 17 | 18 | def test_integer_as_float(spec_test): 19 | spec_test( 20 | table_data, 21 | p.i1.as_float(), 22 | { 23 | 1: 1.0, 24 | 2: 32.0, 25 | 3: 5.0, 26 | 4: None, 27 | }, 28 | ) 29 | 30 | 31 | def test_integer_as_int(spec_test): 32 | spec_test( 33 | table_data, 34 | p.i1.as_int(), 35 | { 36 | 1: 1, 37 | 2: 32, 38 | 3: 5, 39 | 4: None, 40 | }, 41 | ) 42 | 43 | 44 | def test_add_int_to_float(spec_test): 45 | spec_test( 46 | table_data, 47 | p.i1 + p.f1.as_int(), 48 | {1: 2, 2: 44, 3: 2, 4: None}, 49 | ) 50 | -------------------------------------------------------------------------------- 
/tests/spec/multi_code_string_series_ops/__init__.py: -------------------------------------------------------------------------------- 1 | title = "Operations on all series containing multi code strings" 2 | -------------------------------------------------------------------------------- /tests/spec/multi_code_string_series_ops/test_containment.py: -------------------------------------------------------------------------------- 1 | from ehrql.codes import ICD10Code 2 | 3 | from ..tables import p 4 | 5 | 6 | title = "Testing for containment using codes" 7 | 8 | table_data = { 9 | p: """ 10 | | m1 11 | --+-------- 12 | 1 | ||E119 ,J849 ,M069 ||I801 ,I802 13 | 2 | ||T202 ,A429 ||A429 ,A420, J170 14 | 3 | ||M139 ,E220 ,M145, M060 15 | 4 | 16 | """, 17 | } 18 | 19 | 20 | def test_contains_code_prefix(spec_test): 21 | spec_test( 22 | table_data, 23 | p.m1.contains("M06"), 24 | { 25 | 1: True, 26 | 2: False, 27 | 3: True, 28 | 4: None, 29 | }, 30 | ) 31 | 32 | 33 | def test_contains_code(spec_test): 34 | spec_test( 35 | table_data, 36 | p.m1.contains(ICD10Code("M069")), 37 | { 38 | 1: True, 39 | 2: False, 40 | 3: False, 41 | 4: None, 42 | }, 43 | ) 44 | 45 | 46 | def test_contains_any_of_codelist(spec_test): 47 | spec_test( 48 | table_data, 49 | p.m1.contains_any_of([ICD10Code("M069"), "A429"]), 50 | { 51 | 1: True, 52 | 2: True, 53 | 3: False, 54 | 4: None, 55 | }, 56 | ) 57 | -------------------------------------------------------------------------------- /tests/spec/population/__init__.py: -------------------------------------------------------------------------------- 1 | title = "Defining the dataset population" 2 | -------------------------------------------------------------------------------- /tests/spec/population/test_population.py: -------------------------------------------------------------------------------- 1 | from ehrql import case, when 2 | 3 | from ..tables import e, p 4 | 5 | 6 | title = "Defining a population" 7 | text = """ 8 | `define_population` is 
used to limit the population from which data is extracted. 9 | """ 10 | 11 | 12 | def test_population_with_single_table(spec_test): 13 | """ 14 | Extract a column from a patient table after limiting the population by another column. 15 | """ 16 | table_data = { 17 | p: """ 18 | | b1 | i1 19 | --+----+--- 20 | 1 | F | 10 21 | 2 | T | 20 22 | 3 | F | 30 23 | """, 24 | } 25 | 26 | spec_test( 27 | table_data, 28 | p.i1, 29 | { 30 | 1: 10, 31 | 3: 30, 32 | }, 33 | population=~p.b1, 34 | ) 35 | 36 | 37 | def test_population_with_multiple_tables(spec_test): 38 | """ 39 | Limit the patient population by a column in one table, and return values from another 40 | table. 41 | """ 42 | table_data = { 43 | p: """ 44 | | i1 45 | --+---- 46 | 1 | 10 47 | 2 | 20 48 | 3 | 0 49 | """, 50 | e: """ 51 | | i1 52 | --+----- 53 | 1 | 101 54 | 1 | 102 55 | 3 | 301 56 | 4 | 401 57 | """, 58 | } 59 | 60 | spec_test( 61 | table_data, 62 | e.exists_for_patient(), 63 | { 64 | 1: True, 65 | 2: False, 66 | }, 67 | population=p.i1 > 0, 68 | ) 69 | 70 | 71 | def test_case_with_case_expression(spec_test): 72 | """ 73 | Limit the patient population by a case expression. 
74 | """ 75 | table_data = { 76 | p: """ 77 | | i1 78 | --+--- 79 | 1 | 6 80 | 2 | 7 81 | 3 | 9 82 | 4 | 83 | """, 84 | } 85 | 86 | spec_test( 87 | table_data, 88 | p.i1, 89 | { 90 | 1: 6, 91 | 2: 7, 92 | }, 93 | population=case( 94 | when(p.i1 <= 8).then(True), 95 | when(p.i1 > 8).then(False), 96 | ), 97 | ) 98 | -------------------------------------------------------------------------------- /tests/spec/series_ops/__init__.py: -------------------------------------------------------------------------------- 1 | title = "Operations on all series" 2 | -------------------------------------------------------------------------------- /tests/spec/series_ops/test_containment.py: -------------------------------------------------------------------------------- 1 | from ..tables import p 2 | 3 | 4 | title = "Testing for containment" 5 | 6 | table_data = { 7 | p: """ 8 | | i1 9 | --+----- 10 | 1 | 101 11 | 2 | 201 12 | 3 | 301 13 | 4 | 14 | """, 15 | } 16 | 17 | 18 | def test_is_in(spec_test): 19 | spec_test( 20 | table_data, 21 | p.i1.is_in([101, 301]), 22 | { 23 | 1: True, 24 | 2: False, 25 | 3: True, 26 | 4: None, 27 | }, 28 | ) 29 | 30 | 31 | def test_is_not_in(spec_test): 32 | spec_test( 33 | table_data, 34 | p.i1.is_not_in([101, 301]), 35 | { 36 | 1: False, 37 | 2: True, 38 | 3: False, 39 | 4: None, 40 | }, 41 | ) 42 | 43 | 44 | def test_is_in_empty_list(spec_test): 45 | spec_test( 46 | table_data, 47 | p.i1.is_in([]), 48 | { 49 | 1: False, 50 | 2: False, 51 | 3: False, 52 | 4: False, 53 | }, 54 | ) 55 | 56 | 57 | def test_is_not_in_empty_list(spec_test): 58 | spec_test( 59 | table_data, 60 | p.i1.is_not_in([]), 61 | { 62 | 1: True, 63 | 2: True, 64 | 3: True, 65 | 4: True, 66 | }, 67 | ) 68 | -------------------------------------------------------------------------------- /tests/spec/series_ops/test_containment_with_series.py: -------------------------------------------------------------------------------- 1 | from ..tables import e, p 2 | 3 | 4 | title = "Testing for 
containment in another series" 5 | 6 | 7 | table_data = { 8 | p: """ 9 | | i1 10 | --+----- 11 | 1 | 101 12 | 2 | 201 13 | 3 | 301 14 | 4 | 15 | 5 | 501 16 | 6 | 17 | """, 18 | e: """ 19 | | i1 20 | --+----- 21 | 1 | 101 22 | 2 | 201 23 | 2 | 203 24 | 2 | 301 25 | 3 | 333 26 | 3 | 334 27 | 4 | 28 | 4 | 401 29 | 5 | 30 | 5 | 101 31 | """, 32 | } 33 | 34 | 35 | def test_is_in_series(spec_test): 36 | spec_test( 37 | table_data, 38 | p.i1.is_in(e.i1), 39 | { 40 | 1: True, 41 | 2: True, 42 | 3: False, 43 | 4: None, 44 | 5: False, 45 | 6: False, 46 | }, 47 | ) 48 | 49 | 50 | def test_is_not_in_series(spec_test): 51 | spec_test( 52 | table_data, 53 | p.i1.is_not_in(e.i1), 54 | { 55 | 1: False, 56 | 2: False, 57 | 3: True, 58 | 4: None, 59 | 5: True, 60 | 6: True, 61 | }, 62 | ) 63 | -------------------------------------------------------------------------------- /tests/spec/series_ops/test_equality.py: -------------------------------------------------------------------------------- 1 | from ..tables import p 2 | 3 | 4 | title = "Testing for equality" 5 | 6 | table_data = { 7 | p: """ 8 | | i1 | i2 9 | --+-----+----- 10 | 1 | 101 | 101 11 | 2 | 201 | 202 12 | 3 | 301 | 13 | 4 | | 14 | """, 15 | } 16 | 17 | 18 | def test_equals(spec_test): 19 | spec_test( 20 | table_data, 21 | p.i1 == p.i2, 22 | { 23 | 1: True, 24 | 2: False, 25 | 3: None, 26 | 4: None, 27 | }, 28 | ) 29 | 30 | 31 | def test_not_equals(spec_test): 32 | spec_test( 33 | table_data, 34 | p.i1 != p.i2, 35 | { 36 | 1: False, 37 | 2: True, 38 | 3: None, 39 | 4: None, 40 | }, 41 | ) 42 | 43 | 44 | def test_is_null(spec_test): 45 | spec_test( 46 | table_data, 47 | p.i1.is_null(), 48 | { 49 | 1: False, 50 | 2: False, 51 | 3: False, 52 | 4: True, 53 | }, 54 | ) 55 | 56 | 57 | def test_is_not_null(spec_test): 58 | spec_test( 59 | table_data, 60 | p.i1.is_not_null(), 61 | { 62 | 1: True, 63 | 2: True, 64 | 3: True, 65 | 4: False, 66 | }, 67 | ) 68 | 
-------------------------------------------------------------------------------- /tests/spec/series_ops/test_map_values.py: -------------------------------------------------------------------------------- 1 | from ..tables import p 2 | 3 | 4 | title = "Map from one set of values to another" 5 | 6 | table_data = { 7 | p: """ 8 | | i1 9 | --+----- 10 | 1 | 101 11 | 2 | 201 12 | 3 | 301 13 | 4 | 14 | """, 15 | } 16 | 17 | 18 | def test_map_values(spec_test): 19 | spec_test( 20 | table_data, 21 | p.i1.map_values({101: "a", 201: "b", 301: "a"}, default="c"), 22 | { 23 | 1: "a", 24 | 2: "b", 25 | 3: "a", 26 | 4: "c", 27 | }, 28 | ) 29 | -------------------------------------------------------------------------------- /tests/spec/series_ops/test_when_null_then.py: -------------------------------------------------------------------------------- 1 | from ..tables import p 2 | 3 | 4 | title = "Replace missing values" 5 | 6 | table_data = { 7 | p: """ 8 | | i1 9 | --+----- 10 | 1 | 101 11 | 2 | 201 12 | 3 | 301 13 | 4 | 14 | """, 15 | } 16 | 17 | 18 | def test_when_null_then_integer_column(spec_test): 19 | spec_test( 20 | table_data, 21 | p.i1.when_null_then(0), 22 | { 23 | 1: 101, 24 | 2: 201, 25 | 3: 301, 26 | 4: 0, 27 | }, 28 | ) 29 | 30 | 31 | def test_when_null_then_boolean_column(spec_test): 32 | spec_test( 33 | table_data, 34 | p.i1.is_in([101, 201]).when_null_then(False), 35 | { 36 | 1: True, 37 | 2: True, 38 | 3: False, 39 | 4: False, 40 | }, 41 | ) 42 | -------------------------------------------------------------------------------- /tests/spec/sort_and_pick/__init__.py: -------------------------------------------------------------------------------- 1 | title = "Picking one row for each patient from an event frame" 2 | -------------------------------------------------------------------------------- /tests/spec/sort_and_pick/test_sort_by_column_and_pick.py: -------------------------------------------------------------------------------- 1 | from ..tables import e 2 | 
3 | 4 | title = "Picking the first or last row for each patient" 5 | 6 | table_data = { 7 | e: """ 8 | | i1 9 | --+---- 10 | 1 | 101 11 | 1 | 102 12 | 1 | 103 13 | 2 | 203 14 | 2 | 202 15 | 2 | 201 16 | """, 17 | } 18 | 19 | 20 | def test_sort_by_column_pick_first(spec_test): 21 | spec_test( 22 | table_data, 23 | e.sort_by(e.i1).first_for_patient().i1, 24 | { 25 | 1: 101, 26 | 2: 201, 27 | }, 28 | ) 29 | 30 | 31 | def test_sort_by_column_pick_last(spec_test): 32 | spec_test( 33 | table_data, 34 | e.sort_by(e.i1).last_for_patient().i1, 35 | { 36 | 1: 103, 37 | 2: 203, 38 | }, 39 | ) 40 | -------------------------------------------------------------------------------- /tests/spec/sort_and_pick/test_sort_by_column_with_nulls_and_pick.py: -------------------------------------------------------------------------------- 1 | from ..tables import e 2 | 3 | 4 | title = "Picking the first or last row for each patient where a column contains NULLs" 5 | 6 | table_data = { 7 | e: """ 8 | | i1 9 | --+----- 10 | 1 | 11 | 1 | 102 12 | 1 | 103 13 | 2 | 203 14 | 2 | 202 15 | 2 | 16 | """, 17 | } 18 | 19 | 20 | def test_sort_by_column_with_nulls_and_pick_first(spec_test): 21 | spec_test( 22 | table_data, 23 | e.sort_by(e.i1).first_for_patient().i1, 24 | { 25 | 1: None, 26 | 2: None, 27 | }, 28 | ) 29 | 30 | 31 | def test_sort_by_column_with_nulls_and_pick_last(spec_test): 32 | spec_test( 33 | table_data, 34 | e.sort_by(e.i1).last_for_patient().i1, 35 | { 36 | 1: 103, 37 | 2: 203, 38 | }, 39 | ) 40 | -------------------------------------------------------------------------------- /tests/spec/sort_and_pick/test_sort_by_interleaved_with_where.py: -------------------------------------------------------------------------------- 1 | from ..tables import e 2 | 3 | 4 | title = "Mixing the order of `sort_by` and `where` operations" 5 | 6 | table_data = { 7 | e: """ 8 | | i1 | i2 9 | --+-----+--- 10 | 1 | 101 | 1 11 | 1 | 102 | 2 12 | 1 | 103 | 2 13 | 2 | 203 | 1 14 | 2 | 202 | 2 15 | 2 | 201 
| 2 16 | """, 17 | } 18 | 19 | 20 | def test_sort_by_before_where(spec_test): 21 | spec_test( 22 | table_data, 23 | e.sort_by(e.i1).where(e.i1 > 102).first_for_patient().i1, 24 | { 25 | 1: 103, 26 | 2: 201, 27 | }, 28 | ) 29 | 30 | 31 | def test_sort_by_interleaved_with_where(spec_test): 32 | spec_test( 33 | table_data, 34 | e.sort_by(e.i1).where(e.i2 > 1).sort_by(e.i2).first_for_patient().i1, 35 | { 36 | 1: 102, 37 | 2: 201, 38 | }, 39 | ) 40 | -------------------------------------------------------------------------------- /tests/spec/sort_and_pick/test_sort_by_multiple_columns_and_pick.py: -------------------------------------------------------------------------------- 1 | from ..tables import e 2 | 3 | 4 | title = "Sort by more than one column and pick the first or last row for each patient" 5 | 6 | table_data = { 7 | e: """ 8 | | i1 | i2 9 | --+-----+--- 10 | 1 | 101 | 3 11 | 1 | 102 | 2 12 | 1 | 102 | 1 13 | 2 | 203 | 1 14 | 2 | 202 | 2 15 | 2 | 202 | 3 16 | """, 17 | } 18 | 19 | 20 | def test_sort_by_multiple_columns_pick_first(spec_test): 21 | spec_test( 22 | table_data, 23 | e.sort_by(e.i1, e.i2).first_for_patient().i2, 24 | { 25 | 1: 3, 26 | 2: 2, 27 | }, 28 | ) 29 | 30 | 31 | def test_sort_by_multiple_columns_pick_last(spec_test): 32 | spec_test( 33 | table_data, 34 | e.sort_by(e.i1, e.i2).last_for_patient().i2, 35 | { 36 | 1: 2, 37 | 2: 1, 38 | }, 39 | ) 40 | -------------------------------------------------------------------------------- /tests/spec/sort_and_pick/test_sort_extends_to_all_columns_when_underspecified.py: -------------------------------------------------------------------------------- 1 | from ..tables import e 2 | 3 | 4 | title = "Sort extends to all columns when underspecified to ensure that sort order is consistent" 5 | 6 | table_data = { 7 | e: """ 8 | | i1 | i2 | i3 9 | --+---------------- 10 | 1 | 100 | 2 | 101 11 | 1 | 100 | 1 | 102 12 | 1 | 100 | 1 | 103 13 | 2 | 100 | 0 | 500 14 | 2 | 100 | 1 | 1 15 | 2 | 101 | 0 | 1 16 | """, 17 
| } 18 | 19 | 20 | def test_sorting_extends_to_selected_column(spec_test): 21 | spec_test( 22 | table_data, 23 | e.sort_by(e.i1, e.i2).first_for_patient().i3, 24 | {1: 102, 2: 500}, 25 | ) 26 | -------------------------------------------------------------------------------- /tests/spec/str_series_ops/__init__.py: -------------------------------------------------------------------------------- 1 | title = "Operations on all series containing strings" 2 | -------------------------------------------------------------------------------- /tests/spec/table_from_rows/__init__.py: -------------------------------------------------------------------------------- 1 | title = "Defining a table using inline data" 2 | -------------------------------------------------------------------------------- /tests/spec/table_from_rows/test_table_from_rows.py: -------------------------------------------------------------------------------- 1 | from ehrql.tables import PatientFrame, Series, table_from_rows 2 | 3 | from ..tables import p 4 | 5 | 6 | title = "Defining a table using inline data" 7 | 8 | table_data = { 9 | p: """ 10 | | i1 11 | --+---- 12 | 1 | 10 13 | 2 | 20 14 | 3 | 30 15 | """, 16 | } 17 | 18 | 19 | def test_table_from_rows(spec_test): 20 | inline_data = [ 21 | (1, 100), 22 | (3, 300), 23 | ] 24 | 25 | @table_from_rows(inline_data) 26 | class t(PatientFrame): 27 | n = Series(int) 28 | 29 | spec_test( 30 | table_data, 31 | p.i1 + t.n, 32 | { 33 | 1: 10 + 100, 34 | 2: None, 35 | 3: 30 + 300, 36 | }, 37 | ) 38 | -------------------------------------------------------------------------------- /tests/spec/tables.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from ehrql.codes import ICD10MultiCodeString, OPCS4MultiCodeString, SNOMEDCTCode 4 | from ehrql.tables import EventFrame, PatientFrame, Series, table 5 | 6 | 7 | @table 8 | class patient_level_table(PatientFrame): 9 | i1 = Series(int) 10 | i2 = Series(int) 11 | 
b1 = Series(bool) 12 | b2 = Series(bool) 13 | c1 = Series(SNOMEDCTCode) 14 | m1 = Series(ICD10MultiCodeString) 15 | m2 = Series(OPCS4MultiCodeString) 16 | d1 = Series(datetime.date) 17 | d2 = Series(datetime.date) 18 | s1 = Series(str) 19 | s2 = Series(str) 20 | f1 = Series(float) 21 | f2 = Series(float) 22 | 23 | 24 | @table 25 | class event_level_table(EventFrame): 26 | i1 = Series(int) 27 | i2 = Series(int) 28 | i3 = Series(int) 29 | b1 = Series(bool) 30 | b2 = Series(bool) 31 | c1 = Series(SNOMEDCTCode) 32 | m1 = Series(ICD10MultiCodeString) 33 | m2 = Series(OPCS4MultiCodeString) 34 | d1 = Series(datetime.date) 35 | d2 = Series(datetime.date) 36 | s1 = Series(str) 37 | s2 = Series(str) 38 | f1 = Series(float) 39 | f2 = Series(float) 40 | 41 | 42 | # Define short aliases for terser tests 43 | p = patient_level_table 44 | e = event_level_table 45 | -------------------------------------------------------------------------------- /tests/spec/test_conftest.py: -------------------------------------------------------------------------------- 1 | from ehrql.query_model.nodes import Column, TableSchema 2 | 3 | from .conftest import parse_row, parse_table 4 | 5 | 6 | def test_parse_table(): 7 | assert parse_table( 8 | TableSchema(i1=Column(int), i2=Column(int)), 9 | """ 10 | | i1 | i2 11 | --+-----+----- 12 | 1 | 101 | 111 13 | 2 | 201 | 14 | """, 15 | ) == [ 16 | {"patient_id": 1, "i1": 101, "i2": 111}, 17 | {"patient_id": 2, "i1": 201, "i2": None}, 18 | ] 19 | 20 | 21 | def test_parse_row(): 22 | assert parse_row( 23 | {"patient_id": int, "i1": int, "i2": int}, 24 | ["patient_id", "i1", "i2"], 25 | "1 | 101 | 111", 26 | ) == {"patient_id": 1, "i1": 101, "i2": 111} 27 | -------------------------------------------------------------------------------- /tests/spec/toc.py: -------------------------------------------------------------------------------- 1 | # Table of contents for documentation generated from specs 2 | 3 | contents = { 4 | "filter": [ 5 | "test_where", 6 | 
"test_except_where", 7 | ], 8 | "sort_and_pick": [ 9 | "test_sort_by_column_and_pick", 10 | "test_sort_by_multiple_columns_and_pick", 11 | "test_sort_by_column_with_nulls_and_pick", 12 | "test_sort_by_interleaved_with_where", 13 | ], 14 | "aggregate_frame": [ 15 | "test_exists_for_patient", 16 | "test_count_for_patient", 17 | ], 18 | "aggregate_series": [ 19 | "test_minimum_and_maximum_for_patient", 20 | "test_sum_for_patient", 21 | "test_mean_for_patient", 22 | "test_count_distinct_for_patient", 23 | ], 24 | "combine_series": [ 25 | "test_patient_series_and_patient_series", 26 | "test_patient_series_and_value", 27 | "test_event_series_and_event_series", 28 | "test_event_series_and_patient_series", 29 | "test_event_series_and_value", 30 | ], 31 | "series_ops": [ 32 | "test_equality", 33 | "test_containment", 34 | "test_containment_with_series", 35 | "test_map_values", 36 | "test_when_null_then", 37 | "test_maximum_of_and_minimum_of_patient_series", 38 | "test_maximum_of_and_minimum_of_event_series", 39 | ], 40 | "bool_series_ops": [ 41 | "test_logical_ops", 42 | "test_conversion", 43 | ], 44 | "int_series_ops": [ 45 | "test_arithmetic_ops", 46 | "test_comparison_ops", 47 | ], 48 | "code_series_ops": [ 49 | "test_containment", 50 | "test_map_codes_to_categories", 51 | ], 52 | "multi_code_string_series_ops": [ 53 | "test_containment", 54 | ], 55 | "case_expressions": [ 56 | "test_case", 57 | "test_when", 58 | ], 59 | "date_series_ops": [ 60 | "test_date_series_ops", 61 | "test_date_comparisons", 62 | "test_date_comparison_types", 63 | "test_date_aggregations", 64 | ], 65 | "str_series_ops": [ 66 | "test_contains", 67 | ], 68 | "population": [ 69 | "test_population", 70 | ], 71 | "table_from_rows": [ 72 | "test_table_from_rows", 73 | ], 74 | } 75 | -------------------------------------------------------------------------------- /tests/support/mssql/setup.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE test 2 | 
ALTER DATABASE test SET COMPATIBILITY_LEVEL = 100; 3 | 4 | CREATE DATABASE temp_tables 5 | ALTER DATABASE temp_tables SET COMPATIBILITY_LEVEL = 100; 6 | 7 | SET QUOTED_IDENTIFIER ON; 8 | GO 9 | -------------------------------------------------------------------------------- /tests/support/trino/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | if [ "$1" = '/usr/lib/trino/bin/run-trino' ]; then 6 | # If this is the container's first run, initialize the application 7 | # database 8 | if [ ! -f /tmp/app-initialized ]; then 9 | # Initialize the application database asynchronously in a 10 | # background process. This allows a) the trino process to be 11 | # the main process in the container, which allows graceful 12 | # shutdown and other goodies, and b) us to only start the trino 13 | # process once, as opposed to starting, stopping, then 14 | # starting it again. 15 | function initialize_app_database() { 16 | timeout=20 17 | limit="$((SECONDS + timeout))" 18 | 19 | # Note that the container has been initialized so future 20 | # starts won't wipe changes to the data 21 | touch /tmp/app-initialized 22 | } 23 | 24 | initialize_app_database & 25 | fi 26 | fi 27 | 28 | # The Docker library we're using hides stdout from us if the container exits with an error, so send everything to 29 | # stderr. 
30 | exec "$@" 1>&2 31 | -------------------------------------------------------------------------------- /tests/support/trino/etc/catalog/trino.properties: -------------------------------------------------------------------------------- 1 | connector.name=memory 2 | memory.max-data-per-node=128MB 3 | -------------------------------------------------------------------------------- /tests/support/trino/etc/config.properties: -------------------------------------------------------------------------------- 1 | #single node install config 2 | coordinator=true 3 | node-scheduler.include-coordinator=true 4 | http-server.http.port=8080 5 | discovery.uri=http://localhost:8080 6 | 7 | # Attempt to retry failed queries. It's possible this will get around the 8 | # transient "No nodes available to run query" errors that we're seeing in the 9 | # long-running generative tests. See: 10 | # https://trino.io/docs/current/admin/properties-query-management.html#retry-policy 11 | # https://trino.io/docs/current/admin/fault-tolerant-execution.html 12 | retry-policy=QUERY 13 | -------------------------------------------------------------------------------- /tests/support/trino/etc/jvm.config: -------------------------------------------------------------------------------- 1 | -server 2 | -agentpath:/usr/lib/trino/bin/libjvmkill.so 3 | # Reduced both the below values from 80% so Trino doesn't try to hog all the 4 | # RAM which we need to run other database containers 5 | -XX:InitialRAMPercentage=40 6 | -XX:MaxRAMPercentage=40 7 | -XX:G1HeapRegionSize=32M 8 | -XX:+ExplicitGCInvokesConcurrent 9 | -XX:+HeapDumpOnOutOfMemoryError 10 | -XX:+ExitOnOutOfMemoryError 11 | -XX:-OmitStackTraceInFastThrow 12 | -XX:ReservedCodeCacheSize=256M 13 | -XX:PerMethodRecompilationCutoff=10000 14 | -XX:PerBytecodeRecompilationCutoff=10000 15 | -Djdk.attach.allowAttachSelf=true 16 | -Djdk.nio.maxCachedBufferSize=2000000 17 | # Improve AES performance for S3, etc. 
on ARM64 (JDK-8271567) 18 | -XX:+UnlockDiagnosticVMOptions 19 | -XX:+UseAESCTRIntrinsics 20 | # Disable Preventive GC for performance reasons (JDK-8293861) 21 | -XX:-G1UsePreventiveGC 22 | -------------------------------------------------------------------------------- /tests/support/trino/etc/log.properties: -------------------------------------------------------------------------------- 1 | # Enable verbose logging from Trino 2 | #io.trino=DEBUG 3 | -------------------------------------------------------------------------------- /tests/support/trino/etc/node.properties: -------------------------------------------------------------------------------- 1 | node.environment=docker 2 | node.data-dir=/data/trino 3 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/__init__.py -------------------------------------------------------------------------------- /tests/unit/backends/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/backends/__init__.py -------------------------------------------------------------------------------- /tests/unit/backends/test_emis.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from trino import exceptions as trino_exceptions 3 | 4 | from ehrql.backends.emis import EMISBackend 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "exception", 9 | [ 10 | # These are trino errors that we may want to support in future with 11 | # custom exit codes, but currently inherit from the base method 12 | # Database errors 13 | trino_exceptions.DatabaseError, 14 | # OperationalError is a subclass of DatabaseError 15 | 
trino_exceptions.OperationalError, 16 | # TrinoQueryError is encountered for over-complex/over-nested queries 17 | trino_exceptions.TrinoQueryError, 18 | # TrinoUserError is encountered for out of range numbers 19 | trino_exceptions.TrinoUserError, 20 | # TrinoDataError is encountered for bad/out of range dates 21 | trino_exceptions.TrinoDataError, 22 | ], 23 | ) 24 | def test_backend_exceptions(exception): 25 | backend = EMISBackend() 26 | assert backend.get_exit_status_for_exception(exception) is None 27 | -------------------------------------------------------------------------------- /tests/unit/docs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/docs/__init__.py -------------------------------------------------------------------------------- /tests/unit/docs/test_common.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ehrql.docs.common import get_docstring, get_function_body 4 | 5 | 6 | class ExampleClass: 7 | # Comments above 8 | @staticmethod 9 | def example_method_with_docstring( 10 | arg1: int, 11 | arg2: str, 12 | ) -> str: # pragma: no cover 13 | """ 14 | Docstring goes here 15 | """ 16 | # Make it bigger 17 | arg1 = arg1 + 100 18 | # Make it smaller 19 | arg1 = arg1 // 2 20 | return arg2 + str(arg1) 21 | 22 | def example_method_no_docstring(self): # pragma: no cover 23 | # Return the thing 24 | return "foo" 25 | 26 | 27 | EXPECTED_WITH_DOCSTRING = """\ 28 | # Make it bigger 29 | arg1 = arg1 + 100 30 | # Make it smaller 31 | arg1 = arg1 // 2 32 | return arg2 + str(arg1) 33 | """ 34 | 35 | 36 | EXPECTED_NO_DOCSTRING = """\ 37 | # Return the thing 38 | return "foo" 39 | """ 40 | 41 | 42 | @pytest.mark.parametrize( 43 | "method,expected", 44 | [ 45 | (ExampleClass.example_method_with_docstring, EXPECTED_WITH_DOCSTRING), 46 | 
(ExampleClass.example_method_no_docstring, EXPECTED_NO_DOCSTRING), 47 | ], 48 | ) 49 | def test_get_function_body(method, expected): 50 | assert get_function_body(method) == expected 51 | 52 | 53 | def test_get_docstring(): 54 | assert ( 55 | get_docstring(ExampleClass.example_method_with_docstring) 56 | == "Docstring goes here" 57 | ) 58 | 59 | 60 | def test_get_docstring_with_default(): 61 | assert ( 62 | get_docstring(ExampleClass.example_method_no_docstring, default="foo") == "foo" 63 | ) 64 | 65 | 66 | def test_get_docstring_with_error(): 67 | with pytest.raises(ValueError, match="No docstring defined for public object"): 68 | get_docstring(ExampleClass.example_method_no_docstring) 69 | -------------------------------------------------------------------------------- /tests/unit/docs/test_language.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ehrql.docs.language import is_included_attr 4 | from ehrql.utils.docs_utils import exclude_from_docs 5 | 6 | 7 | class Example: 8 | some_attr = "some_value" 9 | 10 | def some_method(self): 11 | raise NotImplementedError() 12 | 13 | @property 14 | def some_property(self): 15 | raise NotImplementedError() 16 | 17 | def _some_internal_method(self): 18 | raise NotImplementedError() 19 | 20 | @exclude_from_docs 21 | def some_excluded_method(self): 22 | raise NotImplementedError() 23 | 24 | 25 | @pytest.mark.parametrize( 26 | "name,expected", 27 | [ 28 | ("some_attr", False), 29 | ("some_method", True), 30 | ("some_property", True), 31 | ("_some_internal_method", False), 32 | ("some_excluded_method", False), 33 | ], 34 | ) 35 | def test_is_included_attr(name, expected): 36 | value = getattr(Example, name) 37 | assert is_included_attr(name, value) == expected 38 | -------------------------------------------------------------------------------- /tests/unit/docs/test_schemas.py: -------------------------------------------------------------------------------- 1 | 
import pytest 2 | 3 | from ehrql.docs.schemas import get_table_docstring 4 | from ehrql.tables import EventFrame, Series, table 5 | 6 | 7 | def test_get_table_docstring(): 8 | @table 9 | class parent_table(EventFrame): 10 | "I have a docstring" 11 | 12 | col_a = Series(str) 13 | 14 | @table 15 | class child_table(parent_table.__class__): 16 | """ 17 | I have a docstring 18 | 19 | With some extra stuff 20 | """ 21 | 22 | col_b = Series(str) 23 | 24 | assert ( 25 | get_table_docstring(child_table.__class__) 26 | == "I have a docstring\n\nWith some extra stuff" 27 | ) 28 | 29 | 30 | def test_get_table_docstring_with_mismatch(): 31 | @table 32 | class parent_table(EventFrame): 33 | "I have a docstring" 34 | 35 | col_a = Series(str) 36 | 37 | @table 38 | class child_table(parent_table.__class__): 39 | "I have a different docstring" 40 | 41 | col_b = Series(str) 42 | 43 | with pytest.raises(ValueError): 44 | get_table_docstring(child_table.__class__) 45 | -------------------------------------------------------------------------------- /tests/unit/dummy_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/dummy_data/__init__.py -------------------------------------------------------------------------------- /tests/unit/dummy_data/test_dependencies.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | import pytest 4 | 5 | import ehrql.dummy_data as dummy_data 6 | 7 | 8 | PY_FILES = list(pathlib.Path(dummy_data.__file__).parent.glob("*.py")) 9 | 10 | 11 | @pytest.mark.parametrize("file", PY_FILES, ids=[f.name for f in PY_FILES]) 12 | def test_dummy_data_does_not_refer_to_nextgen(file): 13 | with open(file) as i: 14 | source = i.read() 15 | 16 | namespace = {} 17 | exec(source, namespace, namespace) 18 | for name, value in namespace.items(): 19 | if hasattr(value, 
"__module__") and value.__module__ is not None: 20 | assert "dummy_data_nextgen" not in value.__module__, name 21 | -------------------------------------------------------------------------------- /tests/unit/dummy_data_nextgen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/dummy_data_nextgen/__init__.py -------------------------------------------------------------------------------- /tests/unit/file_formats/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/file_formats/__init__.py -------------------------------------------------------------------------------- /tests/unit/file_formats/test_base.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ehrql.file_formats.base import FileValidationError, validate_columns 4 | from ehrql.query_model.column_specs import ColumnSpec 5 | 6 | 7 | def test_validate_columns(): 8 | # Column order is not significant, neither is the presence of additional columns so 9 | # long as all required columns are present 10 | validate_columns( 11 | ["a", "b", "c", "d"], 12 | { 13 | "c": ColumnSpec(int), 14 | "b": ColumnSpec(int), 15 | "a": ColumnSpec(int), 16 | }, 17 | ) 18 | 19 | 20 | def test_validate_columns_fails_on_missing_columns_by_default(): 21 | with pytest.raises(FileValidationError, match="Missing columns: b, d"): 22 | validate_columns( 23 | ["c", "a"], 24 | { 25 | "a": ColumnSpec(int), 26 | "b": ColumnSpec(int), 27 | "c": ColumnSpec(int), 28 | "d": ColumnSpec(int), 29 | }, 30 | ) 31 | 32 | 33 | def test_validate_columns_allows_missing_columns(): 34 | validate_columns( 35 | ["c", "a"], 36 | { 37 | "a": ColumnSpec(int), 38 | "b": ColumnSpec(int), 39 | "c": 
ColumnSpec(int), 40 | }, 41 | allow_missing_columns=True, 42 | ) 43 | 44 | 45 | def test_validate_columns_does_not_allow_missing_nonnullable_columns(): 46 | with pytest.raises(FileValidationError, match="Missing columns: b"): 47 | validate_columns( 48 | ["c", "a"], 49 | { 50 | "a": ColumnSpec(int), 51 | "b": ColumnSpec(int, nullable=False), 52 | "c": ColumnSpec(int), 53 | }, 54 | allow_missing_columns=True, 55 | ) 56 | -------------------------------------------------------------------------------- /tests/unit/measures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/measures/__init__.py -------------------------------------------------------------------------------- /tests/unit/measures/test_disclosure_control.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ehrql.measures.disclosure_control import apply_sdc 4 | 5 | 6 | @pytest.mark.parametrize("i,expected", [(6, 0), (7, 0), (8, 10)]) 7 | def test_apply_sdc(i, expected): 8 | assert apply_sdc(i) == expected 9 | 10 | 11 | @pytest.mark.parametrize("bad_value", [-1, 7.1]) 12 | def test_apply_sdc_with_bad_value(bad_value): 13 | with pytest.raises(AssertionError): 14 | apply_sdc(bad_value) 15 | -------------------------------------------------------------------------------- /tests/unit/query_engines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/query_engines/__init__.py -------------------------------------------------------------------------------- /tests/unit/query_engines/test_in_memory.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | 3 | from ehrql.query_engines.in_memory import 
InMemoryQueryEngine 4 | from ehrql.query_engines.in_memory_database import InMemoryDatabase 5 | from ehrql.query_language import EventFrame, Series, table 6 | 7 | 8 | @table 9 | class events(EventFrame): 10 | date = Series(date) 11 | 12 | 13 | def test_pick_one_row_per_patient(): 14 | # This test verifies that picking one row per patient works without first having 15 | # applied QM transformations to all variables in a dataset. 16 | database = InMemoryDatabase( 17 | { 18 | events._qm_node: [ 19 | (1, date(2023, 1, 1)), 20 | ], 21 | } 22 | ) 23 | engine = InMemoryQueryEngine(database) 24 | engine.cache = {} 25 | frame = events.sort_by(events.date).first_for_patient() 26 | engine.visit(frame._qm_node) 27 | -------------------------------------------------------------------------------- /tests/unit/query_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/query_model/__init__.py -------------------------------------------------------------------------------- /tests/unit/query_model/test_constraints.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | 3 | from ehrql.tables import Constraint 4 | 5 | 6 | def test_categorical_validation(): 7 | c = Constraint.Categorical((1, "a")) 8 | assert c.validate("a") 9 | assert c.validate(None) 10 | assert not c.validate("") 11 | assert not c.validate("b") 12 | 13 | 14 | def test_not_null_validation(): 15 | c = Constraint.NotNull() 16 | assert c.validate(1) 17 | assert not c.validate(None) 18 | 19 | 20 | def test_unique_validation(): 21 | c = Constraint.Unique() 22 | assert c.validate(1) 23 | 24 | 25 | def test_first_of_month_validation(): 26 | c = Constraint.FirstOfMonth() 27 | assert c.validate(date(2024, 1, 1)) 28 | assert c.validate(None) 29 | assert not c.validate(date(2024, 1, 2)) 30 | 31 | 32 | def 
test_regex_validation(): 33 | c = Constraint.Regex("E020[0-9]{5}") 34 | assert c.validate("E02012345") 35 | assert c.validate(None) 36 | assert not c.validate("") 37 | assert not c.validate("E020") 38 | 39 | 40 | def test_closed_range_validation(): 41 | c = Constraint.ClosedRange(1, 3) 42 | assert c.validate(2) 43 | assert c.validate(None) 44 | assert not c.validate(0) 45 | assert not c.validate(4) 46 | -------------------------------------------------------------------------------- /tests/unit/query_model/test_graphs.py: -------------------------------------------------------------------------------- 1 | from ehrql import Dataset 2 | from ehrql.query_model.graphs import build_graph 3 | from ehrql.tables.tpp import patients 4 | 5 | 6 | def test_build_graph(): 7 | dataset = Dataset() 8 | year = patients.date_of_birth.year 9 | dataset.define_population(year >= 1940) 10 | dataset.year = year 11 | 12 | # We just want to check that nothing blows up 13 | build_graph(dataset._compile()) 14 | -------------------------------------------------------------------------------- /tests/unit/test_example_data.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | import ehrql 6 | from ehrql.query_engines.local_file import LocalFileQueryEngine 7 | from ehrql.query_language import BaseFrame 8 | from ehrql.tables import core 9 | 10 | 11 | # Example CSV files are given for all core tables 12 | EXAMPLE_TABLES = [getattr(core, table) for table in core.__all__] 13 | 14 | EXAMPLE_DATA_DIR = Path(ehrql.__file__).parent / "example-data" 15 | 16 | 17 | @pytest.mark.parametrize( 18 | "ql_table", 19 | EXAMPLE_TABLES, 20 | ids=lambda t: f"{t.__module__}.{t.__class__.__qualname__}", 21 | ) 22 | def test_populate_database_using_example_data(ql_table: BaseFrame): 23 | # The engine populates the database with the example data and validates the column 24 | # specs in the process 25 | engine = 
LocalFileQueryEngine(EXAMPLE_DATA_DIR) 26 | engine.populate_database([ql_table._qm_node], allow_missing_columns=False) 27 | -------------------------------------------------------------------------------- /tests/unit/test_pyproject_minimal.py: -------------------------------------------------------------------------------- 1 | import toml 2 | 3 | 4 | def test_pyproject_minimal_is_subset_of_pyproject(): 5 | with open("pyproject.toml") as f: 6 | pyproject = toml.load(f) 7 | with open("pyproject.minimal.toml") as f: 8 | minimal = toml.load(f) 9 | 10 | # `pyproject.minimal.toml` doesn't need to contain everything `pyproject.toml` 11 | # contains, but whatever it does contain should agree with `pyproject.toml` 12 | assert minimal.keys() == {"project"} 13 | for key, value in minimal["project"].items(): 14 | assert value == pyproject["project"][key] 15 | -------------------------------------------------------------------------------- /tests/unit/test_sqlalchemy_types.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pytest 4 | from sqlalchemy import types 5 | 6 | from ehrql.codes import CTV3Code 7 | from ehrql.sqlalchemy_types import type_from_python_type 8 | 9 | 10 | @pytest.mark.parametrize( 11 | "type_,expected", 12 | [ 13 | (bool, types.Boolean), 14 | (datetime.date, types.Date), 15 | (float, types.Float), 16 | (int, types.Integer), 17 | (str, types.String), 18 | (CTV3Code, types.String), 19 | ], 20 | ) 21 | def test_type_from_python_type(type_, expected): 22 | assert type_from_python_type(type_) == expected 23 | 24 | 25 | class UnknownType: ... 
26 | 27 | 28 | def test_type_from_python_type_raises_error_on_unknown_type(): 29 | with pytest.raises(TypeError): 30 | type_from_python_type(UnknownType) 31 | 32 | 33 | class TypeWithMethod: 34 | @classmethod 35 | def _primitive_type(cls): 36 | return int 37 | 38 | 39 | def test_type_from_python_type_respects_primitive_type_method(): 40 | assert type_from_python_type(TypeWithMethod) == types.Integer 41 | -------------------------------------------------------------------------------- /tests/unit/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/utils/__init__.py -------------------------------------------------------------------------------- /tests/unit/utils/test_functools_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ehrql.utils.functools_utils import singledispatchmethod_with_cache 4 | 5 | 6 | @pytest.fixture 7 | def TestClass(): 8 | COUNTER = 0 9 | 10 | class TestClass: 11 | @singledispatchmethod_with_cache 12 | def test(self, value): 13 | assert False 14 | 15 | @test.register(str) 16 | def test_str(self, value): 17 | # Use a shared counter to give different results for each call 18 | nonlocal COUNTER 19 | COUNTER += 1 20 | return value, COUNTER 21 | 22 | return TestClass 23 | 24 | 25 | def test_results_are_cached(TestClass): 26 | obj = TestClass() 27 | assert obj.test("hello") is obj.test("hello") 28 | 29 | 30 | def test_cache_is_unique_to_instances(TestClass): 31 | obj1 = TestClass() 32 | obj2 = TestClass() 33 | assert obj1.test("hello") is not obj2.test("hello") 34 | 35 | 36 | def test_cache_can_be_cleared(TestClass): 37 | obj = TestClass() 38 | result = obj.test("hello") 39 | obj.test.cache_clear() 40 | assert result is not obj.test("hello") 41 | 42 | 43 | def test_clearing_cache_only_affects_single_instance(TestClass): 44 | 
obj1 = TestClass() 45 | obj2 = TestClass() 46 | result1 = obj1.test("hello") 47 | result2 = obj2.test("hello") 48 | obj1.test.cache_clear() 49 | assert result1 is not obj1.test("hello") 50 | assert result2 is obj2.test("hello") 51 | -------------------------------------------------------------------------------- /tests/unit/utils/test_log_utils.py: -------------------------------------------------------------------------------- 1 | from ehrql.utils import log_utils 2 | 3 | 4 | def test_kv(): 5 | assert log_utils.kv({}) == "" 6 | assert ( 7 | log_utils.kv({"foo": "foo", "bar": 1, "baz": [1, 2, 3]}) 8 | == "foo=foo bar=1 baz=[1, 2, 3]" 9 | ) 10 | -------------------------------------------------------------------------------- /tests/unit/utils/test_math_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ehrql.utils.math_utils import get_grouping_level_as_int 4 | 5 | 6 | @pytest.mark.parametrize( 7 | "all_groups, group_subset,expected", 8 | [ 9 | ([], [], 0), 10 | (["a", "b"], ["a"], 1), 11 | (["a", "b"], ["b"], 2), 12 | (["a", "b"], ["a", "b"], 0), 13 | (["d", "e", "f"], ["d"], 3), 14 | (["d", "e", "f"], ["f", "d"], 2), 15 | (["d", "e", "f"], ["d", "f"], 2), 16 | (["a", "b", "c", "d", "e", "f"], ["a"], 31), 17 | (["a", "b", "c", "d", "e", "f"], ["a", "b", "c", "d", "e", "f"], 0), 18 | ], 19 | ) 20 | def test_get_grouping_level_as_int(all_groups, group_subset, expected): 21 | assert get_grouping_level_as_int(all_groups, group_subset) == expected 22 | -------------------------------------------------------------------------------- /tests/unit/utils/test_regex_utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import pytest 4 | 5 | from ehrql.utils import regex_utils 6 | 7 | 8 | @pytest.mark.parametrize( 9 | "re_str,examples", 10 | [ 11 | # Branches 12 | ( 13 | "abc(foo|bar)", 14 | ["abcbar", "abcfoo"], 15 | ), 16 | # Ranges 17 | ( 18 
| "[A-Z][0-9]", 19 | ["D1", "V1", "H0", "L9", "E2"], 20 | ), 21 | # Repeats 22 | ( 23 | "A{2,4}_?B{2}", 24 | ["AAABB", "AABB", "AA_BB", "AABB", "AAA_BB"], 25 | ), 26 | # Unbounded repeats 27 | ( 28 | "a+b*", 29 | ["aaaaaaaab", "ab", "aaaaaaaaaa", "aab", "aaaaaabbb"], 30 | ), 31 | # All together now ... 32 | ( 33 | "(none|alpha[A-Z]{3,5}|digit[0-9]{3,5})", 34 | ["alphaCVD", "alphaALT", "alphaFAH", "none", "digit18445"], 35 | ), 36 | ], 37 | ) 38 | def test_create_regex_generator(re_str, examples): 39 | generator = regex_utils.create_regex_generator(re_str) 40 | rnd = random.Random(1234) 41 | assert [generator(rnd) for _ in examples] == examples 42 | 43 | 44 | def test_validate_regex(): 45 | assert regex_utils.validate_regex("E[A-Z]{3}-(foo|bar)") 46 | 47 | 48 | @pytest.mark.parametrize( 49 | "re_str,error", 50 | [ 51 | # Parse errors from Python's regex engine are bubbled up 52 | ("abc(123", r"missing \), unterminated subpattern at position 3"), 53 | # Valid regexes which use unhandled constructs (e.g. 
non-greedy matches) should 54 | # raise an "unsupported" error 55 | ("t+?test", "unsupported"), 56 | # Subpattern groups are supported, but attempting to set flags inside the group 57 | # is not 58 | ("(?i:TEST)", "unsupported"), 59 | # And neither is unsetting flags 60 | ("(?-i:TEST)", "unsupported"), 61 | ], 62 | ) 63 | def test_validate_regex_error(re_str, error): 64 | with pytest.raises(regex_utils.RegexError, match=error): 65 | regex_utils.validate_regex(re_str) 66 | -------------------------------------------------------------------------------- /tests/unit/utils/test_sequence_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ehrql.utils.sequence_utils import ordered_set 4 | 5 | 6 | @pytest.mark.parametrize( 7 | "input_list,expected", 8 | [ 9 | ([4, 3, 2, 3, 5, 5, 2, 2, 1, 4], [4, 3, 2, 5, 1]), 10 | ([4, -1, 3, 3, 2], [4, -1, 3, 2]), 11 | (["f", "d", "f", "f", "d", "e", "f"], ["f", "d", "e"]), 12 | ([1, "d", 2, "d", 3, "d"], [1, "d", 2, 3]), 13 | ], 14 | ) 15 | def test_ordered_set(input_list, expected): 16 | assert ordered_set(input_list) == expected 17 | -------------------------------------------------------------------------------- /tests/unit/utils/test_string_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ehrql.utils.string_utils import strip_indent 4 | 5 | 6 | @pytest.mark.parametrize( 7 | "s,expected", 8 | [ 9 | ( 10 | "Should\nbe\nuntouched", 11 | "Should\nbe\nuntouched", 12 | ), 13 | ( 14 | """ 15 | Leading newline and indent should be stripped: 16 | 17 | But nested indent retained 18 | 19 | Like this. 20 | """, 21 | ( 22 | "Leading newline and indent should be stripped:\n" 23 | "\n" 24 | " But nested indent retained\n" 25 | "\n" 26 | "Like this." 
27 | ), 28 | ), 29 | ], 30 | ) 31 | def test_strip_indent(s, expected): 32 | assert strip_indent(s) == expected 33 | -------------------------------------------------------------------------------- /tests/unit/utils/test_traceback_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ehrql.utils.traceback_utils import get_trimmed_traceback, walk_traceback 4 | 5 | 6 | # NOTE 7 | # 8 | # These tests exist purely to exercise some edge cases of the module and keep coverage 9 | # happy. The actual behaviour of the module is covered in: 10 | # tests/integration/utils/test_traceback_utils.py 11 | 12 | 13 | def test_walk_to_end_of_traceback(): 14 | exc = exception_with_traceback() 15 | tb_list = list(walk_traceback(exc.__traceback__)) 16 | assert len(tb_list) == 1 17 | 18 | 19 | def test_get_trimmed_traceback_with_incorrect_filename(): 20 | exc = exception_with_traceback() 21 | with pytest.raises(StopIteration): 22 | get_trimmed_traceback(exc, "no_such_file") 23 | 24 | 25 | def exception_with_traceback(): 26 | try: 27 | raise ValueError() 28 | except ValueError as exc: 29 | return exc 30 | --------------------------------------------------------------------------------