├── .dockerignore
├── .env
├── .github
    ├── dependabot.yml
    └── workflows
    │   ├── build-and-deploy.yml
    │   ├── check-docs.yml
    │   ├── deploy-documentation.yml
    │   ├── generative-tests.yml
    │   ├── main.yml
    │   ├── pages-deployment.yml
    │   ├── update-dependencies.yml
    │   ├── update-external-studies.yml
    │   ├── update-pledge.yml
    │   └── update-tpp-schema.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .prettierignore
├── .python-version
├── .vscode
    └── launch.json
├── DEVELOPERS.md
├── Dockerfile
├── GLOSSARY.md
├── LICENSE
├── README.md
├── bin
    ├── LICENSE
    ├── cosmopolitan-release-url.txt
    └── pledge
├── build-dependencies.txt
├── dependencies.txt
├── docs
    ├── explanation
    │   ├── backend-tables.md
    │   ├── index.md
    │   ├── measures.md
    │   ├── output-formats.md
    │   ├── running-ehrql.md
    │   ├── selecting-populations-for-study.md
    │   ├── using-ehrql-in-opensafely-projects.md
    │   ├── vscode-extension.md
    │   ├── vscode_extension_ehrql_debug.png
    │   ├── vscode_extension_menu_bar_button.png
    │   ├── vscode_extension_run_button.png
    │   ├── vscode_extension_run_button_dropdown.png
    │   ├── vscode_extension_search.png
    │   ├── vscode_extensions_icon.png
    │   └── vscode_extensions_icon_updates.png
    ├── how-to
    │   ├── assign-multiple-columns.md
    │   ├── codelists.md
    │   ├── define-population.md
    │   ├── dummy-data.md
    │   ├── dummy-measures-data.md
    │   ├── errors.md
    │   ├── examples.md
    │   ├── index.md
    │   ├── opensafely_exec_create_dummy_tables.png
    │   ├── opensafely_exec_dummy_data_file.png
    │   ├── opensafely_exec_dummy_measures_data_file.png
    │   └── test-dataset-definition.md
    ├── includes
    │   └── generated_docs
    │   │   ├── backends.md
    │   │   ├── cli.md
    │   │   ├── language__codelists.md
    │   │   ├── language__dataset.md
    │   │   ├── language__date_arithmetic.md
    │   │   ├── language__frames.md
    │   │   ├── language__functions.md
    │   │   ├── language__measures.md
    │   │   ├── language__series.md
    │   │   ├── schemas.md
    │   │   ├── schemas
    │   │       ├── core.md
    │   │       ├── emis.md
    │   │       ├── raw.core.md
    │   │       ├── raw.emis.md
    │   │       ├── raw.tpp.md
    │   │       ├── smoketest.md
    │   │       └── tpp.md
    │   │   └── specs.md
    ├── index.md
    ├── reference
    │   ├── backends.md
    │   ├── cheatsheet.md
    │   ├── cli.md
    │   ├── features.md
    │   ├── index.md
    │   ├── language.md
    │   ├── schemas
    │   ├── schemas.md
    │   └── upgrading-ehrql-from-v0-to-v1.md
    ├── sandbox
    │   ├── medications.csv
    │   └── patients.csv
    ├── stylesheets
    │   └── extra.css
    └── tutorial
    │   ├── building-a-dataset
    │       └── index.md
    │   ├── index.md
    │   ├── more-complex-transformations
    │       └── index.md
    │   ├── quiz
    │       ├── index.md
    │       ├── play-button-drop-down.png
    │       └── play-button.png
    │   ├── setting-up
    │       ├── building-codespace.png
    │       ├── enhanced-tracking-protection.png
    │       ├── green-buttons.png
    │       ├── index.md
    │       ├── new-codespace-screen.png
    │       ├── run-button.png
    │       └── successful-run.png
    │   ├── simple-transformations
    │       ├── autocomplete-example.png
    │       └── index.md
    │   ├── using-ehrql-as-part-of-a-study
    │       ├── index.md
    │       └── terminal.png
    │   ├── working-with-data-with-ehrql
    │       └── index.md
    │   └── xkcd-2582.png
├── ehrql
    ├── VERSION
    ├── __init__.py
    ├── __main__.py
    ├── assurance.py
    ├── backends
    │   ├── __init__.py
    │   ├── base.py
    │   ├── emis.py
    │   └── tpp.py
    ├── codes.py
    ├── debugger.py
    ├── docs
    │   ├── __init__.py
    │   ├── __main__.py
    │   ├── backends.py
    │   ├── cli.py
    │   ├── common.py
    │   ├── language.py
    │   ├── render_includes
    │   │   ├── __init__.py
    │   │   ├── backends.py
    │   │   ├── cli.py
    │   │   ├── language.py
    │   │   ├── schemas.py
    │   │   └── specs.py
    │   ├── schemas.py
    │   └── specs.py
    ├── dummy_data
    │   ├── __init__.py
    │   ├── generator.py
    │   ├── measures.py
    │   └── query_info.py
    ├── dummy_data_nextgen
    │   ├── __init__.py
    │   ├── generator.py
    │   ├── measures.py
    │   └── query_info.py
    ├── example-data
    │   ├── clinical_events.csv
    │   ├── medications.csv
    │   ├── ons_deaths.csv
    │   ├── patients.csv
    │   └── practice_registrations.csv
    ├── exceptions.py
    ├── file_formats
    │   ├── __init__.py
    │   ├── arrow.py
    │   ├── base.py
    │   ├── console.py
    │   ├── csv.py
    │   ├── main.py
    │   └── validation.py
    ├── loaders.py
    ├── main.py
    ├── measures
    │   ├── __init__.py
    │   ├── calculate.py
    │   ├── disclosure_control.py
    │   └── measures.py
    ├── query_engines
    │   ├── __init__.py
    │   ├── base.py
    │   ├── base_sql.py
    │   ├── debug.py
    │   ├── in_memory.py
    │   ├── in_memory_database.py
    │   ├── local_file.py
    │   ├── mssql.py
    │   ├── mssql_dialect.py
    │   ├── sqlite.py
    │   ├── sqlite_dialect.py
    │   ├── trino.py
    │   └── trino_dialect.py
    ├── query_language.py
    ├── query_model
    │   ├── __init__.py
    │   ├── column_specs.py
    │   ├── graphs.py
    │   ├── introspection.py
    │   ├── nodes.py
    │   ├── population_validation.py
    │   ├── query_graph_rewriter.py
    │   ├── table_schema.py
    │   └── transforms.py
    ├── quiz.py
    ├── renderers.py
    ├── serializer.py
    ├── sqlalchemy_types.py
    ├── tables
    │   ├── __init__.py
    │   ├── core.py
    │   ├── emis.py
    │   ├── raw
    │   │   ├── __init__.py
    │   │   ├── core.py
    │   │   ├── emis.py
    │   │   └── tpp.py
    │   ├── smoketest.py
    │   └── tpp.py
    └── utils
    │   ├── __init__.py
    │   ├── date_utils.py
    │   ├── docs_utils.py
    │   ├── functools_utils.py
    │   ├── itertools_utils.py
    │   ├── log_utils.py
    │   ├── math_utils.py
    │   ├── module_utils.py
    │   ├── mssql_log_utils.py
    │   ├── regex_utils.py
    │   ├── sequence_utils.py
    │   ├── sqlalchemy_exec_utils.py
    │   ├── sqlalchemy_query_utils.py
    │   ├── string_utils.py
    │   ├── traceback_utils.py
    │   └── typing_utils.py
├── hooks
    ├── __init__.py
    └── parent_snippets.py
├── justfile
├── mkdocs.yml
├── pyproject.minimal.toml
├── pyproject.toml
├── requirements.dev.in
├── requirements.dev.txt
├── requirements.prod.in
├── requirements.prod.txt
├── scripts
    ├── .gitignore
    ├── generate_example_data.py
    ├── generate_quiz_from_answers.py
    └── run-debug.sh
└── tests
    ├── __init__.py
    ├── acceptance
        ├── __init__.py
        ├── external_studies
        │   ├── mainroute_cancer
        │   │   ├── analysis
        │   │   │   ├── codelists.py
        │   │   │   ├── dataset_definition.py
        │   │   │   ├── define_static_dataset.py
        │   │   │   └── measures_demo.py
        │   │   └── codelists
        │   │   │   ├── opensafely-ethnicity-snomed-0removed.csv
        │   │   │   ├── phc-2ww-referral-colorectal.csv
        │   │   │   ├── phc-colorectal-cancer-icd10.csv
        │   │   │   ├── phc-fit-test.csv
        │   │   │   ├── phc-phc-colorectal-cancer-ctv3.csv
        │   │   │   ├── phc-phc-colorectal-cancer-snomed.csv
        │   │   │   ├── phc-symptom-colorectal-cibh.csv
        │   │   │   ├── phc-symptom-colorectal-ida.csv
        │   │   │   ├── phc-symptom-colorectal-pr-bleeding.csv
        │   │   │   ├── phc-symptom-colorectal-wl.csv
        │   │   │   ├── phc-symptom-lowergi-abdo-mass.csv
        │   │   │   ├── phc-symptom-lowergi-abdo-pain.csv
        │   │   │   ├── phc-symptom-lowergi-anaemia.csv
        │   │   │   └── phc-symptoms-colorectal-cancer.csv
        │   ├── qof-diabetes
        │   │   ├── analysis
        │   │   │   ├── codelists.py
        │   │   │   ├── dataset_definition_dm017.py
        │   │   │   ├── dataset_definition_dm020.py
        │   │   │   ├── dataset_definition_dm021.py
        │   │   │   ├── dm_dataset.py
        │   │   │   └── variable_lib_helper.py
        │   │   └── codelists
        │   │   │   ├── nhsd-primary-care-domain-refsets-bldtestdec_cod.csv
        │   │   │   ├── nhsd-primary-care-domain-refsets-dm_cod.csv
        │   │   │   ├── nhsd-primary-care-domain-refsets-dminvite_cod.csv
        │   │   │   ├── nhsd-primary-care-domain-refsets-dmmax_cod.csv
        │   │   │   ├── nhsd-primary-care-domain-refsets-dmpcadec_cod.csv
        │   │   │   ├── nhsd-primary-care-domain-refsets-dmpcapu_cod.csv
        │   │   │   ├── nhsd-primary-care-domain-refsets-dmres_cod.csv
        │   │   │   ├── nhsd-primary-care-domain-refsets-ifcchbam_cod.csv
        │   │   │   ├── nhsd-primary-care-domain-refsets-mildfrail_cod.csv
        │   │   │   ├── nhsd-primary-care-domain-refsets-modfrail_cod.csv
        │   │   │   ├── nhsd-primary-care-domain-refsets-serfruc_cod.csv
        │   │   │   └── nhsd-primary-care-domain-refsets-sevfrail_cod.csv
        │   ├── test-age-distribution
        │   │   └── analysis
        │   │   │   └── dataset_definition.py
        │   └── waiting-list
        │   │   ├── analysis
        │   │       ├── codelists.py
        │   │       ├── dataset_definition_clockstops.py
        │   │       ├── measures_checks.py
        │   │       └── measures_opioid.py
        │   │   └── codelists
        │   │       ├── ons-depression-and-generalised-anxiety-disorder-diagnoses-and-symptoms.csv
        │   │       ├── opensafely-anxiety-disorders.csv
        │   │       ├── opensafely-cancer-excluding-lung-and-haematological-snomed.csv
        │   │       ├── opensafely-chronic-cardiac-disease.csv
        │   │       ├── opensafely-chronic-kidney-disease-snomed.csv
        │   │       ├── opensafely-chronic-liver-disease.csv
        │   │       ├── opensafely-chronic-respiratory-disease.csv
        │   │       ├── opensafely-depression.csv
        │   │       ├── opensafely-diabetes.csv
        │   │       ├── opensafely-ethnicity-snomed-0removed.csv
        │   │       ├── opensafely-haematological-cancer-snomed.csv
        │   │       ├── opensafely-high-dose-long-acting-opioids-openprescribing-dmd.csv
        │   │       ├── opensafely-hypertension.csv
        │   │       ├── opensafely-lung-cancer-snomed.csv
        │   │       ├── opensafely-nsaids-oral.csv
        │   │       ├── opensafely-osteoarthritis.csv
        │   │       ├── opensafely-rheumatoid-arthritis.csv
        │   │       ├── opensafely-strongopioidsCW-dmd.csv
        │   │       ├── opensafely-symptoms-anxiety.csv
        │   │       ├── opensafely-symptoms-depression.csv
        │   │       ├── opensafely-symptoms-pain.csv
        │   │       ├── primis-covid19-vacc-uptake-old-sev_mental_cod.csv
        │   │       ├── user-anschaf-antidepressants-dmd.csv
        │   │       ├── user-anschaf-codeine-for-pain-dmd.csv
        │   │       ├── user-anschaf-gabapentinoids-dmd.csv
        │   │       ├── user-anschaf-long-acting-opioids-dmd.csv
        │   │       ├── user-anschaf-opioids-for-analgesia-dmd.csv
        │   │       ├── user-anschaf-weak-opioids-dmd.csv
        │   │       ├── user-hjforbes-opioid-dependency-clinical-diagnosis.csv
        │   │       ├── user-speed-vm-antidepressants-for-pain-indication-dmd.csv
        │   │       └── user-speed-vm-nsaids-dmd.csv
        ├── test_external_studies.py
        └── update_external_studies.py
    ├── autocomplete
        ├── __init__.py
        ├── autocomplete_definition.py
        ├── language_server.py
        └── test_autocomplete.py
    ├── conftest.py
    ├── docker
        ├── __init__.py
        ├── test_cli.py
        └── test_drivers.py
    ├── docs
        ├── __init__.py
        ├── test_complete_examples.py
        ├── test_find_docs_examples.py
        └── test_run_generate_dataset_example.py
    ├── fixtures
        ├── bad_definition_files
        │   ├── bad_import.py
        │   ├── bad_syntax.py
        │   ├── bad_types.py
        │   ├── empty_measures.py
        │   ├── no_dataset.py
        │   ├── no_measures.py
        │   ├── no_population.py
        │   ├── not_a_dataset.py
        │   ├── not_measures_instance.py
        │   └── operator_error.py
        ├── codelist_csvs
        │   ├── categories.csv
        │   ├── custom_col.csv
        │   ├── default_col.csv
        │   ├── extra_whitespace.csv
        │   └── long_csv.csv
        ├── csv_date_merging
        │   ├── measure_test_2021-01-01.csv
        │   ├── measure_test_2021-02-01.csv
        │   ├── measure_test_2021-03-01.csv
        │   ├── measure_test_20210908.csv
        │   ├── measure_test_code_2021-03-01.csv
        │   ├── measure_test_code_2021-04-01.csv
        │   ├── measure_test_error_2021-01-01.csv
        │   ├── measure_test_error_2021-02-01.csv
        │   └── measure_test_event.csv
        ├── debug
        │   └── patients.csv
        ├── dummy_data
        │   ├── dummy-data.csv
        │   ├── dummy-data.txt
        │   ├── extra-column.csv
        │   ├── invalid-bool.csv
        │   ├── invalid-date.csv
        │   ├── invalid-patient-id.csv
        │   ├── long_covid_dummy_data.csv
        │   ├── missing-column.csv
        │   └── zero-date.csv
        ├── good_definition_files
        │   ├── assurance.py
        │   ├── chatty_dataset_definition.py
        │   ├── dataset_definition.py
        │   ├── dataset_definition_with_print.py
        │   ├── debug_definition.py
        │   └── measure_definitions.py
        ├── local_file_engine
        │   ├── events.csv
        │   └── patients.csv
        └── quiz-example-data
        │   ├── addresses.csv
        │   ├── clinical_events.csv
        │   ├── medications.csv
        │   ├── ons_deaths.csv
        │   ├── patients.csv
        │   └── practice_registrations.csv
    ├── functional
        ├── __init__.py
        ├── test_assure.py
        ├── test_create_dummy_tables.py
        ├── test_debug.py
        ├── test_dump_dataset_sql.py
        ├── test_dump_example_data.py
        ├── test_entrypoint.py
        ├── test_generate_dataset.py
        ├── test_generate_measures.py
        ├── test_graph_query.py
        ├── test_isolation_report.py
        ├── test_serialize_definition.py
        └── test_test_connection.py
    ├── generative
        ├── README.md
        ├── __init__.py
        ├── conftest.py
        ├── data_setup.py
        ├── data_strategies.py
        ├── example.py
        ├── generic_strategies.py
        ├── ignored_errors.py
        ├── recording.py
        ├── test_data_setup.py
        ├── test_query_model.py
        └── variable_strategies.py
    ├── integration
        ├── __init__.py
        ├── backends
        │   ├── __init__.py
        │   ├── conftest.py
        │   ├── helpers.py
        │   ├── test_base.py
        │   ├── test_emis.py
        │   └── test_tpp.py
        ├── file_formats
        │   ├── __init__.py
        │   ├── test_arrow.py
        │   ├── test_csv.py
        │   └── test_main.py
        ├── measures
        │   ├── __init__.py
        │   └── test_calculate.py
        ├── query_engines
        │   ├── __init__.py
        │   ├── test_dialects.py
        │   ├── test_local_file.py
        │   ├── test_mssql.py
        │   ├── test_mssql_dialect.py
        │   └── test_trino_dialect.py
        ├── query_model
        │   ├── __init__.py
        │   └── test_transforms.py
        ├── tables
        │   ├── __init__.py
        │   ├── test_core.py
        │   ├── test_emis.py
        │   └── test_tpp.py
        ├── test_query_engines.py
        └── utils
        │   ├── __init__.py
        │   ├── test_mssql_log_utils.py
        │   ├── test_sqlalchemy_exec_utils.py
        │   ├── test_sqlalchemy_query_utils.py
        │   └── test_traceback_utils.py
    ├── lib
        ├── __init__.py
        ├── create_tpp_test_db.py
        ├── databases.py
        ├── docker.py
        ├── emis_schema.py
        ├── file_utils.py
        ├── gentest_example_simplify.py
        ├── inspect_utils.py
        ├── orm_utils.py
        ├── query_model_utils.py
        ├── test_gentest_example_simplify.py
        ├── tpp_categorical_columns.csv
        ├── tpp_data_dictionary.csv
        ├── tpp_decision_support_reference.csv
        ├── tpp_schema.csv
        ├── tpp_schema.py
        └── update_tpp_schema.py
    ├── spec
        ├── README.md
        ├── __init__.py
        ├── aggregate_frame
        │   ├── __init__.py
        │   ├── test_count_for_patient.py
        │   └── test_exists_for_patient.py
        ├── aggregate_series
        │   ├── __init__.py
        │   ├── test_count_distinct_for_patient.py
        │   ├── test_mean_for_patient.py
        │   ├── test_minimum_and_maximum_for_patient.py
        │   └── test_sum_for_patient.py
        ├── bool_series_ops
        │   ├── __init__.py
        │   ├── test_conversion.py
        │   └── test_logical_ops.py
        ├── case_expressions
        │   ├── __init__.py
        │   ├── test_case.py
        │   └── test_when.py
        ├── code_series_ops
        │   ├── __init__.py
        │   ├── test_containment.py
        │   └── test_map_codes_to_categories.py
        ├── combine_series
        │   ├── __init__.py
        │   ├── test_event_series_and_event_series.py
        │   ├── test_event_series_and_patient_series.py
        │   ├── test_event_series_and_value.py
        │   ├── test_patient_series_and_patient_series.py
        │   └── test_patient_series_and_value.py
        ├── conftest.py
        ├── date_series_ops
        │   ├── __init__.py
        │   ├── test_date_aggregations.py
        │   ├── test_date_comparison_types.py
        │   ├── test_date_comparisons.py
        │   └── test_date_series_ops.py
        ├── dummy
        │   ├── __init__.py
        │   └── test_dummy.py
        ├── filter
        │   ├── __init__.py
        │   ├── test_except_where.py
        │   └── test_where.py
        ├── float_series_ops
        │   ├── __init__.py
        │   ├── test_arithmetic_ops.py
        │   ├── test_comparison_ops.py
        │   ├── test_conversion.py
        │   └── test_division_ops.py
        ├── int_series_ops
        │   ├── __init__.py
        │   ├── test_arithmetic_ops.py
        │   ├── test_comparison_ops.py
        │   ├── test_conversion.py
        │   └── test_division_ops.py
        ├── multi_code_string_series_ops
        │   ├── __init__.py
        │   └── test_containment.py
        ├── population
        │   ├── __init__.py
        │   └── test_population.py
        ├── series_ops
        │   ├── __init__.py
        │   ├── test_containment.py
        │   ├── test_containment_with_series.py
        │   ├── test_equality.py
        │   ├── test_map_values.py
        │   ├── test_maximum_of_and_minimum_of_event_series.py
        │   ├── test_maximum_of_and_minimum_of_patient_series.py
        │   └── test_when_null_then.py
        ├── sort_and_pick
        │   ├── __init__.py
        │   ├── test_sort_by_column_and_pick.py
        │   ├── test_sort_by_column_with_nulls_and_pick.py
        │   ├── test_sort_by_interleaved_with_where.py
        │   ├── test_sort_by_multiple_columns_and_pick.py
        │   └── test_sort_extends_to_all_columns_when_underspecified.py
        ├── str_series_ops
        │   ├── __init__.py
        │   └── test_contains.py
        ├── table_from_rows
        │   ├── __init__.py
        │   └── test_table_from_rows.py
        ├── tables.py
        ├── test_conftest.py
        ├── test_specs.py
        └── toc.py
    ├── support
        ├── mssql
        │   ├── entrypoint.sh
        │   └── setup.sql
        └── trino
        │   ├── entrypoint.sh
        │   └── etc
        │       ├── catalog
        │           └── trino.properties
        │       ├── config.properties
        │       ├── jvm.config
        │       ├── log.properties
        │       └── node.properties
    └── unit
        ├── __init__.py
        ├── backends
            ├── __init__.py
            ├── test_base.py
            ├── test_emis.py
            └── test_tpp.py
        ├── docs
            ├── __init__.py
            ├── test_common.py
            ├── test_language.py
            └── test_schemas.py
        ├── dummy_data
            ├── __init__.py
            ├── test_dependencies.py
            ├── test_generator.py
            └── test_query_info.py
        ├── dummy_data_nextgen
            ├── __init__.py
            ├── test_edge_cases_for_coverage.py
            ├── test_generator.py
            ├── test_measures.py
            ├── test_query_info.py
            └── test_specific_datasets.py
        ├── file_formats
            ├── __init__.py
            ├── test_arrow.py
            ├── test_base.py
            ├── test_console.py
            ├── test_csv.py
            └── test_main.py
        ├── measures
            ├── __init__.py
            ├── test_disclosure_control.py
            ├── test_dummy_data.py
            └── test_measures.py
        ├── query_engines
            ├── __init__.py
            ├── test_in_memory.py
            ├── test_in_memory_database.py
            └── test_mssql_dialect.py
        ├── query_model
            ├── __init__.py
            ├── test_column_specs.py
            ├── test_constraints.py
            ├── test_graphs.py
            ├── test_nodes.py
            ├── test_population_validation.py
            ├── test_query_graph_rewriter.py
            ├── test_table_schema.py
            └── test_transforms.py
        ├── test___main__.py
        ├── test_assurance.py
        ├── test_codes.py
        ├── test_debugger.py
        ├── test_docs.py
        ├── test_example_data.py
        ├── test_loaders.py
        ├── test_main.py
        ├── test_pyproject_minimal.py
        ├── test_query_language.py
        ├── test_quiz.py
        ├── test_renderers.py
        ├── test_serializer.py
        ├── test_sqlalchemy_types.py
        ├── test_tables.py
        └── utils
            ├── __init__.py
            ├── test_date_utils.py
            ├── test_functools_utils.py
            ├── test_itertools_utils.py
            ├── test_log_utils.py
            ├── test_math_utils.py
            ├── test_mssql_log_utils.py
            ├── test_regex_utils.py
            ├── test_sequence_utils.py
            ├── test_sqlalchemy_exec_utils.py
            ├── test_sqlalchemy_query_utils.py
            ├── test_string_utils.py
            ├── test_traceback_utils.py
            └── test_typing_utils.py


/.dockerignore:
--------------------------------------------------------------------------------
 1 | .git/
 2 | 
 3 | **/*~
 4 | **/.#*
 5 | **/*#
 6 | **/htmlcov
 7 | **/__pycache__
 8 | **/*.pyc
 9 | **/.python-version
10 | **/.env
11 | **/.venv
12 | **/venv
13 | **/.coverage
14 | **/*.egg-info/
15 | 


--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------
 1 | # This file defines environment variables we want to be set in development
 2 | # environments. The Just command runner and VSCode's Python extension (but not
 3 | # the terminal) should pick these up automatically, see:
 4 | # https://github.com/casey/just#dotenv-load
 5 | # https://code.visualstudio.com/docs/python/environments#_environment-variables
 6 | #
 7 | # You can load these manually in bash using something like:
 8 | #
 9 | #     set -o allexport; source .env; set +o allexport
10 | #
11 | 
12 | # Disable hash randomisation. The kinds of DoS attacks hash seed randomisation
13 | # is designed to protect against don't apply to ehrQL, and having consistent
 14 | # output makes debugging much easier.
15 | PYTHONHASHSEED=0
16 | 
 17 | # Enable event-level queries for testing purposes, but not yet in production.
18 | EHRQL_ENABLE_EVENT_LEVEL_QUERIES=True
19 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | updates:
 4 | 
 5 |   - package-ecosystem: "github-actions"
 6 |     directory: "/"
 7 |     schedule:
 8 |       interval: "weekly"
 9 |     commit-message:
10 |       prefix: "chore: "
11 | 


--------------------------------------------------------------------------------
/.github/workflows/check-docs.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Check documentation
 3 | 
 4 | on:
 5 |   workflow_dispatch:
 6 |   push:
 7 |     branches:
 8 |       - main
 9 |   pull_request:
10 | 
11 | jobs:
12 |   documentation:
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |       - name: Checkout repo
17 |         uses: actions/checkout@v4
18 |       - uses: opensafely-core/setup-action@v1
19 |         with:
20 |           install-just: true
21 |           python-version: "3.11"
22 |           cache-dependency-path: requirements.*.txt
23 | 
24 |       - name: Check generated docs are up-to-date
25 |         run: just docs-check-generated-docs-are-current
26 | 
27 |       # This check becomes somewhat redundant if we fix up the Cloudflare Pages preview
28 |       # to work with Dependabot, because the deployment will also do the build.
29 |       # See https://github.com/opensafely/documentation/issues/930 which documents this problem.
30 |       #
31 |       # However, for any PR, Cloudflare Pages previews sometimes fail for mysterious reasons,
32 |       # and this requires logging into Cloudflare Pages to inspect.
 33 |       # So it is perhaps useful to distinguish a Cloudflare failure from an actual issue with the docs build.
34 |       - name: Check docs build
35 |         run: just docs-build
36 | 


--------------------------------------------------------------------------------
/.github/workflows/deploy-documentation.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: "Trigger a deploy of opensafely documentation site"
 3 | 
 4 | on:
 5 |   workflow_run:
 6 |     workflows:
 7 |       - CI
 8 |     branches:
 9 |       - main
10 |     types:
11 |       - completed
12 | 
13 | jobs:
14 |   build-docs:
15 |     runs-on: ubuntu-latest
16 |     if: ${{ github.event.workflow_run.conclusion == 'success' }}
17 | 
18 |     steps:
19 |       - name: Trigger documentation deploy
20 |         uses: actions/github-script@v7
21 |         with:
22 |           github-token: ${{ secrets.DOCS_WRITE_TOKEN }}
23 |           script: |
24 |             github.rest.actions.createWorkflowDispatch({
25 |               owner: 'opensafely',
26 |               repo: 'documentation',
27 |               workflow_id: 'pages-deployment.yml',
28 |               ref: 'main'
29 |             });
30 | 


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: CI
 3 | 
 4 | on:
 5 |   push:
 6 |     branches:
 7 |       - main
 8 |   pull_request:
 9 | 
10 | jobs:
11 |   check:
12 |     runs-on: ubuntu-latest
13 | 
14 |     steps:
15 |       - uses: actions/checkout@v4
16 |       - uses: opensafely-core/setup-action@v1
17 |         with:
18 |           install-just: true
19 |           python-version: "3.11"
20 |       - name: Set up development environment
21 |         run: just devenv
22 |       - name: Check formatting and linting rules
23 |         run: just check
24 | 
25 |   test:
26 |     runs-on: ubuntu-latest
27 | 
28 |     steps:
29 |       - uses: actions/checkout@v4
30 |       - uses: opensafely-core/setup-action@v1
31 |         with:
32 |           install-just: true
33 |           python-version: "3.11"
34 |       - name: Set up development environment
35 |         run: just devenv
36 |       - name: Run tests
37 |         run: |
38 |           just test-all
39 | 
40 |   tag-new-version:
41 |     # This uses `conventional commits` to generate tags.  A full list
42 |     # of valid prefixes is here:
43 |     # https://github.com/commitizen/conventional-commit-types/blob/master/index.json
44 |     #
45 |     # fix, perf -> patch release
46 |     # feat -> minor release
47 |     # BREAKING CHANGE in footer -> major release
48 |     #
49 |     # anything else (docs, refactor, etc) does not create a release
50 |     if: github.ref == 'refs/heads/main' && github.event_name != 'pull_request'
51 |     needs: [check, test]
52 |     runs-on: ubuntu-latest
53 |     outputs:
54 |       tag: ${{ steps.tag.outputs.new_version }}
55 |     steps:
56 |       - uses: actions/checkout@v4
57 |         with:
58 |           fetch-depth: 0
59 |       - name: Bump version and push tag
60 |         id: tag
61 |         uses: mathieudutour/github-tag-action@a22cf08638b34d5badda920f9daf6e72c477b07b #v6.2
62 |         with:
63 |           github_token: ${{ secrets.GITHUB_TOKEN }}
64 |           default_bump: false
65 |           release_branches: main
66 | 


--------------------------------------------------------------------------------
/.github/workflows/pages-deployment.yml:
--------------------------------------------------------------------------------
 1 | on: [push]
 2 | 
 3 | jobs:
 4 |   deploy:
 5 | 
 6 |     permissions:
 7 |       contents: read
 8 |       deployments: write
 9 | 
10 |     runs-on: ubuntu-latest
11 | 
12 |     name: Deploy to Cloudflare Pages
13 |     steps:
14 |       - name: Checkout repo
15 |         uses: actions/checkout@v4
16 |         with:
17 |           submodules: true
18 | 
19 |       - name: Install Python and just
20 |         uses: opensafely-core/setup-action@v1
21 |         with:
22 |           install-just: true
23 |           python-version: "3.11"
24 | 
25 |       - name: Check docs are current
26 |         run: just docs-check-generated-docs-are-current
27 | 
28 |       - name: Build site
29 |         run: just docs-build
30 | 
31 |       - name: Add a version file
32 |         run: echo ${{ github.sha }} > site/version.html
33 | 
34 |       - name: Publish
35 |         if: ${{ github.actor != 'dependabot[bot]' }}
36 |         uses: cloudflare/pages-action@f0a1cd58cd66095dee69bfa18fa5efd1dde93bca  # v1.5.0
37 |         with:
38 |           accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
39 |           apiToken: ${{ secrets.CLOUDFLARE_DIRECT_UPLOAD_API_TOKEN }}
40 |           directory: "site"
41 |           gitHubToken: ${{ secrets.GITHUB_TOKEN }}
42 |           projectName: "databuilder-docs"
43 | 


--------------------------------------------------------------------------------
/.github/workflows/update-dependencies.yml:
--------------------------------------------------------------------------------
 1 | name: Update python dependencies
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   schedule:
 6 |     - cron:  "0 4 * * WED"
 7 | 
 8 | jobs:
 9 |   update-dependencies:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |     - uses: actions/checkout@v4
13 |     - uses: "opensafely-core/setup-action@v1"
14 |       with:
15 |         python-version: "3.11"
16 |         install-just: true
17 | 
18 |     - uses: actions/create-github-app-token@v2
19 |       id: generate-token
20 |       with:
21 |         app-id: 1031449  # opensafely-core Create PR app
22 |         private-key: ${{ secrets.CREATE_PR_APP_PRIVATE_KEY }}
23 | 
24 |     - uses: bennettoxford/update-dependencies-action@v1
25 |       id: update
26 |       with:
27 |         token: ${{ steps.generate-token.outputs.token }}
28 | 
29 |     - name: Notify slack of PR
30 |       if: ${{ steps.update.outputs.pull-request-operation != 'none' }}
31 |       uses: slackapi/slack-github-action@b0fa283ad8fea605de13dc3f449259339835fc52  # v2.1.0
32 |       with:
33 |         method: chat.postMessage
34 |         token: ${{ secrets.BENNETTBOT_SLACK_BOT_TOKEN }}
35 |         payload: |
36 |           channel: "C080S7W2ZPX"
37 |           text: "Update dependencies\n${{ steps.update.outputs.pull-request-url }}"
38 | 


--------------------------------------------------------------------------------
/.github/workflows/update-pledge.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: "Create PR to update `bin/pledge`"
 3 | 
 4 | on:
 5 |   workflow_dispatch:
 6 |   schedule:
 7 |     - cron:  "33 2 * * *"
 8 | 
 9 | jobs:
10 |   create_pr_to_update_pledge:
11 |     runs-on: ubuntu-latest
12 | 
13 |     steps:
14 |       - uses: actions/checkout@v4
15 |       - uses: opensafely-core/setup-action@v1
16 |         with:
17 |           install-just: true
18 |           python-version: "3.11"
19 | 
20 |       - name: "Ensure `bin/pledge` is at latest version"
21 |         run: just update-pledge
22 | 
23 |       - name: Generate app token
24 |         uses: actions/create-github-app-token@v2
25 |         id: generate-token
26 |         with:
27 |           app-id: 1031449  # opensafely-core Create PR app
28 |           private-key: ${{ secrets.CREATE_PR_APP_PRIVATE_KEY }}
29 | 
30 |       - name: "Create a Pull Request if there are any changes"
31 |         id: create_pr
32 |         uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8
33 |         with:
34 |           add-paths: bin/*
35 |           branch: bot/update-pledge
36 |           base: main
37 |           author: "opensafely-github-bot <opensafely-github-bot@users.noreply.github.com>"
38 |           committer: "opensafely-github-bot <opensafely-github-bot@users.noreply.github.com>"
39 |           commit-message: "fix: Update `bin/pledge`"
40 |           title: "Update `bin/pledge`"
41 |           token: ${{ steps.generate-token.outputs.token }}
42 | 
43 |       # The PR will still require manual approval, this just reduces it to a one-click process
44 |       - name: Enable automerge
45 |         if: steps.create_pr.outputs.pull-request-operation == 'created'
46 |         run: gh pr merge --auto --squash ${{ steps.create_pr.outputs.pull-request-number }}
47 |         env:
48 |           GH_TOKEN: ${{ steps.generate-token.outputs.token }}
49 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | default_language_version:
 2 |   python: python3.11
 3 | 
 4 | exclude: tests/acceptance/external_studies/
 5 | 
 6 | repos:
 7 |   - repo: local
 8 |     hooks:
 9 |     - id: check
10 |       name: check
11 |       entry: just check
12 |       language: system
13 |       types: [python]
14 |       require_serial: true
15 |       pass_filenames: false
16 | 
17 |   - repo: https://github.com/pre-commit/pre-commit-hooks
18 |     rev: v4.3.0
19 |     hooks:
20 |     - id: trailing-whitespace
21 |     - id: end-of-file-fixer
22 |     - id: debug-statements
23 |     - id: check-ast
24 |     - id: check-json
25 |     - id: check-toml
26 |     - id: check-yaml
27 |       # --unsafe is a workaround for the use of !! in mkdocs.yml.
28 |       args: [--unsafe]
29 |     - id: detect-private-key
30 | 


--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | **
2 | 


--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.11
2 | 


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "0.2.0",
 3 |   "configurations": [
 4 |     {
 5 |       "name": "Debug: Current test file",
 6 |       "type": "debugpy",
 7 |       "request": "launch",
 8 |       "args": ["${file}"],
 9 |       "module": "pytest",
10 |       "console": "integratedTerminal"
11 |     },
12 |     {
13 |       "name": "Debug: Generate docs",
14 |       "type": "debugpy",
15 |       "request": "launch",
16 |       "args": ["docs/includes/generated_docs"],
17 |       "module": "ehrql.docs",
18 |       "console": "integratedTerminal"
19 |     }
20 |   ]
21 | }
22 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | OpenSAFELY ehrQL
 2 | Copyright (C) University of Oxford
 3 | 
 4 | This program is free software: you can redistribute it and/or modify
 5 | it under the terms of the GNU General Public License as published by
 6 | the Free Software Foundation, either version 3 of the License, or
 7 | (at your option) any later version.
 8 | 
 9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | GNU General Public License for more details.
13 | 
14 | You should have received a copy of the GNU General Public License
15 | along with this program.  If not, see <https://www.gnu.org/licenses/>.
16 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # OpenSAFELY ehrQL
 2 | 
 3 | ehrQL is a Python-based query language for electronic health record (EHR) data.
 4 | It has been designed for use with the OpenSAFELY platform.
 5 | 
 6 | Documentation is at the [OpenSAFELY documentation site](https://docs.opensafely.org/ehrql).
 7 | 
 8 | # For developers
 9 | 
10 | See [DEVELOPERS.md](DEVELOPERS.md).
11 | 
12 | There is also [a glossary](GLOSSARY.md) of terms used in the codebase.
13 | 
14 | # About the OpenSAFELY framework
15 | 
16 | The OpenSAFELY framework is a Trusted Research Environment (TRE) for electronic
17 | health records research in the NHS, with a focus on public accountability and
18 | research quality.
19 | 
20 | Read more at [OpenSAFELY.org](https://opensafely.org).
21 | 


--------------------------------------------------------------------------------
/bin/LICENSE:
--------------------------------------------------------------------------------
 1 | The accompanying binary `pledge` is included under the license below.
 2 | 
 3 | ---
 4 | 
 5 | ISC License
 6 | 
 7 | Copyright 2020 Justine Alexandra Roberts Tunney
 8 | 
 9 | Permission to use, copy, modify, and/or distribute this software for
10 | any purpose with or without fee is hereby granted, provided that the
11 | above copyright notice and this permission notice appear in all copies.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
14 | WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
15 | WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
16 | AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
17 | DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
18 | PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
19 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
20 | PERFORMANCE OF THIS SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/bin/cosmopolitan-release-url.txt:
--------------------------------------------------------------------------------
1 | https://github.com/jart/cosmopolitan/releases/download/4.0.2/cosmos-4.0.2.zip
2 | 


--------------------------------------------------------------------------------
/bin/pledge:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/bin/pledge


--------------------------------------------------------------------------------
/build-dependencies.txt:
--------------------------------------------------------------------------------
1 | # list ubuntu packages needed to build dependencies, one per line
2 | python3.11-dev
3 | build-essential
4 | 


--------------------------------------------------------------------------------
/dependencies.txt:
--------------------------------------------------------------------------------
 1 | # list ubuntu packages needed in production, one per line
 2 | # run time dependencies
 3 | # ensure fully working base python3 installation
 4 | # see: https://gist.github.com/tiran/2dec9e03c6f901814f6d1e8dad09528e
 5 | python3.11
 6 | python3.11-venv
 7 | python3.11-distutils
 8 | 
 9 | # from packages.microsoft.com
10 | mssql-tools
11 | 


--------------------------------------------------------------------------------
/docs/explanation/index.md:
--------------------------------------------------------------------------------
 1 | These explanations provide background knowledge for learning ehrQL.
 2 | 
 3 | * [ehrQL backend tables](backend-tables.md)
 4 | * [ehrQL output formats](output-formats.md)
 5 | * [Using ehrQL in OpenSAFELY projects](using-ehrql-in-opensafely-projects.md)
 6 | * [Running ehrQL](running-ehrql.md)
 7 | * [Using the measures framework](measures.md)
 8 | * [Selecting populations for study](selecting-populations-for-study.md)
 9 | * [The OpenSAFELY VS Code extension](vscode-extension.md)
10 | 


--------------------------------------------------------------------------------
/docs/explanation/selecting-populations-for-study.md:
--------------------------------------------------------------------------------
 1 | This page is aimed at researchers working with NHS England patient record data
 2 | as provided by the OpenSAFELY backends.
 3 | 
 4 | ## Continuity of patient data
 5 | 
 6 | Within the NHS in England, it is usually assumed that
 7 | a patient's current primary care record is complete:
 8 | when a patient moves from one practice to another in England,
 9 | their record moves with them.
10 | The electronic health records of patients transferring between practices in England
11 | should automatically get transferred via the GP2GP system.
12 | 
13 | Known caveats are that:
14 | 
15 | * not all data may be transferred; for example, appointment data
16 | * not all data may be available at once; for example, information on repeat prescriptions
17 | 
18 | Refer to the [GP2GP site](https://digital.nhs.uk/services/gp2gp)
19 | and the [GP2GP Key Activities documentation (PDF)](https://digital.nhs.uk/binaries/content/assets/website-assets/services/gp2gp/gp2gp_key_activities_2017_v0_4.pdf)
20 | for further details of this transfer process.
21 | 
22 | !!! note
23 |     Researchers using OpenSAFELY may wish to select patients
24 |     with a continuous registration.
25 |     "Continuous registration" here means that
26 |     a patient did not change practice during a time period of interest.
27 | 
28 |     For TPP,
29 |     there is a [method to select patients with a continuous registration](../reference/schemas/tpp.md#practice_registrations.spanning).
30 | 


--------------------------------------------------------------------------------
/docs/explanation/vscode_extension_ehrql_debug.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/explanation/vscode_extension_ehrql_debug.png


--------------------------------------------------------------------------------
/docs/explanation/vscode_extension_menu_bar_button.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/explanation/vscode_extension_menu_bar_button.png


--------------------------------------------------------------------------------
/docs/explanation/vscode_extension_run_button.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/explanation/vscode_extension_run_button.png


--------------------------------------------------------------------------------
/docs/explanation/vscode_extension_run_button_dropdown.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/explanation/vscode_extension_run_button_dropdown.png


--------------------------------------------------------------------------------
/docs/explanation/vscode_extension_search.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/explanation/vscode_extension_search.png


--------------------------------------------------------------------------------
/docs/explanation/vscode_extensions_icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/explanation/vscode_extensions_icon.png


--------------------------------------------------------------------------------
/docs/explanation/vscode_extensions_icon_updates.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/explanation/vscode_extensions_icon_updates.png


--------------------------------------------------------------------------------
/docs/how-to/index.md:
--------------------------------------------------------------------------------
 1 | The how-to guides provide practical steps for working with ehrQL in your project.
 2 | 
 3 | * [Using ehrQL to answer specific questions](examples.md)
 4 | * [How to include and exclude patients from your study population](define-population.md)
 5 | * [Resolving ehrQL errors](errors.md)
 6 | * [How to use dummy data in an ehrQL dataset definition](dummy-data.md)
 7 | * [How to use dummy data in an ehrQL measures definition](dummy-measures-data.md)
 8 | * [How to assign multiple columns to a dataset programmatically](assign-multiple-columns.md)
 9 | * [How to work with codelists](codelists.md)
10 | * [How to test your dataset definition](test-dataset-definition.md)
11 | 


--------------------------------------------------------------------------------
/docs/how-to/opensafely_exec_create_dummy_tables.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/how-to/opensafely_exec_create_dummy_tables.png


--------------------------------------------------------------------------------
/docs/how-to/opensafely_exec_dummy_data_file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/how-to/opensafely_exec_dummy_data_file.png


--------------------------------------------------------------------------------
/docs/how-to/opensafely_exec_dummy_measures_data_file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/how-to/opensafely_exec_dummy_measures_data_file.png


--------------------------------------------------------------------------------
/docs/includes/generated_docs/language__codelists.md:
--------------------------------------------------------------------------------
 1 | 
 2 | <h4 class="attr-heading" id="codelist_from_csv" data-toc-label="codelist_from_csv" markdown>
 3 |   <tt><strong>codelist_from_csv</strong>(<em>filename</em>, <em>column</em>, <em>category_column=None</em>)</tt>
 4 | </h4>
 5 | <div markdown="block" class="indent">
 6 | Read a codelist from a CSV file as either a list or a dictionary (for categorised
 7 | codelists).
 8 | 
 9 | _filename_<br>
10 | Path to the file on disk, relative to the root of your repository. (Remember to use
11 | UNIX/style/forward-slashes not Windows\style\backslashes.)
12 | 
13 | _column_<br>
14 | Name of the column in the CSV file which contains the codes.
15 | 
16 | _category_column_<br>
17 | Optional name of a column in the CSV file which contains categories to which each
18 | code should be mapped. If this argument is passed then the resulting codelist will
19 | be a dictionary mapping each code to its corresponding category. This can be passed
20 | to the [`to_category()`](#CodePatientSeries.to_category) method to map a series of
21 | codes to a series of categories.
22 | 
23 | For more detail see the [how-to guide](../how-to/examples.md/#using-codelists-with-category-columns).
24 | </div>
25 | 


--------------------------------------------------------------------------------
/docs/includes/generated_docs/schemas/smoketest.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # <strong>smoketest</strong> schema
 3 | 
 4 | Available on backends: [**TPP**](../backends.md#tpp), [**EMIS**](../backends.md#emis)
 5 | 
 6 | This tiny schema is used to write a [minimal dataset definition][smoketest_repo] that
 7 | can function as a basic end-to-end test (or "smoke test") of the OpenSAFELY platform
 8 | across all available backends.
 9 | 
10 | [smoketest_repo]: https://github.com/opensafely/test-age-distribution
11 | 
12 | ``` {.python .copy title='To use this schema in an ehrQL file:'}
13 | from ehrql.tables.smoketest import (
14 |     patients,
15 | )
16 | ```
17 | 
18 | <p class="dimension-indicator"><code>one row per patient</code></p>
19 | ## patients
20 | 
21 | 
22 | <div markdown="block" class="definition-list-wrapper">
23 |   <div class="title">Columns</div>
24 |   <dl markdown="block">
25 | <div markdown="block">
26 |   <dt id="patients.date_of_birth">
27 |     <strong>date_of_birth</strong>
28 |     <a class="headerlink" href="#patients.date_of_birth" title="Permanent link">🔗</a>
29 |     <code>date</code>
30 |   </dt>
31 |   <dd markdown="block">
32 | Patient's year and month of birth, provided in format YYYY-MM-01. The day will always be the first of the month.
33 | 
34 |  * Always the first day of a month
35 |  * Never `NULL`
36 |   </dd>
37 | </div>
38 | 
39 |   </dl>
40 | </div>
41 | 


--------------------------------------------------------------------------------
/docs/reference/backends.md:
--------------------------------------------------------------------------------
 1 | Dataset definitions written in ehrQL can be run inside different secure
 2 | environments, managed by different providers of EHR data.
 3 | 
 4 | For each such secure environment, there is a corresponding "backend"
 5 | defined in ehrQL. Each ehrQL backend:
 6 | 
 7 | * specifies the datasets available inside each secure environment
 8 | * does the necessary translation work to allow the same
 9 |   dataset definition to run against data modelled in different ways and
10 |   stored in different systems
11 | 
12 | When writing a dataset definition you don't need to explicitly reference
13 | any particular backend. But, as not every dataset is available in every
14 | backend, the [table schema](schemas.md) you use to write your dataset
15 | definition will determine which backends it can be run against.
16 | 
17 | Below are the backends currently supported in ehrQL, together with the
18 | list of [table schemas](schemas.md) each one supports.
19 | 
20 | 
21 | ---8<-- 'includes/generated_docs/backends.md'
22 | 


--------------------------------------------------------------------------------
/docs/reference/cli.md:
--------------------------------------------------------------------------------
1 | ---8<-- 'includes/generated_docs/cli.md'
2 | 


--------------------------------------------------------------------------------
/docs/reference/features.md:
--------------------------------------------------------------------------------
 1 | This reference is structured as a series of examples.
 2 | 
 3 | The intended audience is primarily:
 4 | 
 5 | * researchers
 6 | * software developers
 7 | 
 8 | who already have some understanding of how ehrQL works.
 9 | 
10 | !!! info
11 |     Please refer to the introduction and tutorial documentation sections
12 |     if you need more explanation of the underlying concepts behind ehrQL.
13 | 
14 | ## How the examples work
15 | 
16 | Each individual example demonstrates a specific ehrQL feature in isolation.
17 | 
18 | Every example here consists of:
19 | 
20 | 1. Headings and subheadings that summarise the feature being demonstrated.
21 | 2. A small example data input table containing entirely fictitious variables and values.
22 |     * The table has a single-letter name referred to throughout the example
23 |         * `e` for event-level table
24 |         * `p` for patient-level table.
25 |     * The columns of input tables use a name constructed from a single letter with a number
26 |       to create an identifier — for example, `i1`.
27 |       The single letter in the identifier refers to the column's data type:
28 |         * a `b` column contains Boolean values
29 |         * a `c` column contains electronic health record codes
30 |           (the codes used in this reference are fictitious, for example: `abc`)
31 |         * a `d` column contains dates
32 |         * an `i` column contains integers
33 |         * an `s` column contains strings
34 |     * Both table and column names are written with code formatting throughout this reference.
35 | 3. An ehrQL query that extracts some data from the example table.
36 |    Like the table names, ehrQL queries are displayed here with code formatting.
37 | 4. The resulting output from the ehrQL query,
38 |    displayed as another table,
39 |    to demonstrate the query's effect.
40 | 
41 | !!! note
42 |     The examples here are automatically generated from [ehrQL's specification tests](https://github.com/opensafely-core/ehrql/tree/main/tests/spec).
43 | 
44 | ---8<-- 'includes/generated_docs/specs.md'
45 | 
46 | !!! parent_snippet:'includes/glossary.md'
47 | 


--------------------------------------------------------------------------------
/docs/reference/index.md:
--------------------------------------------------------------------------------
 1 | The reference provides background knowledge for working with ehrQL in your project.
 2 | 
 3 | * [Language reference](language.md)
 4 | * [Language features](features.md)
 5 | * [Backends](backends.md)
 6 | * [Table schemas](schemas.md)
 7 | * [Command line interface](cli.md)
 8 | * [Cheatsheet](cheatsheet.md)
 9 | * [Upgrading ehrQL from v0 to v1](upgrading-ehrql-from-v0-to-v1.md)
10 | 


--------------------------------------------------------------------------------
/docs/reference/schemas:
--------------------------------------------------------------------------------
1 | ../includes/generated_docs/schemas


--------------------------------------------------------------------------------
/docs/reference/schemas.md:
--------------------------------------------------------------------------------
1 | Table schemas define the tables and columns available to query in a
2 | dataset definition. The schema a dataset definition is written against
3 | determines which [backends](backends.md) it can be run inside.
4 | 
5 | Below is a list of all table schemas available in ehrQL, together with
6 | the backends that support them.
7 | 
8 | ---8<-- 'includes/generated_docs/schemas.md'
9 | 


--------------------------------------------------------------------------------
/docs/sandbox/medications.csv:
--------------------------------------------------------------------------------
1 | patient_id,date,dmd_code
2 | 1,2023-01-01,9207411000001106
3 | 2,2023-02-02,39695411000001103
4 | 2,2023-03-03,39695411000001103
5 | 4,2023-04-04,9207411000001106
6 | 4,2023-05-05,39695411000001103
7 | 4,2023-06-06,9207411000001106
8 | 


--------------------------------------------------------------------------------
/docs/sandbox/patients.csv:
--------------------------------------------------------------------------------
1 | patient_id,date_of_birth,sex,date_of_death
2 | 1,1980-01-01,F,
3 | 2,1990-02-01,M,
4 | 3,2000-03-01,F,
5 | 4,2010-04-01,M,
6 | 


--------------------------------------------------------------------------------
/docs/tutorial/index.md:
--------------------------------------------------------------------------------
 1 | This tutorial provides practical steps for learning ehrQL.
 2 | 
 3 | ![An XKCD cartoon](xkcd-2582.png)
 4 | 
 5 | You will work through using ehrQL to:
 6 | 
 7 | * identify patients who should be on the QOF register for diabetes, and
 8 | * categorise patients on the register according to various QOF business rules.
 9 | 
10 | ??? tip "What's QOF?"
11 |     QOF is the [Quality and Outcomes Framework][1].
12 |     It is an incentive programme for GP practices.
13 |     To receive payment, GPs must keep registers of patients with various conditions, and demonstrate that they are providing care according to business rules.
14 | 
15 |     An example from the diabetes business rules (found [here][2]) would be that patients with a new diagnosis of diabetes should be referred to a structured education programme within nine months of diagnosis.
16 | 
17 | The tutorial is interactive.
18 | You will get the most out of it if you run the code yourself, and do experiments by changing the code and trying to explain what you see.
19 | 
20 | At the end of the tutorial there will be a quiz.
21 | 
22 | You can find a tutorial for writing an OpenSAFELY study in our [Getting started][3] guide.
23 | 
24 | Get started with [Setting up](./setting-up/index.md).
25 | 
26 | ---
27 | 
28 | There is also a [video walkthrough](https://www.youtube.com/watch?v=hjBShGRgsWs) of this tutorial available on YouTube.
29 | 
30 | [![The thumbnail for the ehrQL video tutorial](https://img.youtube.com/vi/hjBShGRgsWs/0.jpg)](https://www.youtube.com/watch?v=hjBShGRgsWs)
31 | 
32 | ---
33 | 
34 | Cartoon from [xkcd.com/2582](https://xkcd.com/2582).
35 | 
36 | [1]: https://qof.digital.nhs.uk/
37 | [2]: https://digital.nhs.uk/data-and-information/data-collections-and-data-sets/data-collections/quality-and-outcomes-framework-qof/business-rules/quality-and-outcomes-framework-qof-business-rules-v49-2024-25
38 | [3]: https://docs.opensafely.org/getting-started/
39 | 


--------------------------------------------------------------------------------
/docs/tutorial/quiz/play-button-drop-down.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/quiz/play-button-drop-down.png


--------------------------------------------------------------------------------
/docs/tutorial/quiz/play-button.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/quiz/play-button.png


--------------------------------------------------------------------------------
/docs/tutorial/setting-up/building-codespace.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/setting-up/building-codespace.png


--------------------------------------------------------------------------------
/docs/tutorial/setting-up/enhanced-tracking-protection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/setting-up/enhanced-tracking-protection.png


--------------------------------------------------------------------------------
/docs/tutorial/setting-up/green-buttons.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/setting-up/green-buttons.png


--------------------------------------------------------------------------------
/docs/tutorial/setting-up/new-codespace-screen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/setting-up/new-codespace-screen.png


--------------------------------------------------------------------------------
/docs/tutorial/setting-up/run-button.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/setting-up/run-button.png


--------------------------------------------------------------------------------
/docs/tutorial/setting-up/successful-run.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/setting-up/successful-run.png


--------------------------------------------------------------------------------
/docs/tutorial/simple-transformations/autocomplete-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/simple-transformations/autocomplete-example.png


--------------------------------------------------------------------------------
/docs/tutorial/using-ehrql-as-part-of-a-study/index.md:
--------------------------------------------------------------------------------
 1 | The last piece in the puzzle is to demonstrate how to use a dataset definition in an OpenSAFELY study.
 2 | An OpenSAFELY study consists of a set of actions.
 3 | At least one action must be an ehrQL action, to extract a dataset from an OpenSAFELY backend.
 4 | 
 5 | You can run a single action using [`opensafely exec`][1].
 6 | 
 7 | In your Codespace, open a terminal by pressing `Ctrl+J`, and run:
 8 | 
 9 | ```
10 | opensafely exec ehrql:v1 generate-dataset dataset_definition.py --dummy-tables dummy_tables
11 | ```
12 | 
13 | You should see the terminal fill with a table of data in CSV format.
14 | Scroll up to see the column headers, and notice the two columns from your dataset definition (`prt_or_mal` and `ace_or_arb`).
15 | 
16 | ![A screenshot of the terminal in a Codespace](terminal.png)
17 | 
18 | > Question: what happens if you rename the `dataset` variable and run the `opensafely exec` command again?
19 | 
20 | ??? tip "The anatomy of an OpenSAFELY command"
21 |     What do the parts of the OpenSAFELY command
22 |     `opensafely exec ehrql:v1 generate-dataset dataset_definition.py`
23 |     do?
24 | 
25 |     * `opensafely exec` executes an OpenSAFELY action independently of other OpenSAFELY actions
26 |     * `ehrql` is the OpenSAFELY action to execute
27 |     * `v1` is the major version of the ehrQL action
28 |     * `generate-dataset` is the ehrQL command to generate a dataset from a dataset definition
29 |     * `dataset_definition.py` is the dataset definition
30 |     * `--dummy-tables dummy_tables` gives the path to the dummy data
31 | 
32 | Note: [the main OpenSAFELY tutorial][2] documents how you can describe the actions of your study in a file called `project.yaml`.
33 | 
34 | Next: [Test your ehrQL knowledge with a quiz!](../quiz/index.md)
35 | 
36 | !!! abstract "Feedback"
37 |     Don't fancy the quiz? That's not a problem, but if you could fill in this very short [feedback form][3]{:target="_blank"} we'd really appreciate it.
38 | 
39 | 
40 | [1]: https://docs.opensafely.org/opensafely-cli/#exec-interactive-development
41 | [2]: https://docs.opensafely.org/getting-started/tutorial/run-the-project-pipeline/
42 | [3]: https://docs.google.com/forms/d/e/1FAIpQLSeouuTXPnwShAjBllyln4tl2Q52PMG_aUhpma4odpE2MmCngg/viewform
43 | 


--------------------------------------------------------------------------------
/docs/tutorial/using-ehrql-as-part-of-a-study/terminal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/using-ehrql-as-part-of-a-study/terminal.png


--------------------------------------------------------------------------------
/docs/tutorial/xkcd-2582.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/docs/tutorial/xkcd-2582.png


--------------------------------------------------------------------------------
/ehrql/VERSION:
--------------------------------------------------------------------------------
1 | dev
2 | 


--------------------------------------------------------------------------------
/ehrql/__init__.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from ehrql.codes import codelist_from_csv
 4 | from ehrql.debugger import show
 5 | from ehrql.measures import INTERVAL, Measures, create_measures
 6 | from ehrql.query_language import (
 7 |     Dataset,
 8 |     Error,
 9 |     case,
10 |     create_dataset,
11 |     days,
12 |     maximum_of,
13 |     minimum_of,
14 |     months,
15 |     weeks,
16 |     when,
17 |     years,
18 | )
19 | from ehrql.utils.log_utils import init_logging
20 | 
21 | 
# The version string is read from the `VERSION` file that sits next to this module
__version__ = Path(__file__).parent.joinpath("VERSION").read_text().strip()


# Names re-exported as the top-level `ehrql` API
__all__ = [
    "codelist_from_csv",
    "INTERVAL",
    "Measures",
    "Dataset",
    "Error",
    "case",
    "create_dataset",
    "create_measures",
    "days",
    "show",
    "maximum_of",
    "minimum_of",
    "months",
    "weeks",
    "when",
    "years",
]

# Logging is configured as a side effect of importing the package
init_logging()
45 | 


--------------------------------------------------------------------------------
/ehrql/backends/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/ehrql/backends/__init__.py


--------------------------------------------------------------------------------
/ehrql/docs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/ehrql/docs/__init__.py


--------------------------------------------------------------------------------
/ehrql/docs/backends.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from pathlib import Path
 3 | 
 4 | import ehrql
 5 | import ehrql.tables
 6 | from ehrql.utils.module_utils import get_sibling_subclasses
 7 | 
 8 | from ..backends.base import SQLBackend
 9 | from .common import get_docstring
10 | 
11 | 
# Maps a backend display name to its position in the preferred listing order ("TPP"
# first, then "EMIS"). `sort_key` places any name not listed here after these.
SORT_ORDER = {k: i for i, k in enumerate(["TPP", "EMIS"])}
13 | 
14 | 
def build_backends():
    """
    Gather documentation metadata for every sibling subclass of `SQLBackend`

    Returns a list of dicts (keys: `name`, `dotted_path`, `file_path`, `docstring`
    and `implements`) ordered using `sort_key`.
    """
    tables_prefix = ehrql.tables.__name__ + "."

    def describe(backend):
        # Table namespaces are reported relative to the `ehrql.tables` package
        implemented = [
            namespace.__name__.removeprefix(tables_prefix)
            for namespace in backend.implements
        ]
        return {
            "name": backend.display_name,
            "dotted_path": f"{backend.__module__}.{backend.__qualname__}",
            "file_path": relative_file_path(backend.__module__),
            "docstring": get_docstring(backend),
            "implements": implemented,
        }

    described = [describe(backend) for backend in get_sibling_subclasses(SQLBackend)]
    return sorted(described, key=sort_key)
36 | 
37 | 
def relative_file_path(module_dotted_path):
    """
    Return the source file path of an already-imported module, as a string relative
    to the directory which contains the `ehrql` package
    """
    module = sys.modules[module_dotted_path]
    # `ehrql.__file__` is the package's `__init__.py`, so two levels up is the
    # directory holding the package itself
    base_dir = Path(ehrql.__file__).parents[1]
    relative = Path(module.__file__).relative_to(base_dir)
    return str(relative)
42 | 
43 | 
def sort_key(obj):
    """
    Sort key for backend dicts: names listed in `SORT_ORDER` come first in that
    order, all other names follow alphabetically
    """
    name = obj["name"]
    rank = SORT_ORDER.get(name, float("+inf"))
    return rank, name
47 | 


--------------------------------------------------------------------------------
/ehrql/docs/render_includes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/ehrql/docs/render_includes/__init__.py


--------------------------------------------------------------------------------
/ehrql/docs/render_includes/backends.py:
--------------------------------------------------------------------------------
# Markdown template for one backend section. It is filled in using `str.format` with
# the fields `name`, `file_path`, `dotted_path`, `docstring` and `schema_list`.
BACKEND_TEMPLATE = """\
## {name}
<small class="subtitle">
  <a href="https://github.com/opensafely-core/ehrql/blob/main/{file_path}">
    <code>{dotted_path}</code>
  </a>
</small>

{docstring}

This backend implements the following table schemas:

{schema_list}
"""
15 | 
16 | 
def render_backends(backend_data):
    """
    Render a sequence of backend dicts as Markdown, one `BACKEND_TEMPLATE` section per
    backend, joined with newlines

    Each backend's `implements` entries become a bulleted list of links to the
    corresponding schema pages.
    """
    sections = []
    for backend in backend_data:
        schema_links = [
            f" * [{schema}](schemas/{schema}.md)" for schema in backend["implements"]
        ]
        section = BACKEND_TEMPLATE.format(
            **backend,
            schema_list="\n".join(schema_links),
        )
        sections.append(section)
    return "\n".join(sections)
28 | 


--------------------------------------------------------------------------------
/ehrql/dummy_data/__init__.py:
--------------------------------------------------------------------------------
1 | from ehrql.dummy_data.generator import DummyDataGenerator
2 | 
3 | 
4 | __all__ = ["DummyDataGenerator"]
5 | 


--------------------------------------------------------------------------------
/ehrql/dummy_data_nextgen/__init__.py:
--------------------------------------------------------------------------------
1 | from ehrql.dummy_data_nextgen.generator import DummyDataGenerator
2 | from ehrql.dummy_data_nextgen.measures import DummyMeasuresDataGenerator
3 | 
4 | 
5 | __all__ = ["DummyDataGenerator", "DummyMeasuresDataGenerator"]
6 | 


--------------------------------------------------------------------------------
/ehrql/example-data/medications.csv:
--------------------------------------------------------------------------------
 1 | patient_id,date,dmd_code
 2 | 9,2023-11-08,29984111000001107
 3 | 15,2023-06-17,29984111000001107
 4 | 19,2022-06-02,34188411000001109
 5 | 28,2022-05-26,34188411000001109
 6 | 56,2022-11-27,29984111000001107
 7 | 59,2023-08-22,34188411000001109
 8 | 59,2022-11-08,29984111000001107
 9 | 61,2023-11-09,29984111000001107
10 | 67,2024-01-22,34188411000001109
11 | 84,2022-11-30,34188411000001109
12 | 87,2023-09-30,34188411000001109
13 | 92,2023-11-18,29984111000001107
14 | 93,2022-08-19,34188411000001109
15 | 99,2023-06-04,29984111000001107
16 | 


--------------------------------------------------------------------------------
/ehrql/example-data/ons_deaths.csv:
--------------------------------------------------------------------------------
 1 | patient_id,date,place,underlying_cause_of_death,cause_of_death_01,cause_of_death_02,cause_of_death_03,cause_of_death_04,cause_of_death_05,cause_of_death_06,cause_of_death_07,cause_of_death_08,cause_of_death_09,cause_of_death_10,cause_of_death_11,cause_of_death_12,cause_of_death_13,cause_of_death_14,cause_of_death_15
 2 | 8,1978-10-25,Hospital,J43.8,,,,,,,,,,,,,,,
 3 | 10,2024-09-28,Care Home,J10.1,,,,,,,,,,,,,,,
 4 | 31,1982-08-02,Hospital,A39.0,,,,,,,,,,,,,,,
 5 | 32,2022-02-26,Home,C91.1,I10.0,,,,,,,,,,,,,,
 6 | 40,2024-05-06,Hospital,C91.1,,,,,,,,,,,,,,,
 7 | 41,1991-02-15,Home,I21.0,I10.0,,,,,,,,,,,,,,
 8 | 46,2017-05-20,Hospital,I51.9,,,,,,,,,,,,,,,
 9 | 60,2024-05-29,Home,J43.8,,,,,,,,,,,,,,,
10 | 86,2024-09-14,Hospital,C81.0,,,,,,,,,,,,,,,
11 | 97,2024-09-19,Hospital,J10.0,,,,,,,,,,,,,,,
12 | 100,2012-03-08,Home,I60.0,,,,,,,,,,,,,,,
13 | 


--------------------------------------------------------------------------------
/ehrql/exceptions.py:
--------------------------------------------------------------------------------
class EHRQLException(Exception):
    """Base exception for ehrQL errors of all sorts.

    Note that this is not yet reliably used everywhere it should be, so not every
    error ehrQL raises is necessarily a subclass of it.
    """
 6 | 
 7 | 
class DummyDataException(EHRQLException):
    """Base class for dummy data errors (itself an `EHRQLException`)."""
10 | 
11 | 
class CannotGenerate(DummyDataException):
    """Raised when a population definition cannot be satisfied.

    This may be because the definition is logically impossible to satisfy, or
    because it is logically possible but we were unable to find a way to do so.
    """
18 | 


--------------------------------------------------------------------------------
/ehrql/file_formats/__init__.py:
--------------------------------------------------------------------------------
 1 | from ehrql.file_formats.base import FileValidationError
 2 | from ehrql.file_formats.main import (
 3 |     FILE_FORMATS,
 4 |     get_file_extension,
 5 |     read_rows,
 6 |     read_tables,
 7 |     split_directory_and_extension,
 8 |     write_rows,
 9 |     write_tables,
10 | )
11 | 
12 | 
13 | __all__ = [
14 |     "FileValidationError",
15 |     "FILE_FORMATS",
16 |     "get_file_extension",
17 |     "read_rows",
18 |     "read_tables",
19 |     "split_directory_and_extension",
20 |     "write_rows",
21 |     "write_tables",
22 | ]
23 | 


--------------------------------------------------------------------------------
/ehrql/file_formats/console.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Handles writing rows/tables to the console for local development and debugging.
 3 | 
 4 | At present, this just uses the CSV writer but there's scope for using something a bit
 5 | prettier and more readable here in future.
 6 | """
 7 | 
 8 | import sys
 9 | 
10 | from ehrql.file_formats.csv import write_rows_csv_lines
11 | 
12 | 
def write_rows_console(rows, column_specs):
    """
    Write `rows` to stdout by handing them, together with `column_specs`, to the CSV
    line writer
    """
    write_rows_csv_lines(sys.stdout, rows, column_specs)
15 | 
16 | 
def write_tables_console(tables, table_specs):
    """
    Write several tables to stdout, one after another

    `tables` is paired up positionally with the `(table_name, column_specs)` items of
    `table_specs`. Tables are separated by blank lines and, when there is more than
    one table spec, each is preceded by its name.
    """
    show_names = len(table_specs) > 1
    paired = zip(tables, table_specs.items())
    for position, (rows, (table_name, column_specs)) in enumerate(paired):
        if position > 0:
            # Add whitespace between tables
            sys.stdout.write("\n\n")
        if show_names:
            sys.stdout.write(f"{table_name}\n")
        write_rows_console(rows, column_specs)
29 | 


--------------------------------------------------------------------------------
/ehrql/file_formats/validation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/ehrql/file_formats/validation.py


--------------------------------------------------------------------------------
/ehrql/measures/__init__.py:
--------------------------------------------------------------------------------
 1 | from ehrql.dummy_data.measures import DummyMeasuresDataGenerator
 2 | from ehrql.measures.calculate import (
 3 |     get_column_specs_for_measures,
 4 |     get_measure_results,
 5 | )
 6 | from ehrql.measures.disclosure_control import apply_sdc_to_measure_results
 7 | from ehrql.measures.measures import INTERVAL, Measures, create_measures
 8 | 
 9 | 
10 | __all__ = [
11 |     "get_column_specs_for_measures",
12 |     "get_measure_results",
13 |     "apply_sdc_to_measure_results",
14 |     "DummyMeasuresDataGenerator",
15 |     "INTERVAL",
16 |     "Measures",
17 |     "create_measures",
18 | ]
19 | 


--------------------------------------------------------------------------------
/ehrql/measures/disclosure_control.py:
--------------------------------------------------------------------------------
 1 | """Statistical Disclosure Control (SDC)
 2 | 
 3 | For more information, see:
 4 | https://docs.opensafely.org/releasing-files/
 5 | """
 6 | 
 7 | SUPPRESSION_THRESHOLD = 7
 8 | ROUNDING_MULTIPLE = 5
 9 | 
10 | 
11 | def apply_sdc(value):
12 |     assert value >= 0
13 |     assert isinstance(value, int)
14 |     value = 0 if value <= SUPPRESSION_THRESHOLD else value
15 |     value = int(ROUNDING_MULTIPLE * round(value / ROUNDING_MULTIPLE, ndigits=0))
16 |     return value
17 | 
18 | 
def apply_sdc_to_measure_results(results):
    """
    Lazily apply disclosure control to each row of measure results

    Each input row is unpacked as `(measure_name, interval_start, interval_end, ratio,
    numerator, denominator, *group_names)`. The numerator and denominator are passed
    through `apply_sdc` and the ratio is recomputed from the adjusted values (`None`
    when the adjusted denominator is zero); the incoming ratio is discarded.
    """
    for name, start, end, _stale_ratio, raw_numerator, raw_denominator, *groups in results:
        numerator = apply_sdc(raw_numerator)
        denominator = apply_sdc(raw_denominator)
        if denominator:
            ratio = numerator / denominator
        else:
            ratio = None
        yield (name, start, end, ratio, numerator, denominator, *groups)
42 | 


--------------------------------------------------------------------------------
/ehrql/query_engines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/ehrql/query_engines/__init__.py


--------------------------------------------------------------------------------
/ehrql/query_engines/local_file.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from ehrql.file_formats import read_tables
 4 | from ehrql.query_engines.in_memory import InMemoryQueryEngine
 5 | from ehrql.query_engines.in_memory_database import InMemoryDatabase
 6 | from ehrql.query_model.column_specs import get_column_specs_from_schema
 7 | from ehrql.query_model.introspection import get_table_nodes
 8 | 
 9 | 
class LocalFileQueryEngine(InMemoryQueryEngine):
    """
    Variant of the in-memory engine whose data is loaded from files
    """

    database = None

    def get_results_tables(self, dataset):
        # Work out which tables the supplied dataset refers to and load their data
        # into the database before running the query in the usual way
        referenced_tables = get_table_nodes(dataset)
        self.populate_database(referenced_tables)
        return super().get_results_tables(dataset)

    def populate_database(self, table_nodes, allow_missing_columns=True):
        # Build the column specs for each table from its schema, keyed by table name
        table_specs = {}
        for table in table_nodes:
            table_specs[table.name] = get_column_specs_from_schema(table.schema)
        # `self.dsn` is treated as the filesystem path to read the tables from
        table_rows = read_tables(
            Path(self.dsn),
            table_specs,
            allow_missing_columns=allow_missing_columns,
        )
        # Pair each table node with the rows read for it
        self.database = InMemoryDatabase(dict(zip(table_nodes, table_rows)))
38 | 


--------------------------------------------------------------------------------
/ehrql/query_engines/sqlite_dialect.py:
--------------------------------------------------------------------------------
 1 | import sqlean
 2 | from sqlalchemy.dialects.sqlite.pysqlite import SQLiteDialect_pysqlite
 3 | 
 4 | 
class SQLiteDialect(SQLiteDialect_pysqlite):
    """
    SQLite dialect which uses the `sqlean` DBAPI module and makes LIKE queries
    case-sensitive on every new connection
    """

    supports_statement_cache = False

    @classmethod
    def import_dbapi(cls):
        # Use sqlean rather than the system version
        sqlean.extensions.enable("math")
        return sqlean.dbapi2

    def do_on_connect(self, connection):
        # Set the per-connection flag which makes LIKE queries case-sensitive
        connection.execute("PRAGMA case_sensitive_like = 1;")

    def on_connect(self):
        # `on_connect` must return a callable to be executed
        return self.do_on_connect
21 | 


--------------------------------------------------------------------------------
/ehrql/query_engines/trino_dialect.py:
--------------------------------------------------------------------------------
 1 | from trino.sqlalchemy.compiler import (
 2 |     TrinoDDLCompiler as BaseTrinoDDLCompiler,
 3 | )
 4 | from trino.sqlalchemy.compiler import (
 5 |     TrinoTypeCompiler as BaseTrinoTypeCompiler,
 6 | )
 7 | from trino.sqlalchemy.dialect import TrinoDialect as BaseTrinoDialect
 8 | 
 9 | 
class TrinoDDLCompiler(BaseTrinoDDLCompiler):
    """
    DDL compiler which omits constraints that some Trino connectors reject
    """

    def get_column_specification(self, column, **kwargs):
        """
        Strip NOT NULL column constraints from the generated column specification.
        Some Trino connectors don't support them (particularly the memory
        connector, which is used for tests).

        This is only required by the SQLAlchemy ORM layer and therefore only
        used in test.
        """
        base_spec = super().get_column_specification(column, **kwargs)
        return base_spec.replace(" NOT NULL", "")

    def visit_primary_key_constraint(self, constraint, **kw):
        """
        Emit nothing for PRIMARY KEY constraints. Some Trino connectors don't
        support them (particularly the memory connector, which is used for tests).

        This is only required by the SQLAlchemy ORM layer and therefore only
        used in test.
        """
        return ""
34 | 
35 | 
class TrinoTypeCompiler(BaseTrinoTypeCompiler):
    """Type compiler which renders FLOAT columns using the DOUBLE type."""

    def visit_FLOAT(self, type_, **kw):
        """Make SQLAlchemy use 64-bit precision for floats."""

        # Only FLOAT types without an explicit precision are expected here, as we
        # hand off to the DOUBLE rendering regardless
        assert type_.precision is None
        return self.visit_DOUBLE(type_, **kw)
42 | 
43 | 
class TrinoDialect(BaseTrinoDialect):
    """Trino dialect wired up to the customised DDL and type compilers."""

    supports_statement_cache = True
    ddl_compiler = TrinoDDLCompiler
    type_compiler = TrinoTypeCompiler

    # Tell SQLAlchemy it can use batched insert options for faster test setup
    supports_multivalues_insert = True
    use_insertmanyvalues = True
    use_insertmanyvalues_wo_returning = True
53 | 


--------------------------------------------------------------------------------
/ehrql/query_model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/ehrql/query_model/__init__.py


--------------------------------------------------------------------------------
/ehrql/query_model/introspection.py:
--------------------------------------------------------------------------------
 1 | from ehrql.query_model.nodes import (
 2 |     InlinePatientTable,
 3 |     SelectPatientTable,
 4 |     SelectTable,
 5 |     get_input_nodes,
 6 | )
 7 | 
 8 | 
 9 | def all_nodes(tree):  # pragma: no cover
10 |     nodes = []
11 | 
12 |     for subnode in get_input_nodes(tree):
13 |         for node in all_nodes(subnode):
14 |             nodes.append(node)
15 |     return [tree] + nodes
16 | 
17 | 
def count_nodes(tree):  # pragma: no cover
    """
    Return the length of the list produced by `all_nodes(tree)`
    """
    return len(all_nodes(tree))
20 | 
21 | 
def node_types(tree):  # pragma: no cover
    """
    Return the type of each node yielded by `all_nodes(tree)`, in the same order
    """
    return list(map(type, all_nodes(tree)))
24 | 
25 | 
def all_unique_nodes(*nodes):
    """
    Return the set of all distinct nodes reachable from any of the supplied nodes
    (including the supplied nodes themselves)
    """
    unique = set()
    for root in nodes:
        gather_unique_nodes(root, unique)
    return unique
31 | 
32 | 
def gather_unique_nodes(node, found):
    """
    Add `node` and every node reachable from it to the set `found`, mutating it in
    place; inputs which are already in `found` are not descended into again
    """
    found.add(node)
    for child in get_input_nodes(node):
        if child in found:
            continue
        gather_unique_nodes(child, found)
38 | 
39 | 
def get_table_nodes(*nodes):
    """
    Return the set of `SelectTable` and `SelectPatientTable` nodes reachable from the
    supplied nodes
    """
    table_types = (SelectTable, SelectPatientTable)
    return {
        candidate
        for candidate in all_unique_nodes(*nodes)
        if isinstance(candidate, table_types)
    }
46 | 
47 | 
def all_inline_patient_ids(*nodes):
    """
    Given some nodes, return the set of patient IDs (the first element of each row)
    found in any inline patient tables those nodes reference
    """
    return {
        row[0]
        for node in all_unique_nodes(*nodes)
        if isinstance(node, InlinePatientTable)
        for row in node.rows
    }
58 | 


--------------------------------------------------------------------------------
/ehrql/sqlalchemy_types.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | 
 3 | import sqlalchemy
 4 | 
 5 | 
# Maps each supported Python column type to the SQLAlchemy type class used for it
TYPE_MAP = {
    bool: sqlalchemy.Boolean,
    datetime.date: sqlalchemy.Date,
    float: sqlalchemy.Float,
    int: sqlalchemy.Integer,
    str: sqlalchemy.String,
}
13 | 
14 | 
def type_from_python_type(type_):
    """
    Return the SQLAlchemy type for a given Python type

    Types which expose a `_primitive_type()` method are looked up by the type that
    method returns. Raises `TypeError` if there is no entry in `TYPE_MAP`.
    """
    lookup_type = (
        type_._primitive_type() if hasattr(type_, "_primitive_type") else type_
    )
    try:
        sqlalchemy_type = TYPE_MAP[lookup_type]
    except KeyError:
        raise TypeError(f"Unsupported column type: {type_}")
    return sqlalchemy_type
25 | 


--------------------------------------------------------------------------------
/ehrql/tables/__init__.py:
--------------------------------------------------------------------------------
 1 | from ehrql.query_language import (
 2 |     EventFrame,
 3 |     PatientFrame,
 4 |     Series,
 5 |     table,
 6 |     table_from_file,
 7 |     table_from_rows,
 8 | )
 9 | from ehrql.query_model.table_schema import Constraint
10 | 
11 | 
12 | __all__ = [
13 |     "Constraint",
14 |     "EventFrame",
15 |     "PatientFrame",
16 |     "Series",
17 |     "table",
18 |     "table_from_rows",
19 |     "table_from_file",
20 | ]
21 | 


--------------------------------------------------------------------------------
/ehrql/tables/raw/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/ehrql/tables/raw/__init__.py


--------------------------------------------------------------------------------
/ehrql/tables/raw/emis.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This schema defines the data (both primary care and externally linked) available in the
 3 | OpenSAFELY-EMIS backend. For more information about this backend, see
 4 | "[EMIS Primary Care](https://docs.opensafely.org/data-sources/emis/)".
 5 | 
 6 | The data provided by this schema are minimally transformed. They are very close to the
 7 | data provided by the underlying database tables. They are provided for data development
 8 | and data curation purposes.
 9 | """
10 | 
11 | from ehrql.tables.raw.core import ons_deaths
12 | 
13 | 
14 | __all__ = [
15 |     "ons_deaths",
16 | ]
17 | 


--------------------------------------------------------------------------------
/ehrql/tables/smoketest.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This tiny schema is used to write a [minimal dataset definition][smoketest_repo] that
 3 | can function as a basic end-to-end test (or "smoke test") of the OpenSAFELY platform
 4 | across all available backends.
 5 | 
 6 | [smoketest_repo]: https://github.com/opensafely/test-age-distribution
 7 | """
 8 | 
 9 | import datetime
10 | 
11 | from ehrql.tables import Constraint, PatientFrame, Series, table
12 | 
13 | 
14 | __all__ = [
15 |     "patients",
16 | ]
17 | 
18 | 
# The only table in this schema: a patient-level frame with a single column
@table
class patients(PatientFrame):
    # Date column constrained to be non-null and to fall on the first of the month
    date_of_birth = Series(
        datetime.date,
        description=(
            "Patient's year and month of birth, provided in format YYYY-MM-01. "
            "The day will always be the first of the month."
        ),
        constraints=[Constraint.FirstOfMonth(), Constraint.NotNull()],
    )
29 | 


--------------------------------------------------------------------------------
/ehrql/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/ehrql/utils/__init__.py


--------------------------------------------------------------------------------
/ehrql/utils/docs_utils.py:
--------------------------------------------------------------------------------
def exclude_from_docs(fn):
    """
    Decorator which sets an `exclude_from_docs = True` attribute on `fn` and returns
    `fn` itself
    """
    setattr(fn, "exclude_from_docs", True)
    return fn
4 | 


--------------------------------------------------------------------------------
/ehrql/utils/log_utils.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import logging.config
 3 | import os
 4 | 
 5 | 
 6 | class EHRQLFormatter(logging.Formatter):
 7 |     def format(self, record):
 8 |         record.levelname_lower = record.levelname.lower()
 9 |         return logging.Formatter.format(self, record)
10 | 
11 | 
# Logging configuration in the dict schema consumed by `logging.config.dictConfig`
CONFIG = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "formatter": {
            "()": EHRQLFormatter,
            # `levelname_lower` is the extra attribute supplied by `EHRQLFormatter`
            "format": "[{levelname_lower:<7}] {message}",
            "datefmt": "%Y-%m-%d %H:%M:%S",
            "style": "{",
        }
    },
    "handlers": {
        "console": {
            "level": "DEBUG",
            "class": "logging.StreamHandler",
            "formatter": "formatter",
        }
    },
    "root": {
        "handlers": ["console"],
        # Root level comes from the LOG_LEVEL environment variable, else CRITICAL
        "level": os.getenv("LOG_LEVEL", "CRITICAL"),
    },
    "loggers": {
        "sqlalchemy.engine": {
            # Setting LOG_SQL to any non-empty value lowers this logger to INFO
            "level": "INFO" if os.getenv("LOG_SQL") else "WARN",
        },
    },
}
40 | 
41 | 
def init_logging():
    """Configure the `logging` module using the module-level `CONFIG` dict."""
    logging.config.dictConfig(CONFIG)
44 | 
45 | 
def kv(kv_pairs):
    """Render a mapping as a single string of space separated `k=v` pairs."""
    rendered = [f"{key}={value}" for key, value in kv_pairs.items()]
    return " ".join(rendered)
49 | 


--------------------------------------------------------------------------------
/ehrql/utils/math_utils.py:
--------------------------------------------------------------------------------
 1 | def truediv(lhs, rhs):
 2 |     """
 3 |     Implement Python truediv behaviour but return None when dividing by zero.
 4 |     """
 5 |     if rhs == 0:
 6 |         return None
 7 |     else:
 8 |         return lhs / rhs
 9 | 
10 | 
def floordiv(lhs, rhs):
    """
    Floor-divide `lhs` by `rhs` as Python's `//` does and return the result as an
    int, except that division by zero gives None rather than raising.
    """
    return None if rhs == 0 else int(lhs // rhs)
19 | 
20 | 
def get_grouping_level_as_int(all_groups, group_subset):
    # Compute the grouping level for a subset of the group-by columns in the same way
    # that SQL Server computes its grouping ID: build a string with one binary digit
    # per column in `all_groups`, using 1 where the column is NOT in `group_subset`
    # and 0 where it is, then read that string as a base 2 integer
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/grouping-id-transact-sql?view=sql-server-ver16
    if not all_groups:
        return 0
    bits = "".join("1" if group not in group_subset else "0" for group in all_groups)
    return int(bits, base=2)
32 | 


--------------------------------------------------------------------------------
/ehrql/utils/module_utils.py:
--------------------------------------------------------------------------------
 1 | import importlib
 2 | import sys
 3 | from pathlib import Path
 4 | 
 5 | 
 6 | def get_sibling_subclasses(cls):
 7 |     """
 8 |     Return all subclasses of `cls` defined in modules which are siblings of the module
 9 |     containing `cls`
10 | 
11 |     For example, sibling subclasses of the class `ehrql.backends.base.SQLBackend`
12 |     include:
13 | 
14 |         ehrql.backends.tpp.TPPBackend
15 |         ...
16 | 
17 |     This is useful for tests and for generating documentation, but isn't intended for
18 |     use in runtime code.
19 |     """
20 |     module_name = cls.__module__.rpartition(".")[0]
21 |     module = sys.modules[module_name]
22 |     return [
23 |         obj
24 |         for submodule in get_submodules(module)
25 |         for obj in vars(submodule).values()
26 |         if is_proper_subclass(obj, cls)
27 |     ]
28 | 
29 | 
def get_submodules(module):
    """
    Import and yield every submodule of `module`, recursing into subpackages

    Plain modules in the package directory are yielded first, followed by each
    subpackage and then that subpackage's own submodules.
    """
    package_dir = Path(module.__file__).parent
    prefix = module.__name__
    # Collect both sets of names up front, before anything gets imported
    module_names = [
        f"{prefix}.{path.stem}"
        for path in package_dir.glob("*.py")
        if path.name != "__init__.py"
    ]
    package_names = [
        f"{prefix}.{init_file.parent.name}"
        for init_file in package_dir.glob("*/__init__.py")
    ]
    for module_name in module_names:
        yield importlib.import_module(module_name)
    for package_name in package_names:
        package = importlib.import_module(package_name)
        yield package
        yield from get_submodules(package)
49 | 
50 | 
def is_proper_subclass(value, cls):
    """
    Return True if `value` is a subclass of `cls` other than `cls` itself; anything
    which `issubclass` rejects with a TypeError (e.g. a non-class) gives False
    """
    try:
        subclass = issubclass(value, cls)
    except TypeError:
        return False
    return subclass and value is not cls
56 | 
57 | 
def get_all_subclasses(cls):
    """
    Yield every direct and indirect subclass of `cls`, each one immediately followed
    by its own descendants
    """
    # Explicit stack in place of recursion; reversing keeps the original visit order
    pending = list(reversed(cls.__subclasses__()))
    while pending:
        current = pending.pop()
        yield current
        pending.extend(reversed(current.__subclasses__()))
62 | 


--------------------------------------------------------------------------------
/ehrql/utils/sequence_utils.py:
--------------------------------------------------------------------------------
def ordered_set(sequence):
    """
    Return a list of the distinct items of `sequence` in order of first appearance
    """
    first_seen = {}
    for item in sequence:
        first_seen.setdefault(item, None)
    return [*first_seen]
6 | 


--------------------------------------------------------------------------------
/ehrql/utils/string_utils.py:
--------------------------------------------------------------------------------
 1 | import textwrap
 2 | 
 3 | 
 4 | def strip_indent(s):
 5 |     """
 6 |     Remove indentation from a multiline string
 7 | 
 8 |     This is especially useful for taking docstrings and displaying them as markdown.
 9 |     Note that before de-indenting we strip leading newlines but not leading whitespace
10 |     more generally. This allow us to have the opening quotes on a different line from
11 |     the text body.
12 |     """
13 |     return textwrap.dedent(s.lstrip("\n")).strip()
14 | 


--------------------------------------------------------------------------------
/hooks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/hooks/__init__.py


--------------------------------------------------------------------------------
/hooks/parent_snippets.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | 
 4 | def on_page_markdown(markdown, page, **kwargs):
 5 |     """
 6 |     parent_snippet markers are snippets that are intended to be replaced in the parent
 7 |     site with appropriate snippet notation.  The snippets themselves do not live in
 8 |     this repo.
 9 | 
10 |     on_page_* methods are called for each Page in a mkdocs site and can modify the
11 |     markdown they are given as input.  We're using this method to look for the
12 |     parent_includes markers and replace them with a note box that indicates in the
13 |     built docs that this snippet will be replaced in the full docs build.
14 | 
15 |     For example:
16 |         !!! parent_snippet:'includes/glossary.md'
17 | 
18 |     will be replaced with:
19 |         !!! note "TO BE REPLACED IN FULL DOCS BUILD
20 |             This snippet will be replaced in the main docs with the parent file 'includes/glossary.md'
21 | 
22 |     This allows docs imported from other repos (e.g. ehrql) to reference snippets
23 |     in the parent docs, such as the glossary.
24 |     """
25 |     parent_snippets = set(re.findall(r"!!! parent_snippet:.+\n", markdown))
26 |     for parent_snippet in parent_snippets:
27 |         markdown = markdown.replace(
28 |             parent_snippet,
29 |             '\n\n!!! note "TO BE REPLACED IN FULL DOCS BUILD"\n\n\tThis snippet will be replaced in the main docs '
30 |             f"with the parent file {parent_snippet.lstrip('!!! parent_snippet:')}",
31 |         )
32 |     return markdown
33 | 


--------------------------------------------------------------------------------
/pyproject.minimal.toml:
--------------------------------------------------------------------------------
 1 | # This contains just the minimal configuration needed to be able to install the
 2 | # script entrypoints. We use this in the Dockerfile to be able to set up a
 3 | # virtualenv with all the right scripts pointing to the right entrypoints
 4 | # without creating a dependency on the whole project state so we avoid having
 5 | # to rebuild the virtualenv every time any file changes.
 6 | #
 7 | # A test at `tests/unit/test_pyproject_minimal.py` makes sure that this file
 8 | # doesn't get out of sync with the original.
 9 | 
10 | [project]
11 | name = "opensafely-ehrql"
12 | version = "2+local"
13 | 
14 | [project.scripts]
15 | ehrql = "ehrql.__main__:entrypoint"
16 | 


--------------------------------------------------------------------------------
/requirements.dev.in:
--------------------------------------------------------------------------------
 1 | --constraint requirements.prod.txt
 2 | 
 3 | # Additional dev requirements
 4 | # To generate a requirements file that includes both prod and dev requirements, run:
 5 | # pip-compile --generate-hashes --output-file=requirements.dev.txt requirements.dev.in
 6 | 
 7 | docker
 8 | # Pinning hypothesis because something in 6.131.14 has caused the tests to take
 9 | # about 2x longer than they did before
10 | # https://github.com/opensafely-core/ehrql/issues/2456
11 | hypothesis==6.131.13
12 | pip-tools
13 | pre-commit
14 | pyright[nodejs]
15 | pytest
16 | pytest-cov
17 | pytest-mock
18 | pytest-xdist
19 | ruff
20 | toml
21 | 
22 | # docs
23 | mkdocs
24 | mkdocs-material
25 | 
26 | # The following is a work-around for a bug in pip-compile. For more information, see:
27 | # https://github.com/jazzband/pip-tools/issues/2176
28 | pip==25.0.1
29 | 


--------------------------------------------------------------------------------
/requirements.prod.in:
--------------------------------------------------------------------------------
 1 | pyarrow
 2 | sqlalchemy
 3 | 
 4 | # Database driver for MS-SQL
 5 | pymssql
 6 | 
 7 | # Trino python client and database driver
 8 | trino
 9 | 
10 | # Gives us isolation from the system version of SQLite and means we don't
11 | # need to worry about e.g. some versions of SQLite missing the `FLOOR`
12 | # function.
13 | sqlean.py
14 | 
15 | # For graphing query graphs
16 | networkx
17 | pydot
18 | 


--------------------------------------------------------------------------------
/scripts/.gitignore:
--------------------------------------------------------------------------------
1 | /environ.env
2 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/__init__.py


--------------------------------------------------------------------------------
/tests/acceptance/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/acceptance/__init__.py


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/mainroute_cancer/analysis/codelists.py:
--------------------------------------------------------------------------------
 1 | from ehrql import codelist_from_csv
 2 | 
# Every codelist below is read from a CSV file under `codelists/`; `column` names
# the CSV column which holds the codes.

# Colorectal cancer: symptoms, diagnoses and referrals
colorectal_symptom_codes = codelist_from_csv(
    "codelists/phc-symptoms-colorectal-cancer.csv", column="code"
)

# NOTE(review): the file name has a doubled "phc-phc-" prefix, unlike its
# siblings -- confirm it matches the file on disk
colorectal_diagnosis_codes_snomed = codelist_from_csv(
    "codelists/phc-phc-colorectal-cancer-snomed.csv", column="code"
)

colorectal_referral_codes = codelist_from_csv(
    "codelists/phc-2ww-referral-colorectal.csv", column="code"
)

# Individual symptom codelists
# Iron deficiency anaemia
ida_codes = codelist_from_csv(
    "codelists/phc-symptom-colorectal-ida.csv", column="code"
)

# NOTE(review): "cibh" presumably stands for change in bowel habit -- confirm
cibh_codes = codelist_from_csv(
    "codelists/phc-symptom-colorectal-cibh.csv", column="code"
)

# Rectal bleeding
prbleeding_codes = codelist_from_csv(
    "codelists/phc-symptom-colorectal-pr-bleeding.csv", column="code"
)

# Weight loss
wl_codes = codelist_from_csv(
    "codelists/phc-symptom-colorectal-wl.csv", column="code"
)

abdomass_codes = codelist_from_csv(
    "codelists/phc-symptom-lowergi-abdo-mass.csv", column="code"
)

abdopain_codes = codelist_from_csv(
    "codelists/phc-symptom-lowergi-abdo-pain.csv", column="code"
)

anaemia_codes = codelist_from_csv(
    "codelists/phc-symptom-lowergi-anaemia.csv", column="code"
)

# Faecal occult blood / faecal immunochemical tests
fit_codes = codelist_from_csv(
    "codelists/phc-fit-test.csv", column="code"
)

# Ethnicity: the same CSV is loaded twice, differing only in which column is
# passed as `category_column` ("Grouping_16" vs "Grouping_6")
ethnicity_codes_16 = codelist_from_csv(
    "codelists/opensafely-ethnicity-snomed-0removed.csv",
    column="snomedcode",
    category_column="Grouping_16",
)

ethnicity_codes_6 = codelist_from_csv(
    "codelists/opensafely-ethnicity-snomed-0removed.csv",
    column="snomedcode",
    category_column="Grouping_6",
)
58 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/mainroute_cancer/codelists/phc-2ww-referral-colorectal.csv:
--------------------------------------------------------------------------------
1 | code,term
2 | 276401000000108,Fast track referral for suspected colorectal cancer
3 | 276411000000105,Urgent cancer referral - colorectal 
4 | 276421000000104,Urgent cancer referral - colorectal 
5 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/mainroute_cancer/codelists/phc-colorectal-cancer-icd10.csv:
--------------------------------------------------------------------------------
 1 | code,term
 2 | C18,Malignant neoplasm of colon
 3 | C180,Malignant neoplasm: Caecum
 4 | C181,Malignant neoplasm: Appendix
 5 | C182,Malignant neoplasm: Ascending colon
 6 | C183,Malignant neoplasm: Hepatic flexure
 7 | C184,Malignant neoplasm: Transverse colon
 8 | C185,Malignant neoplasm: Splenic flexure
 9 | C186,Malignant neoplasm: Descending colon
10 | C187,Malignant neoplasm: Sigmoid colon
11 | C188,Malignant neoplasm: Overlapping lesion of colon
12 | C189,"Malignant neoplasm: Colon, unspecified"
13 | C19,Malignant neoplasm of rectosigmoid junction
14 | C20,Malignant neoplasm of rectum
15 | C21,Malignant neoplasm of anus and anal canal
16 | C210,"Malignant neoplasm: Anus, unspecified"
17 | C211,Malignant neoplasm: Anal canal
18 | C212,Malignant neoplasm: Cloacogenic zone
19 | C218,"Malignant neoplasm: Overlapping lesion of rectum, anus and anal canal"
20 | C785,Secondary malignant neoplasm of large intestine and rectum
21 | D374,Neoplasm of uncertain or unknown behaviour: Colon
22 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/mainroute_cancer/codelists/phc-fit-test.csv:
--------------------------------------------------------------------------------
1 | code,term
2 | 1015401000000102,Faecal occult blood test
3 | 1049361000000101,Quantitative faecal immunochemical test
4 | 1049371000000108,Quantitative faecal immunochemical test 
5 | 389076003,Fecal occult blood: trace
6 | 59614000,Occult blood in stools
7 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/mainroute_cancer/codelists/phc-symptom-colorectal-ida.csv:
--------------------------------------------------------------------------------
 1 | code,term
 2 | 191127009,Anaemia due to chronic blood loss: [iron deficiency] or [normocytic]
 3 | 191128004,Iron deficiency anemia due to dietary causes
 4 | 191135007,Chlorotic anemia
 5 | 191408005,[X]Other iron deficiency anemias
 6 | 234351006,Iron deficiency anaemia due to chronic blood loss
 7 | 371315009,Iron deficiency anemia secondary to inadequate dietary iron intake
 8 | 397761000000103,[X]Other iron deficiency anaemias
 9 | 413533008,Anemia due to chronic blood loss
10 | 42626004,Iron deficiency anemia secondary to chronic blood loss
11 | 44252001,Blood loss anemia
12 | 598461000000107,Iron deficiency anemia NOS
13 | 610661000000100,Other specified iron deficiency anemia NOS
14 | 610671000000107,Unspecified iron deficiency anemia
15 | 661301000000100,Other specified iron deficiency anemia
16 | 717948004,Acquired iron deficiency anemia due to increased iron requirement
17 | 722005000,Iron-refractory iron deficiency anemia
18 | 724556004,Iron deficiency anemia due to blood loss
19 | 724557008,Acquired iron deficiency anemia due to decreased absorption
20 | 80126007,Plummer-Vinson syndrome
21 | 87522002,Iron deficiency anemia
22 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/mainroute_cancer/codelists/phc-symptom-colorectal-pr-bleeding.csv:
--------------------------------------------------------------------------------
 1 | code,term
 2 | 1085171000119108,Rectal hemorrhage due to chronic ulcerative pancolitis
 3 | 1085221000119103,Rectal hemorrhage due to chronic ulcerative proctitis
 4 | 1085271000119102,Rectal hemorrhage due to chronic ulcerative rectosigmoiditis
 5 | 1085431000119101,Rectal hemorrhage due to inflammatory polyps of colon
 6 | 1085791000119105,Rectal hemorrhage due to Crohn's disease of large intestine
 7 | 1085841000119108,Rectal hemorrhage due to Crohn's disease of small and large intestines
 8 | 1085891000119100,Rectal hemorrhage due to Crohn's disease of small intestine
 9 | 1085941000119104,Rectal hemorrhage due to Crohn's disease
10 | 1092881000119105,Rectal hemorrhage due to ulcerative colitis
11 | 12063002,Rectal hemorrhage
12 | 164451000000109,Painful rectal bleeding
13 | 164461000000107,Painless rectal bleeding
14 | 171731000000100,Painful rectal bleeding
15 | 171741000000109,Painless rectal bleeding
16 | 266464001,Hemorrhage of rectum and anus
17 | 414991007,Painful rectal bleeding
18 | 414992000,Painless rectal bleeding
19 | 571611000000105,Hemorrhage of rectum and anus NOS
20 | 721690003,Acute hemorrhagic ulcer of rectum
21 | 981008,Hemorrhagic proctitis
22 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/mainroute_cancer/codelists/phc-symptom-colorectal-wl.csv:
--------------------------------------------------------------------------------
 1 | code,term
 2 | 139089007,Weight decreasing
 3 | 139091004,Weight loss (& abnormal)
 4 | 158271000,[D]Abnormal loss of weight
 5 | 161832001,Weight decreasing
 6 | 161834000,Abnormal weight loss (& [symptom])
 7 | 198511000000103,Complaining of weight loss
 8 | 206919000,[D]Abnormal loss of weight
 9 | 213791000000109,Complaining of weight loss
10 | 213801000000108,Complaining of weight loss
11 | 23712001,Abnormal decrease in weight
12 | 267024001,Abnormal weight loss
13 | 267158006,Weight loss (& abnormal)
14 | 422868009,Unexplained weight loss
15 | 448765001,Unintentional weight loss
16 | 496901000000107,[D]Abnormal loss of weight
17 | 511461000000103,Unexplained weight loss
18 | 699205002,Involuntary weight loss
19 | 768571000000103,Unintentional weight loss
20 | 768581000000101,Unintentional weight loss 
21 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/qof-diabetes/analysis/dataset_definition_dm017.py:
--------------------------------------------------------------------------------
 1 | from ehrql import INTERVAL, Measures, months
 2 | from ehrql.tables.tpp import patients
 3 | 
 4 | from dm_dataset import (
 5 |     make_dm_dataset,
 6 |     get_registration_status,
 7 |     get_dm_reg_r1,
 8 |     get_dm_reg_r2,
 9 | )
10 | 
# All variables are evaluated relative to the start of each measure interval
index_date = INTERVAL.start_date

# Instantiate dataset and define clinical variables
dataset = make_dm_dataset(index_date=index_date)

# Define registration status
# NOTE: this is not identical to GMS registration status
has_registration = get_registration_status(index_date)

# Define diabetes register (DM_REG) rules:
dataset.dm_reg_r1 = get_dm_reg_r1(dataset)
dataset.dm_reg_r2 = get_dm_reg_r2(dataset)

# Define select rule 2
# (patients who satisfy rule 1 and do not satisfy rule 2)
has_dm_reg_select_r2 = dataset.dm_reg_r1 & ~dataset.dm_reg_r2

# Define DM017 numerator and denominator
dm017_numerator = has_dm_reg_select_r2
dm017_denominator = has_registration

# Define measures
measures = Measures()

# A single measure, broken down by patient sex
# NOTE(review): `months(12).starting_on(...)` presumably yields twelve
# consecutive monthly intervals from 2022-03-01 -- confirm against ehrql docs
measures.define_measure(
    name="dm017",
    numerator=dm017_numerator,
    denominator=dm017_denominator,
    group_by={"sex": patients.sex},
    intervals=months(12).starting_on("2022-03-01"),
)
41 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/qof-diabetes/analysis/variable_lib_helper.py:
--------------------------------------------------------------------------------
 1 | import operator
 2 | from functools import reduce
 3 | 
 4 | from ehrql.codes import ICD10Code
 5 | from ehrql import case, when
 6 | from ehrql.tables import tpp as schema
 7 | 
 8 | 
 9 | def first_matching_event(events, codelist, where=True):
10 |     return (
11 |         events.where(where)
12 |         .where(events.snomedct_code.is_in(codelist))
13 |         .sort_by(events.date)
14 |         .first_for_patient()
15 |     )
16 | 
17 | 
def last_matching_event(events, codelist, where=True):
    """
    Pick, for each patient, the last event (ordered by `events.date`) whose
    `snomedct_code` is in `codelist`

    `where` is an optional extra condition applied before the codelist filter;
    the default of `True` leaves the events unrestricted.
    """
    candidates = events.where(where)
    code_is_listed = events.snomedct_code.is_in(codelist)
    by_date = candidates.where(code_is_listed).sort_by(events.date)
    return by_date.last_for_patient()
25 | 
26 | 
def age_as_of(date):
    """
    Return the difference in years between `date` and the patient's date of birth
    """
    date_of_birth = schema.patients.date_of_birth
    return (date - date_of_birth).years
29 | 
30 | 
# TODO this is not exactly the same as died_from_any_cause().
# Note that this function only checks the patient table
def died_as_of(date):
    """
    Return a condition which holds when the patient has a recorded date of death
    that falls strictly before `date`
    """
    has_death_date = schema.patients.date_of_death.is_not_null()
    died_earlier = schema.patients.date_of_death < date
    return has_death_date & died_earlier
37 | 
38 | 
def _registrations_overlapping_period(start_date, end_date):
    """
    Return the practice registrations which span the whole of the given period

    A registration qualifies when it starts on or before `start_date` and either
    ends after `end_date` or has no end date at all.
    """
    registrations = schema.practice_registrations
    starts_in_time = registrations.start_date.is_on_or_before(start_date)
    ends_late_enough = (
        registrations.end_date.is_after(end_date) | registrations.end_date.is_null()
    )
    return registrations.where(starts_in_time & ends_late_enough)
45 | 
46 | 
def practice_registration_as_of(date):
    """
    Return one practice registration per patient which was active on `date`

    Where several registrations qualify, they are ordered by start date and then
    end date and the first is taken.
    """
    active = _registrations_overlapping_period(date, date)
    ordered = active.sort_by(active.start_date, active.end_date)
    return ordered.first_for_patient()
50 | 
51 | 
def get_events_on_or_between(events, codelist, start_date, end_date, where=True):
    """
    Return the events whose `snomedct_code` is in `codelist` and whose `date`
    satisfies `is_on_or_between(start_date, end_date)`

    `where` is an optional extra condition applied first; the default of `True`
    leaves the events unrestricted.
    """
    candidates = events.where(where)
    code_is_listed = events.snomedct_code.is_in(codelist)
    coded = candidates.where(code_is_listed)
    within_period = events.date.is_on_or_between(start_date, end_date)
    return coded.where(within_period)
58 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-bldtestdec_cod.csv:
--------------------------------------------------------------------------------
1 | code,term
2 | 116471000119100,Blood test declined
3 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-dminvite_cod.csv:
--------------------------------------------------------------------------------
 1 | code,term
 2 | 1066911000000100,Diabetes monitoring short message service text message first invitation
 3 | 1066921000000106,Diabetes monitoring short message service text message second invitation
 4 | 1066931000000108,Diabetes monitoring short message service text message third invitation
 5 | 1083111000000108,Diabetes monitoring invitation email
 6 | 1109921000000106,Quality and Outcomes Framework quality indicator-related care invitation
 7 | 1110921000000100,Quality and Outcomes Framework diabetes mellitus quality indicator-related care invitation
 8 | 143401000000102,Quality and Outcomes Framework diabetes mellitus quality indicator-related care invitation using preferred method of communication
 9 | 185756006,Diabetes monitoring first letter
10 | 185757002,Diabetes monitoring second letter
11 | 185758007,Diabetes monitoring third letter
12 | 185759004,Diabetes monitoring verbal invite
13 | 185760009,Diabetes monitoring telephone invite
14 | 310425007,Diabetes monitoring invitation
15 | 705072004,Diabetes monitoring invitation by short message service text messaging
16 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-dmmax_cod.csv:
--------------------------------------------------------------------------------
1 | code,term
2 | 407569005,Patient on maximal tolerated therapy for diabetes
3 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-dmpcadec_cod.csv:
--------------------------------------------------------------------------------
1 | code,term
2 | 716031000000106,Excepted from diabetes quality indicators - informed dissent
3 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-dmpcapu_cod.csv:
--------------------------------------------------------------------------------
1 | code,term
2 | 717421000000100,Excepted from diabetes quality indicators - patient unsuitable
3 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-dmres_cod.csv:
--------------------------------------------------------------------------------
1 | code,term
2 | 315051004,Diabetes resolved
3 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-ifcchbam_cod.csv:
--------------------------------------------------------------------------------
1 | code,term
2 | 1049301000000100,Haemoglobin A1c level (diagnostic reference range) - International Federation of Clinical Chemistry and Laboratory Medicine standardised
3 | 1049321000000109,Haemoglobin A1c level (monitoring ranges) - International Federation of Clinical Chemistry and Laboratory Medicine standardised
4 | 999791000000106,Haemoglobin A1c level - International Federation of Clinical Chemistry and Laboratory Medicine standardised
5 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-mildfrail_cod.csv:
--------------------------------------------------------------------------------
1 | code,term
2 | 925791000000100,Mild frailty
3 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-modfrail_cod.csv:
--------------------------------------------------------------------------------
1 | code,term
2 | 925831000000107,Moderate frailty
3 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-serfruc_cod.csv:
--------------------------------------------------------------------------------
1 | code,term
2 | 1006751000000102,Serum fructosamine level
3 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/qof-diabetes/codelists/nhsd-primary-care-domain-refsets-sevfrail_cod.csv:
--------------------------------------------------------------------------------
1 | code,term
2 | 925861000000102,Severe frailty
3 | 


--------------------------------------------------------------------------------
/tests/acceptance/external_studies/test-age-distribution/analysis/dataset_definition.py:
--------------------------------------------------------------------------------
 1 | from ehrql import create_dataset
 2 | from ehrql.tables.smoketest import patients
 3 | 
 4 | index_year = 2022
 5 | min_age = 18
 6 | max_age = 80
 7 | 
 8 | year_of_birth = patients.date_of_birth.year
 9 | age = index_year - year_of_birth
10 | 
11 | dataset = create_dataset()
12 | dataset.define_population((age >= min_age) & (age <= max_age))
13 | dataset.age = age
14 | 


--------------------------------------------------------------------------------
/tests/autocomplete/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/autocomplete/__init__.py


--------------------------------------------------------------------------------
/tests/docker/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/docker/__init__.py


--------------------------------------------------------------------------------
/tests/docker/test_drivers.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | 
 4 | def test_driver_in_container(call_cli_docker, engine):
 5 |     # This test doesn't make sense for these in-memory databases
 6 |     if engine.name in {"in_memory", "sqlite"}:
 7 |         pytest.skip()
 8 | 
 9 |     backends = {
10 |         "mssql": "ehrql.backends.tpp.TPPBackend",
11 |         "trino": "ehrql.backends.emis.EMISBackend",
12 |     }
13 | 
14 |     if engine.name not in backends:
15 |         assert False, f"no backend for database: {engine.name}"
16 | 
17 |     backend = backends[engine.name]
18 |     url = engine.database.container_url()
19 | 
20 |     call_cli_docker(
21 |         "test-connection",
22 |         "--backend",
23 |         backend,
24 |         "--url",
25 |         url,
26 |     )
27 | 


--------------------------------------------------------------------------------
/tests/docs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/docs/__init__.py


--------------------------------------------------------------------------------
/tests/fixtures/bad_definition_files/bad_import.py:
--------------------------------------------------------------------------------
1 | # noqa: INP001
2 | from ehrql.tables.smoketest import no_such_table  # noqa: F401
3 | 


--------------------------------------------------------------------------------
/tests/fixtures/bad_definition_files/bad_syntax.py:
--------------------------------------------------------------------------------
1 | what even is a Python
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/bad_definition_files/bad_types.py:
--------------------------------------------------------------------------------
1 | # noqa: INP001
2 | from ehrql.tables.core import patients
3 | 
4 | 
5 | patients.date_of_birth == patients.sex
6 | 


--------------------------------------------------------------------------------
/tests/fixtures/bad_definition_files/empty_measures.py:
--------------------------------------------------------------------------------
1 | from ehrql import Measures
2 | 
3 | measures = Measures()
4 | 


--------------------------------------------------------------------------------
/tests/fixtures/bad_definition_files/no_dataset.py:
--------------------------------------------------------------------------------
1 | datasat = {}
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/bad_definition_files/no_measures.py:
--------------------------------------------------------------------------------
1 | measuuuuures = []
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/bad_definition_files/no_population.py:
--------------------------------------------------------------------------------
1 | from ehrql import Dataset
2 | 
3 | dataset = Dataset()
4 | 


--------------------------------------------------------------------------------
/tests/fixtures/bad_definition_files/not_a_dataset.py:
--------------------------------------------------------------------------------
1 | dataset = object()
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/bad_definition_files/not_measures_instance.py:
--------------------------------------------------------------------------------
1 | measures = object()
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/bad_definition_files/operator_error.py:
--------------------------------------------------------------------------------
1 | # noqa: INP001
2 | from ehrql.tables.core import patients
3 | 
4 | 
5 | patients.date_of_birth == "2000-01-01" | patients.date_of_birth == "1990-01-01"
6 | 


--------------------------------------------------------------------------------
/tests/fixtures/codelist_csvs/categories.csv:
--------------------------------------------------------------------------------
1 | code,description,category
2 | 123A,Asthma,respiratory
3 | 123B,Severe asthma,respiratory
4 | 234C,Hypertension,other
5 | 345D,Hyperthyroidism,other
6 | 


--------------------------------------------------------------------------------
/tests/fixtures/codelist_csvs/custom_col.csv:
--------------------------------------------------------------------------------
1 | 123Codes,description
2 | 123-A,Asthma
3 | 123-B,Severe asthma
4 | 123-C,Hypertension
5 | 123-D,Hyperthyroidism
6 | 


--------------------------------------------------------------------------------
/tests/fixtures/codelist_csvs/default_col.csv:
--------------------------------------------------------------------------------
1 | code,description
2 | 123A,Asthma
3 | 123B,Severe asthma
4 | 234C,Hypertension
5 | 345D,Hyperthyroidism
6 | 


--------------------------------------------------------------------------------
/tests/fixtures/codelist_csvs/extra_whitespace.csv:
--------------------------------------------------------------------------------
1 | code,description
2 | W123  ,Asthma
3 | W234, Severe asthma
4 | W345, Hypertension
5 |   W456,Hyperthyroidism
6 | 


--------------------------------------------------------------------------------
/tests/fixtures/csv_date_merging/measure_test_2021-01-01.csv:
--------------------------------------------------------------------------------
1 | a,b,c
2 | 1,2,3
3 | 


--------------------------------------------------------------------------------
/tests/fixtures/csv_date_merging/measure_test_2021-02-01.csv:
--------------------------------------------------------------------------------
1 | a,b,c
2 | 4,5,6
3 | 


--------------------------------------------------------------------------------
/tests/fixtures/csv_date_merging/measure_test_2021-03-01.csv:
--------------------------------------------------------------------------------
1 | a,b,c
2 | 7,8,9
3 | 


--------------------------------------------------------------------------------
/tests/fixtures/csv_date_merging/measure_test_20210908.csv:
--------------------------------------------------------------------------------
1 | a,b,c
2 | 10,11,12
3 | 


--------------------------------------------------------------------------------
/tests/fixtures/csv_date_merging/measure_test_code_2021-03-01.csv:
--------------------------------------------------------------------------------
1 | d,e,f
2 | 1,2,3
3 | 


--------------------------------------------------------------------------------
/tests/fixtures/csv_date_merging/measure_test_code_2021-04-01.csv:
--------------------------------------------------------------------------------
1 | d,e,f
2 | 4,5,6
3 | 


--------------------------------------------------------------------------------
/tests/fixtures/csv_date_merging/measure_test_error_2021-01-01.csv:
--------------------------------------------------------------------------------
1 | a,b,d
2 | 1,2,3
3 | 


--------------------------------------------------------------------------------
/tests/fixtures/csv_date_merging/measure_test_error_2021-02-01.csv:
--------------------------------------------------------------------------------
1 | a,b,c
2 | 1,2,3
3 | 


--------------------------------------------------------------------------------
/tests/fixtures/csv_date_merging/measure_test_event.csv:
--------------------------------------------------------------------------------
1 | a,b,c
2 | 0,0,0
3 | 


--------------------------------------------------------------------------------
/tests/fixtures/debug/patients.csv:
--------------------------------------------------------------------------------
1 | patient_id,date_of_birth,sex,date_of_death
2 | 1,1980-01-01,female,
3 | 2,1990-02-01,male,
4 | 3,2000-03-01,female,
5 | 4,2010-04-01,male,
6 | 


--------------------------------------------------------------------------------
/tests/fixtures/dummy_data/dummy-data.csv:
--------------------------------------------------------------------------------
1 | patient_id,sex,has_event,event_date,event_count
2 | 11,F,1,2021-01-01,1
3 | 22,M,0,,
4 | 


--------------------------------------------------------------------------------
/tests/fixtures/dummy_data/dummy-data.txt:
--------------------------------------------------------------------------------
1 | patient_id,sex,has_event,event_date,event_count
2 | 11,F,1,2021-01-01,1
3 | 22,M,0,,
4 | 


--------------------------------------------------------------------------------
/tests/fixtures/dummy_data/extra-column.csv:
--------------------------------------------------------------------------------
1 | patient_id,sex,has_event,event_date,event_count,extra_col
2 | 11,F,1,2021-01-01,1,
3 | 22,M,0,,,
4 | 


--------------------------------------------------------------------------------
/tests/fixtures/dummy_data/invalid-bool.csv:
--------------------------------------------------------------------------------
1 | patient_id,sex,has_event,event_date,event_count
2 | 11,F,X,2021-01-01,1
3 | 22,M,0,,
4 | 


--------------------------------------------------------------------------------
/tests/fixtures/dummy_data/invalid-date.csv:
--------------------------------------------------------------------------------
1 | patient_id,sex,has_event,event_date,event_count
2 | 11,F,1,2021-021-021,1
3 | 22,M,0,,
4 | 


--------------------------------------------------------------------------------
/tests/fixtures/dummy_data/invalid-patient-id.csv:
--------------------------------------------------------------------------------
1 | patient_id,sex,has_event,event_date,event_count
2 | Eleven,F,1,2021-01-01,1
3 | 22,M,0,,
4 | 


--------------------------------------------------------------------------------
/tests/fixtures/dummy_data/missing-column.csv:
--------------------------------------------------------------------------------
1 | patient_id,sex,has_event,event_count
2 | 11,F,1,1
3 | 22,M,0,
4 | 


--------------------------------------------------------------------------------
/tests/fixtures/dummy_data/zero-date.csv:
--------------------------------------------------------------------------------
1 | patient_id,sex,has_event,event_date,event_count
2 | 11,F,1,2021-10-01,1
3 | 22,M,0,,
4 | 33,M,,0,
5 | 


--------------------------------------------------------------------------------
/tests/fixtures/good_definition_files/assurance.py:
--------------------------------------------------------------------------------
 1 | # noqa: INP001
 2 | from datetime import date
 3 | 
 4 | from ehrql import Dataset
 5 | from ehrql.tables.core import patients
 6 | 
 7 | 
# The population is restricted to patients born on or after 2000-01-01.
dataset = Dataset()
dataset.define_population(patients.date_of_birth.is_on_or_after("2000-01-01"))

# Test data for the dataset above. The single entry (key 1) supplies a
# `patients` row with a date of birth of 1999-12-01, which is before the
# cut-off date, and states that it is not expected in the population.
# NOTE(review): the top-level keys are presumably patient IDs — confirm.
test_data = {
    # Correctly not expected in population
    1: {
        "patients": {"date_of_birth": date(1999, 12, 1)},
        "expected_in_population": False,
    },
}
18 | 


--------------------------------------------------------------------------------
/tests/fixtures/good_definition_files/chatty_dataset_definition.py:
--------------------------------------------------------------------------------
 1 | # noqa: INP001
 2 | import sys
 3 | 
 4 | from ehrql import create_dataset
 5 | from ehrql.tables.core import patients
 6 | 
 7 | 
# Write a message to stderr as a side effect of executing this definition file.
print("I am a bit chatty", file=sys.stderr)

# A minimal dataset: one column (year of birth) and a population of every
# patient for which `patients.exists_for_patient()` holds.
dataset = create_dataset()
dataset.year_of_birth = patients.date_of_birth.year
dataset.define_population(patients.exists_for_patient())
13 | 


--------------------------------------------------------------------------------
/tests/fixtures/good_definition_files/dataset_definition.py:
--------------------------------------------------------------------------------
 1 | # noqa: INP001
 2 | from ehrql import create_dataset
 3 | from ehrql.tables.core import patients
 4 | 
 5 | 
# A small dataset with two columns (`year_of_birth`, then `sex`) whose
# population is patients born on or after 2000-01-01.
dataset = create_dataset()
dataset.year_of_birth = patients.date_of_birth.year
dataset.sex = patients.sex
dataset.define_population(patients.date_of_birth.is_on_or_after("2000-01-01"))
10 | 


--------------------------------------------------------------------------------
/tests/fixtures/good_definition_files/dataset_definition_with_print.py:
--------------------------------------------------------------------------------
 1 | # noqa: INP001
 2 | from ehrql import create_dataset
 3 | from ehrql.tables.core import patients
 4 | 
 5 | 
# A small dataset with two columns (`year_of_birth`, then `sex`) whose
# population is patients born on or after 2000-01-01.
dataset = create_dataset()
dataset.year_of_birth = patients.date_of_birth.year
dataset.sex = patients.sex
dataset.define_population(patients.date_of_birth.is_on_or_after("2000-01-01"))
# After the dataset is fully defined, write a line to stdout.
print("user stdout")
11 | 


--------------------------------------------------------------------------------
/tests/fixtures/good_definition_files/debug_definition.py:
--------------------------------------------------------------------------------
 1 | # noqa: INP001
 2 | from ehrql import create_dataset, show
 3 | from ehrql.tables.core import patients
 4 | 
 5 | 
dataset = create_dataset()
# `show()` is called on the dataset before any column or population has been
# assigned to it; the remaining definitions follow afterwards.
show(dataset)
dataset.sex = patients.sex
dataset.define_population(patients.date_of_birth.is_on_or_after("2000-01-01"))
dataset.year_of_birth = patients.date_of_birth.year
11 | 


--------------------------------------------------------------------------------
/tests/fixtures/good_definition_files/measure_definitions.py:
--------------------------------------------------------------------------------
 1 | # noqa: INP001
 2 | from ehrql import INTERVAL, Measures, years
 3 | from ehrql.tables.core import patients
 4 | 
 5 | 
measures = Measures()

# Define a single measure named "births":
#   * numerator: whether the patient's date of birth falls during INTERVAL
#   * denominator: whether the patient exists in the `patients` table
#   * grouped by patient sex
# NOTE(review): `years(2).starting_on("2020-01-01")` presumably yields two
# consecutive one-year intervals beginning on 2020-01-01 — confirm.
measures.define_measure(
    "births",
    numerator=patients.date_of_birth.is_during(INTERVAL),
    denominator=patients.exists_for_patient(),
    group_by={"sex": patients.sex},
    intervals=years(2).starting_on("2020-01-01"),
)
15 | 


--------------------------------------------------------------------------------
/tests/fixtures/local_file_engine/events.csv:
--------------------------------------------------------------------------------
1 | patient_id,score
2 | 1,2
3 | 1,3
4 | 1,4
5 | 2,5
6 | 2,10
7 | 


--------------------------------------------------------------------------------
/tests/fixtures/local_file_engine/patients.csv:
--------------------------------------------------------------------------------
1 | patient_id,sex,ignored_column
2 | 1,M,a
3 | 2,F,b
4 | 3,,
5 | 


--------------------------------------------------------------------------------
/tests/fixtures/quiz-example-data/addresses.csv:
--------------------------------------------------------------------------------
 1 | patient_id,address_id,start_date,end_date,rural_urban_classification,imd_rounded,msoa_code
 2 | 2,1001,1993-01-30,2015-08-13,2,19600,E02008618
 3 | 2,1002,2015-08-13,,4,11700,E02002421
 4 | 4,3001,2007-06-01,,7,28600,E02000623
 5 | 5,4001,1981-03-12,2018-05-23,5,21200,E02000692
 6 | 6,5001,2020-07-13,,2,5300,E02003054
 7 | 7,6001,2013-11-19,,3,27700,E02007050
 8 | 8,7001,2018-12-03,,2,16800,E02006760
 9 | 9,8001,1999-04-29,2017-11-10,1,26800,E02003208
10 | 9,8002,2017-11-10,,2,21400,E02009132
11 | 10,9001,2005-03-28,,1,6900,E02001319
12 | 10,9002,2015-04-18,,1,6400,E02001792
13 | 


--------------------------------------------------------------------------------
/tests/fixtures/quiz-example-data/clinical_events.csv:
--------------------------------------------------------------------------------
 1 | patient_id,date,snomedct_code,ctv3_code,numeric_value
 2 | 1,2014-01-11,195967001,H33..,
 3 | 1,2014-04-10,60621009,X76C0,25.8
 4 | 2,2015-08-06,195967001,H33..,
 5 | 2,2017-04-12,60621009,X76C2,18.4
 6 | 2,2018-05-26,60621009,X76C1,23.1
 7 | 2,2020-05-17,73211009,C10..,
 8 | 3,2017-05-11,60621009,X76C3,29.5
 9 | 4,2019-05-16,60621009,X76C4,34.3
10 | 4,2022-11-09,73211009,C10..,
11 | 5,2017-05-11,195967001,H33..,
12 | 5,2017-05-23,60621009,X76C5,22.3
13 | 5,2017-08-01,60621009,X76C6,19.9
14 | 6,2017-07-11,73211009,C10..,
15 | 6,2018-08-16,60621009,X76C7,22.8
16 | 6,2019-07-06,195967001,H33..,
17 | 7,2018-01-06,60621009,X76C8,35.2
18 | 8,2021-01-27,73211009,C10..,
19 | 8,2022-10-25,60621009,X76C9,16.3
20 | 9,2015-07-12,195967001,H33..,
21 | 10,2015-03-14,73211009,C10..,
22 | 


--------------------------------------------------------------------------------
/tests/fixtures/quiz-example-data/medications.csv:
--------------------------------------------------------------------------------
 1 | patient_id,date,dmd_code
 2 | 1,2014-01-11,39113611000001102
 3 | 2,2015-08-06,39113611000001102
 4 | 2,2018-09-21,39113311000001107
 5 | 2,2020-05-17,22777311000001105
 6 | 4,2022-11-09,22777311000001105
 7 | 5,2017-05-11,39113611000001102
 8 | 6,2017-07-11,3484711000001105
 9 | 6,2019-07-06,39113611000001102
10 | 8,2021-01-27,3484711000001105
11 | 10,2015-03-14,3484711000001105
12 | 


--------------------------------------------------------------------------------
/tests/fixtures/quiz-example-data/ons_deaths.csv:
--------------------------------------------------------------------------------
1 | "patient_id","date","place","underlying_cause_of_death","cause_of_death_01","cause_of_death_02","cause_of_death_03","cause_of_death_04","cause_of_death_05","cause_of_death_06","cause_of_death_07","cause_of_death_08","cause_of_death_09","cause_of_death_10","cause_of_death_11","cause_of_death_12","cause_of_death_13","cause_of_death_14","cause_of_death_15"
2 | 1,2015-09-14,"Hospital","C91.1",,,,,,,,,,,,,,,
3 | 5,2018-05-23,"Home","I21.0",,,,,,,,,,,,,,,
4 | 9,2017-11-10,"Care Home","I69.4",,,,,,,,,,,,,,,
5 | 


--------------------------------------------------------------------------------
/tests/fixtures/quiz-example-data/patients.csv:
--------------------------------------------------------------------------------
 1 | patient_id,date_of_birth,sex,date_of_death
 2 | 1,1973-07-01,female,2015-09-14
 3 | 2,1948-03-01,male,
 4 | 3,2003-04-01,male,
 5 | 4,2007-06-01,female,
 6 | 5,1938-10-01,male,2018-05-23
 7 | 6,1994-04-01,female,
 8 | 7,1953-05-01,male,
 9 | 8,1992-08-01,female,
10 | 9,1931-10-01,female,2017-11-10
11 | 10,1979-04-01,male,
12 | 


--------------------------------------------------------------------------------
/tests/fixtures/quiz-example-data/practice_registrations.csv:
--------------------------------------------------------------------------------
 1 | patient_id,start_date,end_date,practice_pseudo_id,practice_stp,practice_nuts1_region_name
 2 | 1,2012-09-23,2015-09-14,7055,E54000048,North West
 3 | 2,1993-02-04,,975,E54000009,North East
 4 | 3,2004-12-03,,6669,E54000019,West Midlands
 5 | 4,2007-09-13,2019-03-04,7199,E54000026,East
 6 | 4,2019-01-02,,7199,E54000026,East
 7 | 5,2007-07-21,2018-05-23,8595,E54000024,East
 8 | 6,2020-09-14,,5045,E54000017,West Midlands
 9 | 7,2013-12-05,,7588,E54000050,Yorkshire and The Humber
10 | 7,2017-05-12,,5121,E54000050,Yorkshire and The Humber
11 | 8,2020-01-19,,1858,E54000012,West Midlands
12 | 9,1995-03-01,2017-11-10,8189,E54000023,East
13 | 10,2001-10-24,,7783,E54000037,South West
14 | 


--------------------------------------------------------------------------------
/tests/functional/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/functional/__init__.py


--------------------------------------------------------------------------------
/tests/functional/test_assure.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | 
 4 | FIXTURES_PATH = Path(__file__).parents[1] / "fixtures" / "good_definition_files"
 5 | 
 6 | 
 7 | def test_assure(call_cli):
 8 |     captured = call_cli("assure", FIXTURES_PATH / "assurance.py")
 9 |     assert "All OK" in captured.out
10 | 


--------------------------------------------------------------------------------
/tests/functional/test_dump_example_data.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | 
3 | 
def test_dump_example_data(call_cli, tmp_path):
    """`dump-example-data` writes `patients.csv` into `example-data/` under the cwd."""
    # Run from inside the temporary directory so the output lands there.
    with contextlib.chdir(tmp_path):
        call_cli("dump-example-data")
    example_dir = tmp_path / "example-data"
    dumped_names = [entry.name for entry in example_dir.iterdir()]
    assert "patients.csv" in dumped_names
9 | 


--------------------------------------------------------------------------------
/tests/functional/test_entrypoint.py:
--------------------------------------------------------------------------------
 1 | import os.path
 2 | import subprocess
 3 | import sys
 4 | 
 5 | 
 6 | def test_entrypoint():
 7 |     # Include the Python executable directory on the path so that even if the virtualenv
 8 |     # isn't activated we can still find the `ehrql` executable.
 9 |     path = os.pathsep.join(
10 |         [os.path.dirname(sys.executable), os.environ.get("PATH", "")]
11 |     )
12 | 
13 |     result = subprocess.run(
14 |         ["ehrql", "--help"],
15 |         capture_output=True,
16 |         text=True,
17 |         check=True,
18 |         env={"PATH": path},
19 |     )
20 |     assert "usage: ehrql [-h]" in result.stdout
21 | 


--------------------------------------------------------------------------------
/tests/functional/test_graph_query.py:
--------------------------------------------------------------------------------
 1 | import shutil
 2 | from pathlib import Path
 3 | 
 4 | import pytest
 5 | 
 6 | 
 7 | FIXTURES_PATH = Path(__file__).parents[1] / "fixtures" / "good_definition_files"
 8 | 
 9 | 
@pytest.mark.skipif(
    shutil.which("dot") is None,
    reason="Graphing requires Graphviz library",
)
def test_graph_query(call_cli, tmp_path):  # pragma: no cover
    """`graph-query` writes the requested output file for a dataset definition."""
    definition_path = FIXTURES_PATH / "dataset_definition.py"
    svg_path = tmp_path / "query.svg"
    call_cli("graph-query", definition_path, "--output", svg_path)
    assert svg_path.exists()
23 | 


--------------------------------------------------------------------------------
/tests/functional/test_isolation_report.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import sys
 3 | 
 4 | import pytest
 5 | 
 6 | 
 7 | @pytest.mark.skipif(
 8 |     not sys.platform.startswith("linux"),
 9 |     reason="Subprocess isolation only works on Linux",
10 | )
11 | def test_isolation_report(call_cli):
12 |     captured = call_cli("isolation-report")
13 |     assert json.loads(captured.out)
14 | 


--------------------------------------------------------------------------------
/tests/functional/test_serialize_definition.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from pathlib import Path
 3 | 
 4 | import pytest
 5 | 
 6 | 
 7 | FIXTURES_PATH = Path(__file__).parents[1] / "fixtures" / "good_definition_files"
 8 | 
 9 | 
@pytest.mark.parametrize(
    "definition_type,definition_file",
    [
        ("dataset", FIXTURES_PATH / "dataset_definition.py"),
        ("measures", FIXTURES_PATH / "measure_definitions.py"),
        ("test", FIXTURES_PATH / "assurance.py"),
    ],
)
def test_serialize_definition(definition_type, definition_file, call_cli):
    """`serialize-definition` emits JSON on stdout and nothing on stderr."""
    cli_args = [
        "serialize-definition",
        "--definition-type",
        definition_type,
        definition_file,
    ]
    captured = call_cli(*cli_args)
    # Correctness of the serialization itself is covered by tests elsewhere; this
    # test only checks that what comes back parses as (non-empty) JSON
    serialized = json.loads(captured.out)
    assert serialized
    # No warnings or other stray output should be produced
    assert captured.err == ""
30 | 


--------------------------------------------------------------------------------
/tests/functional/test_test_connection.py:
--------------------------------------------------------------------------------
def test_test_connection(mssql_database, call_cli):
    """`test-connection` reports success against the MSSQL test database."""
    database_url = mssql_database.host_url()
    environ = {
        "BACKEND": "ehrql.backends.tpp.TPPBackend",
        "DATABASE_URL": database_url,
    }
    result = call_cli("test-connection", environ=environ)
    assert "SUCCESS" in result.out
8 | 


--------------------------------------------------------------------------------
/tests/generative/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/generative/__init__.py


--------------------------------------------------------------------------------
/tests/generative/conftest.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | from .recording import recorder
 4 | 
 5 | 
 6 | __all__ = ["recorder"]
 7 | 
 8 | 
 9 | class BrokenDatabaseError(KeyboardInterrupt):
10 |     def __init__(self, database):  # pragma: no cover
11 |         self.database = database
12 | 
13 | 
def pytest_keyboard_interrupt(excinfo):  # pragma: no cover
    """Exit with status 6 when the interrupt is a `BrokenDatabaseError`.

    Any other interrupt is left alone and the function returns `None`.
    """
    error = excinfo.value
    if not isinstance(error, BrokenDatabaseError):
        return
    print(f"Unrecoverably broken {error.database} database")
    sys.exit(6)
18 | 


--------------------------------------------------------------------------------
/tests/generative/data_setup.py:
--------------------------------------------------------------------------------
 1 | from ehrql.query_model.nodes import (
 2 |     AggregateByPatient,
 3 |     Function,
 4 |     SelectPatientTable,
 5 |     SelectTable,
 6 | )
 7 | from tests.lib.orm_utils import orm_classes_from_tables
 8 | 
 9 | 
def setup(schema, num_patient_tables, num_event_tables):
    """Create the query-model tables and matching ORM classes for generative tests.

    Patient tables are named `p0`, `p1`, ... and event tables `e0`, `e1`, ...;
    all of them use the supplied `schema`.

    Returns a 4-tuple of: the patient-table ORM classes, the event-table ORM
    classes, a query OR-ing together an `Exists` clause for every table, and
    the `metadata` attribute of the first patient class.
    """
    patient_tables = [
        SelectPatientTable(f"p{n}", schema=schema) for n in range(num_patient_tables)
    ]
    event_tables = [
        SelectTable(f"e{n}", schema=schema) for n in range(num_event_tables)
    ]
    tables = patient_tables + event_tables

    classes_by_name = orm_classes_from_tables(tables)
    _add_classes_to_module_namespace(classes_by_name)

    patient_classes = [classes_by_name[tbl.name] for tbl in patient_tables]
    event_classes = [classes_by_name[tbl.name] for tbl in event_tables]

    all_patients_query = _build_query(tables)

    # The choice of the first patient class is arbitrary: every ORM class shares
    # the same MetaData
    shared_metadata = patient_classes[0].metadata

    return patient_classes, event_classes, all_patients_query, shared_metadata
37 | 
38 | 
39 | def _add_classes_to_module_namespace(orm_classes):
40 |     # It's helpful to have the classes available as module properties so that we can
41 |     # copy-paste failing test cases from Hypothesis. These classes naturally believe
42 |     # that they belong to the `orm_utils` module which created them, so we have to
43 |     # re-parent them here. We use only the final component of the module name as that's
44 |     # how we import it in `test_query_model`.
45 |     for class_ in orm_classes.values():
46 |         class_.__module__ = __name__.rpartition(".")[2]
47 |         globals()[class_.__name__] = class_
48 | 
49 | 
def _build_query(tables):
    """Return a query OR-ing together one `Exists` clause per table."""
    return _join_with_or(
        [AggregateByPatient.Exists(source=table) for table in tables]
    )
53 | 
54 | 
55 | def _join_with_or(clauses):
56 |     query = clauses[0]
57 |     for clause in clauses[1:]:
58 |         query = Function.Or(query, clause)
59 |     return query
60 | 


--------------------------------------------------------------------------------
/tests/generative/example.py:
--------------------------------------------------------------------------------
 1 | # A tiny example of a generative test case defined in a standalone file so that we can
 2 | # check that the `test_query_model_example_file` function works correctly
 3 | from ehrql.query_model.nodes import (
 4 |     AggregateByPatient,
 5 |     Column,
 6 |     Dataset,
 7 |     SelectColumn,
 8 |     SelectPatientTable,
 9 |     TableSchema,
10 | )
11 | 
12 | 
# A single patient-level table named "p0" whose schema has one integer
# column, `i1`.
p0 = SelectPatientTable(
    "p0",
    TableSchema(
        i1=Column(int),
    ),
)

# The population is defined by `Exists` over `p0`, and the only variable, "v",
# selects column `i1` from `p0`. No events are defined and measures are unset.
dataset = Dataset(
    population=AggregateByPatient.Exists(p0),
    variables={"v": SelectColumn(p0, "i1")},
    events={},
    measures=None,
)
# `data` is deliberately left empty in this example.
data = []
27 | 


--------------------------------------------------------------------------------
/tests/generative/generic_strategies.py:
--------------------------------------------------------------------------------
 1 | from hypothesis import strategies as st
 2 | 
 3 | 
# This is a variable that will normally be set to True, but which shrinking is
# allowed to make False.
#
# Examples of where this is useful:
#
# * Turning off expensive test operations that you want to check but don't care
#   about running if they turn out not to be relevant to the error you're
#   seeing.
# * Giving the shrinker a place to terminate generation in places where you've
#   decided to do it yourself because e.g. the data was getting too large.
#
# Implementation: an integer is drawn from the range 0..255 and mapped to a
# boolean, which is False only when the drawn integer is exactly 0.
usually = st.integers(0, 255).map(lambda n: n > 0)
15 | 
16 | 
@st.composite
def usually_all_of(draw, options, min_size=1):
    """Generates a list of distinct elements drawn from `options`, of size at least
    `min_size`. In the normal course of things, this will usually be all of `options`,
    but the shrinker is allowed to remove elements from it, which can speed up
    test execution during shrinking significantly.

    `options` must be a sequence (it is measured with `len` and zipped against
    the drawn flags) and the result preserves its order."""
    flags = draw(st.lists(usually, min_size=len(options), max_size=len(options)))

    # In order to make sure enough of these are set, we set some
    # of the flags to true. We do this unconditionally on whether enough
    # flags are already set so that when shrinking we don't start to generate
    # more data when some of the flags are shrunk to false.
    extra_flags = draw(
        st.lists(
            st.integers(0, len(options) - 1),
            min_size=min_size,
            max_size=min_size,
            unique=True,
        )
    )
    # Only flip flags which are currently unset: an index in `extra_flags` may
    # point at a flag which is already True, and counting that towards the
    # shortfall would leave fewer than `min_size` flags set. Because
    # `extra_flags` holds `min_size` distinct indices, at least `shortfall` of
    # them are guaranteed to be unset.
    shortfall = max(min_size - flags.count(True), 0)
    for i in extra_flags:
        if shortfall == 0:
            break
        if not flags[i]:
            flags[i] = True
            shortfall -= 1
    return [option for option, include in zip(options, flags) if include]
41 | 


--------------------------------------------------------------------------------
/tests/generative/test_data_setup.py:
--------------------------------------------------------------------------------
 1 | import hypothesis as hyp
 2 | from hypothesis.vendor.pretty import pretty
 3 | 
 4 | from . import test_query_model
 5 | 
 6 | 
 7 | # We just need a single non-empty example to check, and we want to keep the test
 8 | # deterministic
 9 | @hyp.given(example=test_query_model.data_strategy)
10 | @hyp.settings(max_examples=1, derandomize=True)
11 | def test_data_strategy_examples_round_trip(example):
12 |     """
13 |     Examples produced by `data_strategy` contain references to classes dynamically
14 |     generated in `data_setup` and we need to do some underhand stuff to make sure they
15 |     can be copy/pasted back into `@hypothesis.example()` and evaluate correctly.
16 | 
17 |     We've broken this properly once without realising so this test ensures we don't do so
18 |     again.
19 |     """
20 |     hyp.assume(len(example) > 0)
21 |     # `pretty` is the formatter Hypothesis uses for examples
22 |     example_repr = pretty(example)
23 |     # Evaluate it in the context of the `test_query_model` module, which is where
24 |     # examples will get pasted
25 |     evaled = eval(example_repr, globals(), vars(test_query_model))
26 |     assert evaled == example
27 | 


--------------------------------------------------------------------------------
/tests/integration/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/integration/__init__.py


--------------------------------------------------------------------------------
/tests/integration/backends/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/integration/backends/__init__.py


--------------------------------------------------------------------------------
/tests/integration/backends/conftest.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import sqlalchemy
 3 | 
 4 | from ehrql.backends.emis import EMISBackend
 5 | from ehrql.backends.tpp import TPPBackend
 6 | 
 7 | 
 8 | def _get_select_all_query(request, backend):
 9 |     try:
10 |         ql_table = request.function._table
11 |     except AttributeError:  # pragma: no cover
12 |         raise RuntimeError(
13 |             f"Function '{request.function.__name__}' needs the "
14 |             f"`@register_test_for(table)` decorator applied"
15 |         )
16 | 
17 |     qm_table = ql_table._qm_node
18 |     sql_table = backend.get_table_expression(qm_table.name, qm_table.schema)
19 |     columns = [
20 |         # Using `type_coerce(..., None)` like this strips the type information from the
21 |         # SQLAlchemy column meaning we get back the type that the column actually is in
22 |         # database, not the type we've told SQLAlchemy it is.
23 |         sqlalchemy.type_coerce(column, None).label(column.key)
24 |         for column in sql_table.columns
25 |     ]
26 |     return sqlalchemy.select(*columns)
27 | 
28 | 
29 | def _select_all_fn(select_all_query, database):
30 |     def _select_all(*input_data):
31 |         database.setup(*input_data)
32 |         with database.engine().connect() as connection:
33 |             results = connection.execute(select_all_query)
34 |             return sorted(
35 |                 [row._asdict() for row in results], key=lambda x: x["patient_id"]
36 |             )
37 | 
38 |     return _select_all
39 | 
40 | 
@pytest.fixture
def select_all_emis(request, trino_database):
    """Fixture giving a select-all function for the EMIS backend on Trino."""
    query = _get_select_all_query(request, EMISBackend())
    return _select_all_fn(query, trino_database)
45 | 
46 | 
@pytest.fixture
def select_all_tpp(request, mssql_database):
    """Fixture giving a select-all function for the TPP backend on MSSQL."""
    tpp_backend = TPPBackend(config={"TEMP_DATABASE_NAME": "temp_tables"})
    query = _get_select_all_query(request, tpp_backend)
    return _select_all_fn(query, mssql_database)
52 | 


--------------------------------------------------------------------------------
/tests/integration/file_formats/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/integration/file_formats/__init__.py


--------------------------------------------------------------------------------
/tests/integration/file_formats/test_csv.py:
--------------------------------------------------------------------------------
 1 | import gzip
 2 | 
 3 | import pytest
 4 | 
 5 | from ehrql.file_formats import write_rows
 6 | from ehrql.query_model.column_specs import ColumnSpec
 7 | 
 8 | 
 9 | @pytest.mark.parametrize("basename", [None, "file.csv", "file.csv.gz"])
10 | def test_write_rows_csv(tmp_path, capsys, basename):
11 |     if basename is None:
12 |         filename = None
13 |     else:
14 |         filename = tmp_path / "somedir" / basename
15 | 
16 |     column_specs = {
17 |         "patient_id": ColumnSpec(int),
18 |         "year_of_birth": ColumnSpec(int),
19 |         "sex": ColumnSpec(str),
20 |     }
21 |     results = [
22 |         (123, 1980, "F"),
23 |         (456, None, None),
24 |         (789, 1999, "M"),
25 |     ]
26 | 
27 |     write_rows(filename, results, column_specs)
28 | 
29 |     if basename is None:
30 |         output = capsys.readouterr().out
31 |     elif basename.endswith(".csv.gz"):
32 |         with gzip.open(filename, "rt") as f:
33 |             output = f.read()
34 |     elif basename.endswith(".csv"):
35 |         output = filename.read_text()
36 |     else:
37 |         assert False
38 | 
39 |     assert output.splitlines() == [
40 |         "patient_id,year_of_birth,sex",
41 |         "123,1980,F",
42 |         "456,,",
43 |         "789,1999,M",
44 |     ]
45 | 


--------------------------------------------------------------------------------
/tests/integration/measures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/integration/measures/__init__.py


--------------------------------------------------------------------------------
/tests/integration/query_engines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/integration/query_engines/__init__.py


--------------------------------------------------------------------------------
/tests/integration/query_engines/test_dialects.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import sqlalchemy
 3 | 
 4 | 
 5 | def test_case_statement(engine):
 6 |     """
 7 |     Test a basic CASE statement returning a string value. This exposed a bug in the
 8 |     string handling of our Spark dialect so it's useful to keep it around.
 9 |     """
10 |     if engine.name == "in_memory":
11 |         pytest.skip("SQLAlchemy dialect tests do not apply to the in-memory engine")
12 | 
13 |     case_statement = sqlalchemy.case(
14 |         (sqlalchemy.literal(1) == 0, "foo"),
15 |         (sqlalchemy.literal(1) == 1, "bar"),
16 |     )
17 |     query = sqlalchemy.select(case_statement.label("output"))
18 |     with engine.sqlalchemy_engine().connect() as conn:
19 |         results = list(conn.execute(query))
20 |     assert results[0].output == "bar"
21 | 


--------------------------------------------------------------------------------
/tests/integration/query_engines/test_local_file.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from ehrql import Dataset
 4 | from ehrql.query_engines.local_file import LocalFileQueryEngine
 5 | from ehrql.tables import EventFrame, PatientFrame, Series, table
 6 | 
 7 | 
 8 | FIXTURES = Path(__file__).parents[2] / "fixtures" / "local_file_engine"
 9 | 
10 | 
# Patient-level table with a single string column, `sex`.
# NOTE(review): presumably matched by name to `patients.csv` in the
# `local_file_engine` fixtures directory — confirm.
@table
class patients(PatientFrame):
    sex = Series(str)
14 | 
15 | 
# Event-level table with an integer `score` column plus a boolean column
# which is declared in the schema only.
# NOTE(review): presumably matched by name to `events.csv` in the
# `local_file_engine` fixtures directory — confirm.
@table
class events(EventFrame):
    score = Series(int)
    # Columns in the schema which aren't in the data files should just end up NULL
    expected_missing = Series(bool)
21 | 
22 | 
def test_local_file_query_engine():
    """Run a dataset through `LocalFileQueryEngine` against the fixture directory."""
    dataset = Dataset()
    dataset.sex = patients.sex
    dataset.total_score = events.score.sum_for_patient()
    # A column missing from the data file should come back as NULL on every row
    events_missing_value = events.where(events.expected_missing.is_null())
    dataset.missing = events_missing_value.count_for_patient()

    dataset.define_population(patients.exists_for_patient())
    compiled = dataset._compile()

    engine = LocalFileQueryEngine(FIXTURES)
    rows = list(engine.get_results(compiled))

    expected_rows = [
        (1, "M", 9, 3),
        (2, "F", 15, 2),
        (3, None, None, 0),
    ]
    assert rows == expected_rows
43 | 


--------------------------------------------------------------------------------
/tests/integration/query_engines/test_mssql_dialect.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | 
 3 | import pytest
 4 | import sqlalchemy
 5 | 
 6 | from ehrql.query_engines.mssql_dialect import MSSQLDialect
 7 | 
 8 | 
 9 | def test_date_literals_have_correct_type(mssql_engine):
10 |     case_statement = sqlalchemy.case(
11 |         (
12 |             sqlalchemy.literal(1) == 1,
13 |             datetime.date(2000, 10, 5),
14 |         ),
15 |     )
16 |     query = sqlalchemy.select(case_statement.label("output"))
17 |     with mssql_engine.sqlalchemy_engine().connect() as conn:
18 |         results = list(conn.execute(query))
19 |     assert results[0].output == datetime.date(2000, 10, 5)
20 | 
21 | 
def test_enforces_minimum_server_version(mssql_engine, monkeypatch):
    """
    With the dialect's `minimum_server_version` patched to an impossibly high value,
    opening a connection should raise a `RuntimeError` naming that version.
    """
    impossible_version = (999999,)
    monkeypatch.setattr(MSSQLDialect, "minimum_server_version", impossible_version)

    expected_error = r"we require at least \(999999,\)"
    with pytest.raises(RuntimeError, match=expected_error):
        mssql_engine.sqlalchemy_engine().connect()
26 | 
27 | 
def test_float_literals_have_correct_type(mssql_engine):
    # Background: with the `pymssql` driver and no special float handling, the "0.5"
    # literal is typed as a decimal, which makes the SUM result a fixed precision
    # decimal. Adding that to a decimal of greater precision rounds the result, giving
    # 0.8 rather than 0.75.
    summed_half = sqlalchemy.func.sum(0.5)
    query = sqlalchemy.select(summed_half + 0.25)

    with mssql_engine.sqlalchemy_engine().connect() as conn:
        first_row = list(conn.execute(query))[0]

    # Our custom dialect casts floats explicitly, which gives the correct answer
    assert first_row[0] == 0.75
40 | 


--------------------------------------------------------------------------------
/tests/integration/query_engines/test_trino_dialect.py:
--------------------------------------------------------------------------------
 1 | from ehrql.query_model.nodes import (
 2 |     AggregateByPatient,
 3 |     Column,
 4 |     Dataset,
 5 |     Function,
 6 |     InlinePatientTable,
 7 |     SelectColumn,
 8 |     TableSchema,
 9 | )
10 | 
11 | 
def test_float_precision(trino_engine):
    # Checks that float columns in inline tables get 64-bit precision in Trino: the
    # database must agree exactly with Python's own float arithmetic for `f1 - (f1 + f2)`
    large, small = 1, 0.001

    inline_table = InlinePatientTable(
        ((1, large, small),),
        TableSchema(f1=Column(float), f2=Column(float)),
    )
    f1 = SelectColumn(inline_table, "f1")
    f2 = SelectColumn(inline_table, "f2")
    difference = Function.Subtract(f1, Function.Add(f1, f2))

    dataset = Dataset(
        population=AggregateByPatient.Exists(inline_table),
        variables={"v": difference},
        events={},
        measures=None,
    )

    rows = trino_engine.extract(dataset)
    assert rows[0]["v"] == large - (large + small)
33 | 


--------------------------------------------------------------------------------
/tests/integration/query_model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/integration/query_model/__init__.py


--------------------------------------------------------------------------------
/tests/integration/query_model/test_transforms.py:
--------------------------------------------------------------------------------
 1 | from ehrql.query_model.nodes import (
 2 |     AggregateByPatient,
 3 |     Dataset,
 4 |     PickOneRowPerPatient,
 5 |     Position,
 6 |     SelectColumn,
 7 |     SelectTable,
 8 |     Sort,
 9 |     TableSchema,
10 | )
11 | 
12 | 
# Query model table with an integer column `i` and a boolean column `b`, populated and
# queried by the test below
events = SelectTable(
    "events",
    schema=TableSchema.from_primitives(i=int, b=bool),
)
17 | 
18 | 
def test_sort_booleans_null_first(engine):
    # The transforms add sorts for unsorted selected columns. This test pins down the
    # semantics of the sort added for boolean columns (which are handled explicitly
    # because some databases don't allow sorting on booleans).
    #
    # The desired sort order is: NULL, False, True.
    #
    # Every patient gets two records with different boolean values, so each patient is
    # one pairwise comparison. The integer column exists only so that the query model
    # has something to specify a sort on.
    patient_and_bool = [
        (0, False),
        (0, True),
        (1, None),
        (1, True),
        (2, None),
        (2, False),
    ]
    engine.populate(
        {
            events: [
                dict(patient_id=patient_id, row_id=row_id, i=0, b=value)
                for row_id, (patient_id, value) in enumerate(patient_and_bool)
            ]
        }
    )

    # Sort the events by i, take the last row, and read b from it.
    sorted_by_i = Sort(events, SelectColumn(events, "i"))
    last_row = PickOneRowPerPatient(source=sorted_by_i, position=Position.LAST)
    dataset = Dataset(
        population=AggregateByPatient.Exists(events),
        variables={"v": SelectColumn(last_row, "b")},
        events={},
        measures=None,
    )

    expected = [
        dict(patient_id=0, v=True),  # True sorts after False
        dict(patient_id=1, v=True),  # True sorts after NULL
        dict(patient_id=2, v=False),  # False sorts after NULL
    ]
    assert engine.extract(dataset) == expected
58 | 


--------------------------------------------------------------------------------
/tests/integration/tables/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/integration/tables/__init__.py


--------------------------------------------------------------------------------
/tests/integration/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/integration/utils/__init__.py


--------------------------------------------------------------------------------
/tests/integration/utils/test_sqlalchemy_exec_utils.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import sqlalchemy
 3 | import sqlalchemy.orm
 4 | 
 5 | from ehrql.utils.sqlalchemy_exec_utils import fetch_table_in_batches
 6 | 
 7 | 
# Declarative base for the ORM model used by the tests in this module
Base = sqlalchemy.orm.declarative_base()


# Three-column table which the tests below fill with rows and then read back in batches
class SomeTable(Base):
    __tablename__ = "some_table"
    # Primary key values are supplied explicitly by the tests (no autoincrement)
    pk = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True, autoincrement=False)
    # Integer key column: distinct per row in the "unique" test, repeated across rows in
    # the "nonunique" test
    key = sqlalchemy.Column(sqlalchemy.Integer)
    foo = sqlalchemy.Column(sqlalchemy.String)
16 | 
17 | 
def test_fetch_table_in_batches_unique(engine):
    """
    Fetch 15 rows, each with a distinct key, using a batch size of 6 and check that
    every row comes back in its original order.
    """
    if engine.name == "in_memory":
        pytest.skip("SQL tests do not apply to in-memory engine")

    batch_size = 6
    expected_rows = [(i, i, f"foo{i}") for i in range(15)]
    engine.setup(
        [SomeTable(pk=pk, key=key, foo=foo) for pk, key, foo in expected_rows]
    )

    with engine.sqlalchemy_engine().connect() as connection:
        fetched = list(
            fetch_table_in_batches(
                connection.execute,
                SomeTable.__table__,
                # NOTE(review): presumably the index of the column to batch on — confirm
                # against `fetch_table_in_batches`
                0,
                key_is_unique=True,
                batch_size=batch_size,
            )
        )

    assert fetched == expected_rows
42 | 
43 | 
def test_fetch_table_in_batches_nonunique(engine):
    """
    Fetch rows whose key values repeat a varying number of times (including a key
    which never appears) and check that, once sorted, every row is returned.
    """
    if engine.name == "in_memory":
        pytest.skip("SQL tests do not apply to in-memory engine")

    batch_size = 6
    # `repeats[k]` is the number of rows which share the key value `k`
    repeats = [1, 2, 3, 4, 5, 0, 5, 4, 3, 2, 1]
    keys = []
    for key, count in enumerate(repeats):
        keys.extend([key] * count)
    expected_rows = [(pk, key, f"foo{pk}") for pk, key in enumerate(keys)]

    engine.setup(
        [SomeTable(pk=pk, key=key, foo=foo) for pk, key, foo in expected_rows]
    )

    with engine.sqlalchemy_engine().connect() as connection:
        fetched = sorted(
            fetch_table_in_batches(
                connection.execute,
                SomeTable.__table__,
                0,
                key_is_unique=False,
                batch_size=batch_size,
            )
        )

    assert fetched == expected_rows
68 | 


--------------------------------------------------------------------------------
/tests/integration/utils/test_sqlalchemy_query_utils.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import sqlalchemy
 3 | 
 4 | from ehrql.utils.sqlalchemy_query_utils import InsertMany
 5 | 
 6 | 
# Two-column table (integer `i`, string `s`) which the test below creates, fills using
# `InsertMany`, reads back and then drops
table = sqlalchemy.Table(
    "t",
    sqlalchemy.MetaData(),
    sqlalchemy.Column("i", sqlalchemy.Integer()),
    sqlalchemy.Column("s", sqlalchemy.String()),
)
13 | 
14 | 
def test_insert_many(engine):
    """
    Insert 5000 rows through `InsertMany` in batches of 2000 and check that exactly
    those rows can be read back.
    """
    if engine.name == "in_memory":
        pytest.skip("SQL tests do not apply to in-memory engine")

    # Enough rows to exercise SQLAlchemy's internal batching logic without making the
    # test significantly slower
    expected_rows = [(i, f"a{i}") for i in range(5000)]

    # The rows are passed as an iterator to check that a plain list isn't required
    insert_statement = InsertMany(table, iter(expected_rows), batch_size=2000)

    with engine.sqlalchemy_engine().connect() as connection:
        connection.execute(sqlalchemy.schema.CreateTable(table))
        try:
            connection.execute(insert_statement)
            fetched = list(connection.execute(sqlalchemy.select(table)))
        finally:
            # The table persists in the Trino engine, so it has to be dropped explicitly
            connection.execute(sqlalchemy.schema.DropTable(table))

    assert sorted(fetched) == expected_rows
41 | 


--------------------------------------------------------------------------------
/tests/integration/utils/test_traceback_utils.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from pathlib import Path
 3 | 
 4 | import pytest
 5 | 
 6 | import ehrql
 7 | from ehrql.loaders import DefinitionError, load_module
 8 | from ehrql.tables import smoketest
 9 | 
10 | 
# Directory containing the deliberately broken definition files loaded by these tests,
# located relative to this test module
FIXTURES = Path(__file__).parents[2] / "fixtures" / "bad_definition_files"
12 | 
13 | 
def test_traceback_starts_with_user_code():
    """
    The error raised for a bad definition file should contain a traceback whose
    first listed frame is the definition file itself.
    """
    bad_file = FIXTURES / "bad_import.py"
    expected_text = f'Traceback (most recent call last):\n  File "{bad_file}"'
    # The expected text is matched literally, hence the escaping
    pattern = re.escape(expected_text)
    with pytest.raises(DefinitionError, match=pattern):
        load_module(bad_file)
19 | 
20 | 
def test_traceback_ends_with_user_code():
    """
    The error message for `bad_types.py` should not mention any path inside the
    installed `ehrql` package.
    """
    bad_file = FIXTURES / "bad_types.py"
    with pytest.raises(DefinitionError) as excinfo:
        load_module(bad_file)

    error_text = str(excinfo.value)
    # No ehrql code should be referenced anywhere in the traceback
    ehrql_root = str(Path(ehrql.__file__).parent)
    assert ehrql_root not in error_text
28 | 
29 | 
def test_references_to_failed_imports_from_ehrql_are_not_stripped_out():
    """
    When user code imports a non-existent name from a real ehrql module, that
    module's path should still appear in the error message.
    """
    bad_file = FIXTURES / "bad_import.py"
    with pytest.raises(DefinitionError) as excinfo:
        load_module(bad_file)

    error_text = str(excinfo.value)
    # The fixture imports a name which `smoketest` doesn't define, although the module
    # itself exists, so the module is expected to be visible in the traceback.
    assert smoketest.__file__ in error_text
37 | 
38 | 
def test_traceback_filtering_handles_relative_paths():
    """
    Loading a bad definition file via a path relative to the current working
    directory should still produce a traceback which points at that file.
    """
    absolute_path = FIXTURES / "bad_import.py"
    relative_path = absolute_path.relative_to(Path.cwd())
    pattern = r'Traceback \(most recent call last\):\n  File ".*bad_import\.py"'
    with pytest.raises(DefinitionError, match=pattern):
        load_module(relative_path)
44 | 
45 | 
def test_traceback_filtering_handles_syntax_errors():
    """
    A definition file containing a syntax error should be reported with just the
    file path, the offending line and the `SyntaxError` itself.
    """
    filename = FIXTURES / "bad_syntax.py"
    # The path gets interpolated into a regular expression, so it must be escaped:
    # otherwise regex metacharacters in the checkout location (e.g. `+`, `(` or Windows
    # backslashes) would break or loosen the match
    escaped_filename = re.escape(str(filename))
    message = (
        r"^"
        f"Error loading file '{escaped_filename}':"
        r"\s+"
        f'File "{escaped_filename}", line 1'
        r"\s+"
        r"what even is a Python"
        r"[\s\^]+"
        r"SyntaxError: invalid syntax"
        r"$"
    )
    with pytest.raises(DefinitionError, match=message):
        load_module(filename)
61 | 


--------------------------------------------------------------------------------
/tests/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/lib/__init__.py


--------------------------------------------------------------------------------
/tests/lib/create_tpp_test_db.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Run this using:
 3 | 
 4 |     pytest -o python_functions=create tests/lib/create_tpp_test_db.py
 5 | 
 6 | It will start an MSSQL Docker container, create all the tables in the TPP schema, and
 7 | output the connection string needed to talk to this database.
 8 | """
 9 | 
10 | from .tpp_schema import Base  # pragma: no cover
11 | 
12 | 
# This is not a test, but we get pytest to run it as though it were one so that we can
# re-use all the fixture machinery. Because neither this file nor this function is named
# the way pytest expects, they are not discovered and executed during the normal test
# run. Instead we run it by passing the path and function name directly to pytest.
def create(request, mssql_database_with_session_scope):  # pragma: no cover
    database = mssql_database_with_session_scope
    database.setup(metadata=Base.metadata)

    # Output capturing is switched off while printing so that the connection details
    # are actually shown to whoever is running the command
    capture = request.config.pluginmanager.getplugin("capturemanager")
    with capture.global_and_fixture_disabled():
        vscode_connection_string = (
            f"  Server={database.host_from_host},{database.port_from_host};"
            f"Database={database.db_name};"
            f"User Id={database.username};Password={database.password};"
        )
        output_lines = [
            "\n\n=> Created TPP tables in test database",
            "",
            "DSN for ehrQL:",
            f"  DATABASE_URL='{database.host_url()}'",
            "",
            "Connection string for VSCode MSSQL Extension:",
            vscode_connection_string,
            "",
        ]
        for line in output_lines:
            print(line)
32 |         print()
33 | 


--------------------------------------------------------------------------------
/tests/lib/file_utils.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | import gzip
 3 | 
 4 | from pyarrow.feather import read_table
 5 | 
 6 | from ehrql.file_formats import get_file_extension
 7 | 
 8 | 
 9 | def read_file_as_dicts(filename):
10 |     extension = get_file_extension(filename)
11 |     if extension == ".csv":
12 |         with open(filename, newline="") as f:
13 |             return list(csv.DictReader(f))
14 |     elif extension == ".csv.gz":
15 |         with gzip.open(filename, "rt", newline="") as f:
16 |             return list(csv.DictReader(f))
17 |     elif extension == ".arrow":
18 |         return read_table(str(filename)).to_pylist()
19 |     else:
20 |         assert False, f"Unsupported extension: {filename}"
21 | 


--------------------------------------------------------------------------------
/tests/lib/inspect_utils.py:
--------------------------------------------------------------------------------
 1 | import ast
 2 | import inspect
 3 | import textwrap
 4 | 
 5 | 
 6 | def function_body_as_string(function):
 7 |     """
 8 |     Return the de-indented source code of the body of a function
 9 | 
10 |     This is useful for being able to specify the contents of test fixtures without
11 |     having to make them seperate files (which makes the tests harder to follow) or to
12 |     declare them as string literals (where you lose the benefits of syntax highlighting
13 |     and other tooling).
14 | 
15 |     Note that one downside of this vs string literals is that you can't use templating
16 |     in the same way to dynamically generate the fixture. Instead, you need to find some
17 |     way of specifying your placeholder values as valid Python and then call `.replace()`
18 |     on the resulting string.
19 |     """
20 |     source = textwrap.dedent(inspect.getsource(function))
21 |     parsed = ast.parse(source)
22 |     assert isinstance(parsed, ast.Module)
23 |     func_def = parsed.body[0]
24 |     assert isinstance(func_def, ast.FunctionDef)
25 |     first_line = func_def.body[0].lineno
26 |     body_lines = source.split("\n")[first_line - 1 :]
27 |     return textwrap.dedent("\n".join(body_lines))
28 | 


--------------------------------------------------------------------------------
/tests/lib/query_model_utils.py:
--------------------------------------------------------------------------------
 1 | import dataclasses
 2 | 
 3 | from ehrql.query_model import nodes as query_model
 4 | 
 5 | 
 6 | def get_all_operations():
 7 |     "Return every operation defined in the query model"
 8 |     return [cls for cls in iterate_query_model_namespace() if is_operation(cls)]
 9 | 
10 | 
def is_operation(cls):
    "Return whether an arbitrary value is a query model operation class"
    # The order of these checks matters: `issubclass` can fail on values which aren't
    # classes, so `isinstance` has to run first. The `Node` base class is excluded
    # because it isn't a dataclass itself and so `fields()` would fail on it.
    is_proper_node_subclass = (
        isinstance(cls, type)
        and issubclass(cls, query_model.Node)
        and cls is not query_model.Node
    )
    if not is_proper_node_subclass:
        return False
    # Classes which take arguments are operations; those without are abstract types
    return bool(dataclasses.fields(cls))
22 | 
23 | 
def iterate_query_model_namespace():
    "Yield every value in the query_model module"
    # As well as the module itself, include the values nested inside the `Function` and
    # `AggregateByPatient` namespaces
    namespaces = (
        query_model,
        query_model.Function,
        query_model.AggregateByPatient,
    )
    for namespace in namespaces:
        yield from vars(namespace).values()
29 | 


--------------------------------------------------------------------------------
/tests/lib/tpp_decision_support_reference.csv:
--------------------------------------------------------------------------------
1 | AlgorithmType,AlgorithmDescription,AlgorithmVersion,AlgorithmSourceLink
2 | 1,UK Electronic Frailty Index (eFI),1.0,https://academic.oup.com/ageing/article/45/3/353/1739750
3 | 


--------------------------------------------------------------------------------
/tests/spec/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/spec/__init__.py


--------------------------------------------------------------------------------
/tests/spec/aggregate_frame/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Aggregating event and patient frames"
2 | 


--------------------------------------------------------------------------------
/tests/spec/aggregate_frame/test_count_for_patient.py:
--------------------------------------------------------------------------------
 1 | from ..tables import e, p
 2 | 
 3 | 
title = "Counting the rows for each patient"

# Rows for the patient-level table `p` and the event-level table `e`. Going by the
# expected results below, the unlabelled first column holds the patient id: patients
# 1, 2 and 3 each have one row in `p`, while `e` has two rows for patient 1 and one for
# patient 2.
table_data = {
    p: """
          | b1
        --+----
        1 |
        2 |
        3 |
        """,
    e: """
          | b1
        --+----
        1 |
        1 |
        2 |
        """,
}
22 | 
23 | 
# Patient 1 has two rows in `e` and patient 2 has one; patient 3 has none, which gives
# a count of 0 rather than None.
def test_count_for_patient_on_event_frame(spec_test):
    spec_test(
        table_data,
        e.count_for_patient(),
        {
            1: 2,
            2: 1,
            3: 0,
        },
    )
34 | 
35 | 
# Each patient has exactly one row in `p`, so every count is 1.
def test_count_for_patient_on_patient_frame(spec_test):
    spec_test(
        table_data,
        p.count_for_patient(),
        {
            1: 1,
            2: 1,
            3: 1,
        },
    )
46 | 


--------------------------------------------------------------------------------
/tests/spec/aggregate_frame/test_exists_for_patient.py:
--------------------------------------------------------------------------------
 1 | from ..tables import e, p
 2 | 
 3 | 
title = "Determining whether a row exists for each patient"

# Patients 1, 2 and 3 each have a row in the patient-level table `p`, but only patients
# 1 and 2 have rows in the event-level table `e`.
table_data = {
    p: """
          | b1
        --+----
        1 |
        2 |
        3 |
        """,
    e: """
          | b1
        --+----
        1 |
        1 |
        2 |
        """,
}
22 | 
23 | 
# Patient 3 has no rows in `e`, so the result for them is False rather than None.
def test_exists_for_patient_on_event_frame(spec_test):
    spec_test(
        table_data,
        e.exists_for_patient(),
        {
            1: True,
            2: True,
            3: False,
        },
    )
34 | 
35 | 
# Every patient has a row in `p`, so the result is True for all of them.
def test_exists_for_patient_on_patient_frame(spec_test):
    spec_test(
        table_data,
        p.exists_for_patient(),
        {
            1: True,
            2: True,
            3: True,
        },
    )
46 | 


--------------------------------------------------------------------------------
/tests/spec/aggregate_series/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Aggregating event series"
2 | 


--------------------------------------------------------------------------------
/tests/spec/aggregate_series/test_count_distinct_for_patient.py:
--------------------------------------------------------------------------------
 1 | from ..tables import e
 2 | 
 3 | 
title = "Count distinct aggregation"

# One column per type under test (integer, float, string, date). Patient 1 has three
# different values in every column; patients 2 and 3 contain duplicated rows; patient 3
# also has rows with blank values, and patient 4 has nothing but a blank row.
table_data = {
    e: """
          |  i1 |  f1 | s1 |     d1
        --+-----+-----+----+------------
        1 | 101 | 1.1 |  a | 2020-01-01
        1 | 102 | 1.2 |  b | 2020-01-02
        1 | 103 | 1.5 |  c | 2020-01-03
        2 | 201 | 2.1 |  a | 2020-02-01
        2 | 201 | 2.1 |  a | 2020-02-01
        2 | 203 | 2.5 |  b | 2020-02-02
        3 | 301 | 3.1 |  a | 2020-03-01
        3 | 301 | 3.1 |  a | 2020-03-01
        3 |     |     |    |
        3 |     |     |    |
        4 |     |     |    |
        """,
}
23 | 
24 | 
# Repeated integers count once and blank values aren't counted at all, so a patient
# with only blank values (patient 4) gets 0.
def test_count_distinct_for_patient_integer(spec_test):
    spec_test(
        table_data,
        e.i1.count_distinct_for_patient(),
        {
            1: 3,
            2: 2,
            3: 1,
            4: 0,
        },
    )
36 | 
37 | 
# Same expectations as the integer case, using the float column.
def test_count_distinct_for_patient_float(spec_test):
    spec_test(
        table_data,
        e.f1.count_distinct_for_patient(),
        {
            1: 3,
            2: 2,
            3: 1,
            4: 0,
        },
    )
49 | 
50 | 
# Same expectations as the integer case, using the string column.
def test_count_distinct_for_patient_string(spec_test):
    spec_test(
        table_data,
        e.s1.count_distinct_for_patient(),
        {
            1: 3,
            2: 2,
            3: 1,
            4: 0,
        },
    )
62 | 
63 | 
# Same expectations as the other column types, using the date column `d1`: patient 1
# has three different dates, patient 2 has two (one of them repeated), patient 3 has a
# single repeated date plus blanks, and patient 4 has only a blank value.
def test_count_distinct_for_patient_date(spec_test):
    spec_test(
        table_data,
        e.d1.count_distinct_for_patient(),
        {
            1: 3,
            2: 2,
            3: 1,
            4: 0,
        },
    )
75 | 


--------------------------------------------------------------------------------
/tests/spec/aggregate_series/test_mean_for_patient.py:
--------------------------------------------------------------------------------
 1 | from ..tables import e
 2 | 
 3 | 
title = "Mean aggregation"

# Patient 1 has three values in each column, patient 2 has one blank row plus two
# values, and patient 3 has only a blank row.
table_data = {
    e: """
          | i1 | f1
        --+----+-----
        1 |  1 | 1.1
        1 |  2 | 2.1
        1 |  3 | 3.1
        2 |    |
        2 |  2 | 2.1
        2 |  3 | 3.1
        3 |    |
        """,
}
19 | 
20 | 
# Blank values are left out of both the total and the divisor (patient 2 divides by 2,
# not 3); a patient with no values at all gets None. The mean of integers is not
# truncated: patient 2's expected value is 2.5.
def test_mean_for_patient_integer(spec_test):
    spec_test(
        table_data,
        e.i1.mean_for_patient(),
        {
            1: (1 + 2 + 3) / 3,
            2: (2 + 3) / 2,
            3: None,
        },
    )
31 | 
32 | 
# As with the integer case, blank values are ignored and a patient with no values gets
# None.
def test_mean_for_patient_float(spec_test):
    spec_test(
        table_data,
        e.f1.mean_for_patient(),
        {
            1: (1.1 + 2.1 + 3.1) / 3,
            2: (2.1 + 3.1) / 2,
            3: None,
        },
    )
43 | 


--------------------------------------------------------------------------------
/tests/spec/aggregate_series/test_minimum_and_maximum_for_patient.py:
--------------------------------------------------------------------------------
 1 | from ..tables import e
 2 | 
 3 | 
title = "Minimum and maximum aggregations"

# Patient 1 has three values, patient 2 has one value and one blank row, and patient 3
# has only a blank row.
table_data = {
    e: """
          |  i1
        --+-----
        1 | 101
        1 | 102
        1 | 103
        2 | 201
        2 |
        3 |
        """,
}
18 | 
19 | 
# Blank values are ignored when finding the minimum (patient 2 gets 201); a patient
# with no values gets None.
def test_minimum_for_patient(spec_test):
    spec_test(
        table_data,
        e.i1.minimum_for_patient(),
        {
            1: 101,
            2: 201,
            3: None,
        },
    )
30 | 
31 | 
# Blank values are ignored when finding the maximum (patient 2 gets 201); a patient
# with no values gets None.
def test_maximum_for_patient(spec_test):
    spec_test(
        table_data,
        e.i1.maximum_for_patient(),
        {
            1: 103,
            2: 201,
            3: None,
        },
    )
42 | 


--------------------------------------------------------------------------------
/tests/spec/aggregate_series/test_sum_for_patient.py:
--------------------------------------------------------------------------------
 1 | from ..tables import e
 2 | 
 3 | 
title = "Sum aggregation"

# Patient 1 has three values, patient 2 has two values either side of a blank row, and
# patient 3 has only a blank row.
table_data = {
    e: """
          |  i1
        --+-----
        1 | 101
        1 | 102
        1 | 103
        2 | 201
        2 |
        2 | 203
        3 |
        """,
}
19 | 
20 | 
# Blank values are skipped when summing; a patient with no values at all gets None
# rather than 0.
def test_sum_for_patient(spec_test):
    spec_test(
        table_data,
        e.i1.sum_for_patient(),
        {
            1: (101 + 102 + 103),
            2: (201 + 203),
            3: None,
        },
    )
31 | 


--------------------------------------------------------------------------------
/tests/spec/bool_series_ops/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Operations on boolean series"
2 | 


--------------------------------------------------------------------------------
/tests/spec/bool_series_ops/test_conversion.py:
--------------------------------------------------------------------------------
 1 | from ..tables import p
 2 | 
 3 | 
title = "Convert a boolean value to an integer"

# One patient for each possible state of the boolean column: T, blank and F. The test
# below expects True, None and False-derived results for these respectively.
table_data = {
    p: """
         | b1
       --+----
       1 |  T
       2 |
       3 |  F
       """,
}
15 | 
16 | 
def test_bool_as_int(spec_test):
    """
    Booleans are converted to 0 (False) or 1 (True).
    """
    # The blank value for patient 2 stays None rather than being converted to 0
    spec_test(
        table_data,
        p.b1.as_int(),
        {1: 1, 2: None, 3: 0},
    )
26 | 


--------------------------------------------------------------------------------
/tests/spec/bool_series_ops/test_logical_ops.py:
--------------------------------------------------------------------------------
 1 | from ..tables import p
 2 | 
 3 | 
 4 | title = "Logical operations"
 5 | 
 6 | 
# Negation swaps True and False; the blank value for patient 2 stays None.
def test_not(spec_test):
    table_data = {
        p: """
              | b1
            --+----
            1 |  T
            2 |
            3 |  F
            """,
    }

    spec_test(
        table_data,
        ~p.b1,
        {
            1: False,
            2: None,
            3: True,
        },
    )
27 | 
28 | 
# All nine combinations of T, blank and F across the two boolean columns, shared by
# the `and` and `or` tests below.
table_data = {
    p: """
          | b1 | b2
        --+----+----
        1 |  T |  T
        2 |  T |
        3 |  T |  F
        4 |    |  T
        5 |    |
        6 |    |  F
        7 |  F |  T
        8 |  F |
        9 |  F |  F
        """,
}
44 | 
45 | 
# A False operand makes the result False even when the other operand is blank (patients
# 6 and 8); True combined with a blank operand gives None (patients 2 and 4).
def test_and(spec_test):
    spec_test(
        table_data,
        p.b1 & p.b2,
        {
            1: True,
            2: None,
            3: False,
            4: None,
            5: None,
            6: False,
            7: False,
            8: False,
            9: False,
        },
    )
62 | 
63 | 
# A True operand makes the result True even when the other operand is blank (patients
# 2 and 4); False combined with a blank operand gives None (patients 6 and 8).
def test_or(spec_test):
    spec_test(
        table_data,
        p.b1 | p.b2,
        {
            1: True,
            2: True,
            3: True,
            4: True,
            5: None,
            6: None,
            7: True,
            8: None,
            9: False,
        },
    )
80 | 


--------------------------------------------------------------------------------
/tests/spec/case_expressions/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Logical case expressions"
2 | 


--------------------------------------------------------------------------------
/tests/spec/case_expressions/test_when.py:
--------------------------------------------------------------------------------
 1 | from ehrql import when
 2 | 
 3 | from ..tables import p
 4 | 
 5 | 
 6 | title = "Case expressions with single condition"
 7 | 
 8 | 
# Patients whose `i1` is below 8 keep their own value. Patient 3 fails the condition
# and patient 4 has a blank `i1`; both receive the `otherwise` value.
def test_when_with_expression(spec_test):
    table_data = {
        p: """
          | i1
        --+----
        1 | 6
        2 | 7
        3 | 8
        4 |
        """,
    }
    spec_test(
        table_data,
        when(p.i1 < 8).then(p.i1).otherwise(100),
        {
            1: 6,
            2: 7,
            3: 100,
            4: 100,
        },
    )
30 | 
31 | 
# A boolean column is used directly as the condition. Both the F value (patient 2) and
# the blank value (patient 3) lead to the `otherwise` value.
def test_when_with_boolean_column(spec_test):
    table_data = {
        p: """
              | i1 | b1
            --+----+----
            1 | 6  | T
            2 | 7  | F
            3 |    |
            """,
    }

    spec_test(
        table_data,
        when(p.b1).then(p.i1).otherwise(100),
        {
            1: 6,
            2: 100,
            3: 100,
        },
    )
52 | 


--------------------------------------------------------------------------------
/tests/spec/code_series_ops/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Operations on all series containing codes"  # chapter title for this spec package
2 | 


--------------------------------------------------------------------------------
/tests/spec/code_series_ops/test_containment.py:
--------------------------------------------------------------------------------
 1 | from ehrql.codes import SNOMEDCTCode, codelist_from_csv_lines
 2 | 
 3 | from ..tables import p
 4 | 
 5 | 
 6 | title = "Testing for containment using codes"  # section title for this spec module
 7 | 
 8 | table_data = {  # input rows shared by the tests in this module
 9 |     p: """
10 |           |   c1
11 |         --+--------
12 |         1 | 123000
13 |         2 | 456000
14 |         3 | 789000
15 |         4 |
16 |         """,
17 | }
18 | 
19 | 
20 | def test_is_in(spec_test):  # membership of a code column in a list of SNOMEDCTCode values
21 |     spec_test(
22 |         table_data,
23 |         p.c1.is_in([SNOMEDCTCode("123000"), SNOMEDCTCode("789000")]),
24 |         {
25 |             1: True,
26 |             2: False,  # 456000 is not in the list
27 |             3: True,
28 |             4: None,  # blank c1 gives a null result rather than False
29 |         },
30 |     )
31 | 
32 | 
33 | def test_is_not_in(spec_test):  # negated membership against the same SNOMEDCTCode list
34 |     spec_test(
35 |         table_data,
36 |         p.c1.is_not_in([SNOMEDCTCode("123000"), SNOMEDCTCode("789000")]),
37 |         {
38 |             1: False,
39 |             2: True,  # 456000 is not in the list
40 |             3: False,
41 |             4: None,  # blank c1 stays null here too
42 |         },
43 |     )
44 | 
45 | 
46 | def test_is_in_codelist_csv(spec_test):  # membership check with codes built by codelist_from_csv_lines
47 |     codelist = codelist_from_csv_lines(
48 |         [
49 |             "code",
50 |             "123000",
51 |             "789000",
52 |         ],
53 |         column="code",
54 |     )
55 | 
56 |     spec_test(
57 |         table_data,
58 |         p.c1.is_in(codelist),
59 |         {
60 |             1: True,
61 |             2: False,  # 456000 is not one of the CSV lines
62 |             3: True,
63 |             4: None,  # blank c1
64 |         },
65 |     )
66 | 


--------------------------------------------------------------------------------
/tests/spec/code_series_ops/test_map_codes_to_categories.py:
--------------------------------------------------------------------------------
 1 | from ehrql.codes import codelist_from_csv_lines
 2 | 
 3 | from ..tables import p
 4 | 
 5 | 
 6 | title = "Test mapping codes to categories using a categorised codelist"  # section title for this spec module
 7 | 
 8 | table_data = {  # input rows for the test in this module
 9 |     p: """
10 |           |   c1
11 |         --+--------
12 |         1 | 123000
13 |         2 | 456000
14 |         3 | 789000
15 |         4 |
16 |         """,
17 | }
18 | 
19 | 
20 | def test_map_codes_to_categories(spec_test):  # to_category with a codelist that carries a category column
21 |     codelist = codelist_from_csv_lines(
22 |         [
23 |             "code,my_categorisation",
24 |             "123000,cat1",
25 |             "789000,cat2",
26 |         ],
27 |         column="code",
28 |         category_column="my_categorisation",
29 |     )
30 | 
31 |     spec_test(
32 |         table_data,
33 |         p.c1.to_category(codelist),
34 |         {
35 |             1: "cat1",
36 |             2: None,  # 456000 is not in the codelist
37 |             3: "cat2",
38 |             4: None,  # blank c1
39 |         },
40 |     )
41 | 


--------------------------------------------------------------------------------
/tests/spec/combine_series/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Combining series"  # chapter title for this spec package
2 | 


--------------------------------------------------------------------------------
/tests/spec/combine_series/test_event_series_and_event_series.py:
--------------------------------------------------------------------------------
 1 | from ..tables import e
 2 | 
 3 | 
 4 | title = "Combining two event series"  # section title for this spec module
 5 | 
 6 | table_data = {  # two event rows for each of patients 1 and 2
 7 |     e: """
 8 |           |  i1 |  i2 | s1
 9 |         --+-----+-----+---
10 |         1 | 101 | 111 | b
11 |         1 | 102 | 112 | a
12 |         2 | 201 | 211 | b
13 |         2 | 202 | 212 | a
14 |     """,
15 | }
16 | 
17 | 
18 | def test_event_series_and_event_series(spec_test):  # row-wise i1 + i2, then summed per patient
19 |     spec_test(
20 |         table_data,
21 |         (e.i1 + e.i2).sum_for_patient(),
22 |         {
23 |             1: (101 + 111) + (102 + 112),  # each bracket is one event row
24 |             2: (201 + 211) + (202 + 212),
25 |         },
26 |     )
27 | 
28 | 
29 | def test_event_series_and_sorted_event_series(spec_test):
30 |     """
31 |     The sort order of the underlying event series does not affect their combination.
32 |     """
33 |     spec_test(
34 |         table_data,
35 |         (e.i1 + e.sort_by(e.s1).i2).minimum_for_patient(),
36 |         {
37 |             1: (101 + 111),  # smaller of the two row sums; values are still paired row by row
38 |             2: (201 + 211),
39 |         },
40 |     )
41 | 


--------------------------------------------------------------------------------
/tests/spec/combine_series/test_event_series_and_patient_series.py:
--------------------------------------------------------------------------------
 1 | from ..tables import e, p
 2 | 
 3 | 
 4 | title = "Combining an event series with a patient series"  # section title for this spec module
 5 | 
 6 | table_data = {  # one row per patient in p, two rows per patient in e
 7 |     p: """
 8 |           |  i1
 9 |         --+-----
10 |         1 | 101
11 |         2 | 201
12 | 
13 |     """,
14 |     e: """
15 |           |  i1
16 |         --+-----
17 |         1 | 111
18 |         1 | 112
19 |         2 | 211
20 |         2 | 212
21 |     """,
22 | }
23 | 
24 | 
25 | def test_event_series_and_patient_series(spec_test):  # event series on the left of the +
26 |     spec_test(
27 |         table_data,
28 |         (e.i1 + p.i1).sum_for_patient(),
29 |         {
30 |             1: (111 + 101) + (112 + 101),  # the patient's single p.i1 value is added to every event row
31 |             2: (211 + 201) + (212 + 201),
32 |         },
33 |     )
34 | 
35 | 
36 | def test_patient_series_and_event_series(spec_test):  # same combination with the operands swapped
37 |     spec_test(
38 |         table_data,
39 |         (p.i1 + e.i1).sum_for_patient(),
40 |         {
41 |             1: (101 + 111) + (101 + 112),  # the patient's single p.i1 value is paired with every event row
42 |             2: (201 + 211) + (201 + 212),
43 |         },
44 |     )
45 | 


--------------------------------------------------------------------------------
/tests/spec/combine_series/test_event_series_and_value.py:
--------------------------------------------------------------------------------
 1 | from ..tables import e
 2 | 
 3 | 
 4 | title = "Combining an event series with a value"  # section title for this spec module
 5 | 
 6 | table_data = {  # two event rows for each of patients 1 and 2
 7 |     e: """
 8 |           |  i1
 9 |         --+-----
10 |         1 | 101
11 |         1 | 102
12 |         2 | 201
13 |         2 | 202
14 |     """,
15 | }
16 | 
17 | 
18 | def test_event_series_and_value(spec_test):  # plain Python int on the right of the +
19 |     spec_test(
20 |         table_data,
21 |         (e.i1 + 1).sum_for_patient(),
22 |         {
23 |             1: (101 + 1) + (102 + 1),  # the constant is added to every event row before summing
24 |             2: (201 + 1) + (202 + 1),
25 |         },
26 |     )
27 | 
28 | 
29 | def test_value_and_event_series(spec_test):  # plain Python int on the left of the +
30 |     spec_test(
31 |         table_data,
32 |         (1 + e.i1).sum_for_patient(),
33 |         {
34 |             1: (1 + 101) + (1 + 102),  # the constant is added to every event row before summing
35 |             2: (1 + 201) + (1 + 202),
36 |         },
37 |     )
38 | 


--------------------------------------------------------------------------------
/tests/spec/combine_series/test_patient_series_and_patient_series.py:
--------------------------------------------------------------------------------
 1 | from ..tables import p
 2 | 
 3 | 
 4 | title = "Combining two patient series"  # section title for this spec module
 5 | 
 6 | table_data = {  # one row per patient
 7 |     p: """
 8 |           |  i1 |  i2
 9 |         --+-----+-----
10 |         1 | 101 | 102
11 |         2 | 201 | 202
12 |     """,
13 | }
14 | 
15 | 
16 | def test_patient_series_and_patient_series(spec_test):  # two columns of the same patient table added together
17 |     spec_test(
18 |         table_data,
19 |         p.i1 + p.i2,
20 |         {
21 |             1: (101 + 102),  # i1 + i2 from patient 1's row
22 |             2: (201 + 202),
23 |         },
24 |     )
25 | 


--------------------------------------------------------------------------------
/tests/spec/combine_series/test_patient_series_and_value.py:
--------------------------------------------------------------------------------
 1 | from ..tables import p
 2 | 
 3 | 
 4 | title = "Combining a patient series with a value"  # section title for this spec module
 5 | 
 6 | table_data = {  # one row per patient
 7 |     p: """
 8 |           |  i1
 9 |         --+-----
10 |         1 | 101
11 |         2 | 201
12 |     """,
13 | }
14 | 
15 | 
16 | def test_patient_series_and_value(spec_test):  # plain Python int on the right of the +
17 |     spec_test(
18 |         table_data,
19 |         p.i1 + 1,
20 |         {
21 |             1: (101 + 1),  # constant added to the patient's i1
22 |             2: (201 + 1),
23 |         },
24 |     )
25 | 
26 | 
27 | def test_value_and_patient_series(spec_test):  # plain Python int on the left of the +
28 |     spec_test(
29 |         table_data,
30 |         1 + p.i1,
31 |         {
32 |             1: (1 + 101),  # constant added to the patient's i1
33 |             2: (1 + 201),
34 |         },
35 |     )
36 | 


--------------------------------------------------------------------------------
/tests/spec/date_series_ops/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Operations on all series containing dates"  # chapter title for this spec package
2 | 


--------------------------------------------------------------------------------
/tests/spec/date_series_ops/test_date_aggregations.py:
--------------------------------------------------------------------------------
 1 | from ehrql import days
 2 | 
 3 | from ..tables import e
 4 | 
 5 | 
 6 | title = "Aggregations which apply to all series containing dates"  # section title for this spec module
 7 | 
 8 | 
 9 | def test_count_episodes(spec_test):  # count_episodes_for_patient with a 3-day maximum gap
10 |     table_data = {
11 |         e: """
12 |               |     d1
13 |             --+------------
14 |             1 | 2020-01-01
15 |             1 | 2020-01-04
16 |             1 | 2020-01-06
17 |             1 | 2020-01-10
18 |             1 | 2020-01-12
19 |             2 | 2020-01-01
20 |             3 |
21 |             4 | 2020-01-10
22 |             4 |
23 |             4 |
24 |             4 | 2020-01-01
25 |             """,
26 |     }
27 | 
28 |     spec_test(
29 |         table_data,
30 |         e.d1.count_episodes_for_patient(days(3)),
31 |         {
32 |             1: 2,  # gaps of 3, 2, 4 and 2 days: only the 4-day gap starts a new episode
33 |             2: 1,  # a single date is one episode
34 |             3: 0,  # the only row has a blank date
35 |             4: 2,  # blank dates do not count; 2020-01-01 and 2020-01-10 are 9 days apart
36 |         },
37 |     )
38 | 


--------------------------------------------------------------------------------
/tests/spec/date_series_ops/test_date_comparison_types.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | 
 3 | from ..tables import p
 4 | 
 5 | 
 6 | title = "Types usable in comparisons involving dates"  # section title for this spec module
 7 | 
 8 | table_data = {  # input rows shared by the tests in this module
 9 |     p: """
10 |           |     d1     |     d2
11 |         --+------------+------------
12 |         1 | 1990-01-01 | 1980-01-01
13 |         2 | 2000-01-01 | 1980-01-01
14 |         3 | 2010-01-01 | 2020-01-01
15 |         4 |            | 2020-01-01
16 |         """,
17 | }
18 | 
19 | 
20 | def test_accepts_python_date_object(spec_test):  # comparison argument given as datetime.date
21 |     spec_test(
22 |         table_data,
23 |         p.d1.is_before(datetime.date(2000, 1, 20)),
24 |         {
25 |             1: True,
26 |             2: True,  # 2000-01-01 is before 2000-01-20
27 |             3: False,
28 |             4: None,  # blank d1
29 |         },
30 |     )
31 | 
32 | 
33 | def test_accepts_iso_formated_date_string(spec_test):  # comparison argument given as a "YYYY-MM-DD" string
34 |     spec_test(
35 |         table_data,
36 |         p.d1.is_before("2000-01-20"),
37 |         {
38 |             1: True,
39 |             2: True,  # 2000-01-01 is before 2000-01-20
40 |             3: False,
41 |             4: None,  # blank d1
42 |         },
43 |     )
44 | 
45 | 
46 | def test_accepts_another_date_series(spec_test):  # comparison argument given as another date column
47 |     spec_test(
48 |         table_data,
49 |         p.d1.is_before(p.d2),
50 |         {
51 |             1: False,
52 |             2: False,
53 |             3: True,  # 2010-01-01 is before 2020-01-01
54 |             4: None,  # blank d1
55 |         },
56 |     )
57 | 


--------------------------------------------------------------------------------
/tests/spec/dummy/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Dummy chapter for testing spec generation"  # chapter title
2 | text = "This chapter should not appear in the table of contents"  # chapter text accompanying the title
3 | 


--------------------------------------------------------------------------------
/tests/spec/dummy/test_dummy.py:
--------------------------------------------------------------------------------
 1 | from ..tables import p
 2 | 
 3 | 
 4 | title = "Dummy section for testing spec generation"  # section title
 5 | text = "This section should not appear in the table of contents"  # section text accompanying the title
 6 | 
 7 | 
 8 | table_data = {  # every combination of T, blank and F across b1 and b2
 9 |     p: """
10 |           | b1 | b2
11 |         --+----+----
12 |         1 |  T |  T
13 |         2 |  T |
14 |         3 |  T |  F
15 |         4 |    |  T
16 |         5 |    |
17 |         6 |    |  F
18 |         7 |  F |  T
19 |         8 |  F |
20 |         9 |  F |  F
21 |         """,
22 | }
23 | 
24 | 
25 | def test_function_with_docstring(spec_test):
26 |     """this docstring should appear in the spec"""
27 |     spec_test(
28 |         table_data,
29 |         p.b1 & p.b2,
30 |         {
31 |             1: True,
32 |             2: None,  # T & blank
33 |             3: False,
34 |             4: None,  # blank & T
35 |             5: None,  # both blank
36 |             6: False,  # blank & F: the F operand decides the result
37 |             7: False,
38 |             8: False,  # F & blank: the F operand decides the result
39 |             9: False,
40 |         },
41 |     )
42 | 
43 | 
44 | def test_function_without_docstring(spec_test):  # same AND case as above, deliberately left without a docstring
45 |     spec_test(
46 |         table_data,
47 |         p.b1 & p.b2,
48 |         {
49 |             1: True,
50 |             2: None,  # T & blank
51 |             3: False,
52 |             4: None,  # blank & T
53 |             5: None,  # both blank
54 |             6: False,  # blank & F: the F operand decides the result
55 |             7: False,
56 |             8: False,  # F & blank: the F operand decides the result
57 |             9: False,
58 |         },
59 |     )
60 | 


--------------------------------------------------------------------------------
/tests/spec/filter/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Filtering an event frame"  # chapter title for this spec package
2 | 


--------------------------------------------------------------------------------
/tests/spec/float_series_ops/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Operations on float series"  # chapter title for this spec package
2 | 


--------------------------------------------------------------------------------
/tests/spec/float_series_ops/test_arithmetic_ops.py:
--------------------------------------------------------------------------------
 1 | from ..tables import p
 2 | 
 3 | 
 4 | title = "Arithmetic operations without division"  # section title for this spec module
 5 | 
 6 | table_data = {  # patient 2 has a blank f2, exercising null handling in every test below
 7 |     p: """
 8 |           |  f1   |  f2
 9 |         --+-------+-------
10 |         1 | 101.3 | 111.5
11 |         2 | 201.4 |
12 |         """,
13 | }
14 | 
15 | 
16 | def test_negate(spec_test):  # unary minus on a float column
17 |     spec_test(
18 |         table_data,
19 |         -p.f2,
20 |         {1: -111.5, 2: None},  # patient 2 has a blank f2
21 |     )
22 | 
23 | 
24 | def test_add(spec_test):  # sum of two float columns
25 |     spec_test(
26 |         table_data,
27 |         p.f1 + p.f2,
28 |         {1: 101.3 + 111.5, 2: None},  # patient 2 has a blank f2
29 |     )
30 | 
31 | 
32 | def test_subtract_with_positive_result(spec_test):  # larger column minus smaller column
33 |     spec_test(
34 |         table_data,
35 |         p.f2 - p.f1,
36 |         {1: 111.5 - 101.3, 2: None},  # patient 2 has a blank f2
37 |     )
38 | 
39 | 
40 | def test_subtract_with_negative_result(spec_test):  # smaller column minus larger column
41 |     spec_test(
42 |         table_data,
43 |         p.f1 - p.f2,
44 |         {1: 101.3 - 111.5, 2: None},  # patient 2 has a blank f2
45 |     )
46 | 
47 | 
48 | def test_multiply(spec_test):  # product of two float columns
49 |     spec_test(
50 |         table_data,
51 |         p.f1 * p.f2,
52 |         {1: 101.3 * 111.5, 2: None},  # patient 2 has a blank f2
53 |     )
54 | 
55 | 
56 | def test_multiply_with_constant(spec_test):  # Python float constant on the left of the *
57 |     spec_test(
58 |         table_data,
59 |         10.0 * p.f2,
60 |         {1: 10.0 * 111.5, 2: None},  # patient 2 has a blank f2
61 |     )
62 | 


--------------------------------------------------------------------------------
/tests/spec/float_series_ops/test_comparison_ops.py:
--------------------------------------------------------------------------------
 1 | from ..tables import p
 2 | 
 3 | 
 4 | title = "Comparison operations"  # section title for this spec module
 5 | 
 6 | table_data = {  # NOTE(review): header cells are narrower than the data cells; presumably only the "|" separators matter
 7 |     p: """
 8 |           |  f1 |  f2
 9 |         --+-----+-----
10 |         1 | 101.1 | 201.2
11 |         2 | 201.2 | 201.2
12 |         3 | 301.3 | 201.2
13 |         4 |       | 201.2
14 |         """,
15 | }
16 | 
17 | 
18 | def test_less_than(spec_test):  # f1 below, equal to and above f2, plus a blank f1
19 |     spec_test(
20 |         table_data,
21 |         p.f1 < p.f2,
22 |         {1: True, 2: False, 3: False, 4: None},  # patient 2 has equal values; patient 4 has a blank f1
23 |     )
24 | 
25 | 
26 | def test_less_than_or_equal_to(spec_test):  # f1 below, equal to and above f2, plus a blank f1
27 |     spec_test(
28 |         table_data,
29 |         p.f1 <= p.f2,
30 |         {1: True, 2: True, 3: False, 4: None},  # patient 2 has equal values; patient 4 has a blank f1
31 |     )
32 | 
33 | 
34 | def test_greater_than(spec_test):  # f1 below, equal to and above f2, plus a blank f1
35 |     spec_test(
36 |         table_data,
37 |         p.f1 > p.f2,
38 |         {1: False, 2: False, 3: True, 4: None},  # patient 2 has equal values; patient 4 has a blank f1
39 |     )
40 | 
41 | 
42 | def test_greater_than_or_equal_to(spec_test):  # f1 below, equal to and above f2, plus a blank f1
43 |     spec_test(
44 |         table_data,
45 |         p.f1 >= p.f2,
46 |         {1: False, 2: True, 3: True, 4: None},  # patient 2 has equal values; patient 4 has a blank f1
47 |     )
48 | 


--------------------------------------------------------------------------------
/tests/spec/float_series_ops/test_conversion.py:
--------------------------------------------------------------------------------
 1 | from ..tables import p
 2 | 
 3 | 
 4 | title = "Convert a float value"  # section title for this spec module
 5 | 
 6 | table_data = {  # patients 5-7 have a blank i1; patient 7 has a blank f1 as well
 7 |     p: """
 8 |               | i1 | f1
 9 |             --+----+----
10 |             1 | 1  | 1.0
11 |             2 | 42 | 32.3
12 |             3 | 3  | 5.8
13 |             4 | -4 | -6.7
14 |             5 |    | -6.2
15 |             6 |    | 0.5
16 |             7 |    |
17 |             """,
18 | }
19 | 
20 | 
21 | def test_float_as_int(spec_test):
22 |     """
23 |     Floats are rounded towards zero.
24 |     """
25 |     spec_test(
26 |         table_data,
27 |         p.f1.as_int(),
28 |         {1: 1, 2: 32, 3: 5, 4: -6, 5: -6, 6: 0, 7: None},  # e.g. 5.8 -> 5, -6.7 -> -6, 0.5 -> 0; patient 7 has a blank f1
29 |     )
30 | 
31 | 
32 | def test_float_as_float(spec_test):  # as_float on a float column returns the values unchanged
33 |     spec_test(
34 |         table_data,
35 |         p.f1.as_float(),
36 |         {1: 1.0, 2: 32.3, 3: 5.8, 4: -6.7, 5: -6.2, 6: 0.5, 7: None},  # patient 7 has a blank f1
37 |     )
38 | 
39 | 
40 | def test_add_float_to_int(spec_test):  # the int column is converted with as_float before adding
41 |     spec_test(
42 |         table_data,
43 |         p.f1 + p.i1.as_float(),
44 |         {1: 2.0, 2: 74.3, 3: 8.8, 4: -10.7, 5: None, 6: None, 7: None},  # patients 5-7 have a blank i1
45 |     )
46 | 


--------------------------------------------------------------------------------
/tests/spec/int_series_ops/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Operations on integer series"  # chapter title for this spec package
2 | 


--------------------------------------------------------------------------------
/tests/spec/int_series_ops/test_arithmetic_ops.py:
--------------------------------------------------------------------------------
 1 | from ..tables import p
 2 | 
 3 | 
 4 | title = "Arithmetic operations without division"  # section title for this spec module
 5 | 
 6 | table_data = {  # patient 2 has a blank i2, exercising null handling in every test below
 7 |     p: """
 8 |           |  i1 |  i2
 9 |         --+-----+-----
10 |         1 | 101 | 111
11 |         2 | 201 |
12 |         """,
13 | }
14 | 
15 | 
16 | def test_negate(spec_test):  # unary minus on an integer column
17 |     spec_test(
18 |         table_data,
19 |         -p.i2,
20 |         {1: -111, 2: None},  # patient 2 has a blank i2
21 |     )
22 | 
23 | 
24 | def test_add(spec_test):  # sum of two integer columns
25 |     spec_test(
26 |         table_data,
27 |         p.i1 + p.i2,
28 |         {1: 101 + 111, 2: None},  # patient 2 has a blank i2
29 |     )
30 | 
31 | 
32 | def test_subtract(spec_test):  # difference of two integer columns (negative for patient 1)
33 |     spec_test(
34 |         table_data,
35 |         p.i1 - p.i2,
36 |         {1: 101 - 111, 2: None},  # patient 2 has a blank i2
37 |     )
38 | 
39 | 
40 | def test_multiply(spec_test):  # product of two integer columns
41 |     spec_test(
42 |         table_data,
43 |         p.i1 * p.i2,
44 |         {1: 101 * 111, 2: None},  # patient 2 has a blank i2
45 |     )
46 | 
47 | 
48 | def test_multiply_with_constant(spec_test):  # Python int constant on the left of the *
49 |     spec_test(
50 |         table_data,
51 |         10 * p.i2,
52 |         {1: 10 * 111, 2: None},  # patient 2 has a blank i2
53 |     )
54 | 


--------------------------------------------------------------------------------
/tests/spec/int_series_ops/test_comparison_ops.py:
--------------------------------------------------------------------------------
 1 | from ..tables import p
 2 | 
 3 | 
 4 | title = "Comparison operations"  # section title for this spec module
 5 | 
 6 | table_data = {  # i1 below, equal to and above i2, plus a blank i1
 7 |     p: """
 8 |           |  i1 |  i2
 9 |         --+-----+-----
10 |         1 | 101 | 201
11 |         2 | 201 | 201
12 |         3 | 301 | 201
13 |         4 |     | 201
14 |         """,
15 | }
16 | 
17 | 
18 | def test_less_than(spec_test):  # strict < between two integer columns
19 |     spec_test(
20 |         table_data,
21 |         p.i1 < p.i2,
22 |         {1: True, 2: False, 3: False, 4: None},  # patient 2 has equal values; patient 4 has a blank i1
23 |     )
24 | 
25 | 
26 | def test_less_than_or_equal_to(spec_test):  # <= between two integer columns
27 |     spec_test(
28 |         table_data,
29 |         p.i1 <= p.i2,
30 |         {1: True, 2: True, 3: False, 4: None},  # patient 2 has equal values; patient 4 has a blank i1
31 |     )
32 | 
33 | 
34 | def test_greater_than(spec_test):  # strict > between two integer columns
35 |     spec_test(
36 |         table_data,
37 |         p.i1 > p.i2,
38 |         {1: False, 2: False, 3: True, 4: None},  # patient 2 has equal values; patient 4 has a blank i1
39 |     )
40 | 
41 | 
42 | def test_greater_than_or_equal_to(spec_test):  # >= between two integer columns
43 |     spec_test(
44 |         table_data,
45 |         p.i1 >= p.i2,
46 |         {1: False, 2: True, 3: True, 4: None},  # patient 2 has equal values; patient 4 has a blank i1
47 |     )
48 | 


--------------------------------------------------------------------------------
/tests/spec/int_series_ops/test_conversion.py:
--------------------------------------------------------------------------------
 1 | from ..tables import p
 2 | 
 3 | 
 4 | title = "Convert an integer value"  # section title for this spec module
 5 | 
 6 | table_data = {  # patient 4 has a blank i1
 7 |     p: """
 8 |           | i1 | f1
 9 |         --+----+----
10 |         1 | 1  | 1.0
11 |         2 | 32 | 12.4
12 |         3 | 5  | -3.2
13 |         4 |    | 2.1
14 |         """,
15 | }
16 | 
17 | 
18 | def test_integer_as_float(spec_test):  # as_float on an integer column
19 |     spec_test(
20 |         table_data,
21 |         p.i1.as_float(),
22 |         {
23 |             1: 1.0,
24 |             2: 32.0,
25 |             3: 5.0,
26 |             4: None,  # blank i1
27 |         },
28 |     )
29 | 
30 | 
31 | def test_integer_as_int(spec_test):  # as_int on an integer column returns the values unchanged
32 |     spec_test(
33 |         table_data,
34 |         p.i1.as_int(),
35 |         {
36 |             1: 1,
37 |             2: 32,
38 |             3: 5,
39 |             4: None,  # blank i1
40 |         },
41 |     )
42 | 
43 | 
44 | def test_add_int_to_float(spec_test):  # the float column is converted with as_int before adding
45 |     spec_test(
46 |         table_data,
47 |         p.i1 + p.f1.as_int(),
48 |         {1: 2, 2: 44, 3: 2, 4: None},  # 12.4 -> 12 and -3.2 -> -3 before adding; patient 4 has a blank i1
49 |     )
50 | 


--------------------------------------------------------------------------------
/tests/spec/multi_code_string_series_ops/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Operations on all series containing multi code strings"  # chapter title for this spec package
2 | 


--------------------------------------------------------------------------------
/tests/spec/multi_code_string_series_ops/test_containment.py:
--------------------------------------------------------------------------------
 1 | from ehrql.codes import ICD10Code
 2 | 
 3 | from ..tables import p
 4 | 
 5 | 
 6 | title = "Testing for containment using codes"  # section title for this spec module
 7 | 
 8 | table_data = {  # NOTE(review): m1 cells look like "||"-prefixed groups of comma-separated codes; confirm the exact format
 9 |     p: """
10 |           |   m1
11 |         --+--------
12 |         1 | ||E119 ,J849 ,M069 ||I801 ,I802
13 |         2 | ||T202 ,A429 ||A429 ,A420, J170
14 |         3 | ||M139 ,E220 ,M145, M060
15 |         4 |
16 |         """,
17 | }
18 | 
19 | 
20 | def test_contains_code_prefix(spec_test):  # contains() given a plain string that is only the start of a code
21 |     spec_test(
22 |         table_data,
23 |         p.m1.contains("M06"),
24 |         {
25 |             1: True,  # M069 starts with M06
26 |             2: False,
27 |             3: True,  # M060 starts with M06
28 |             4: None,  # blank m1
29 |         },
30 |     )
31 | 
32 | 
33 | def test_contains_code(spec_test):  # contains() given a full ICD10Code
34 |     spec_test(
35 |         table_data,
36 |         p.m1.contains(ICD10Code("M069")),
37 |         {
38 |             1: True,
39 |             2: False,
40 |             3: False,  # has M060 but not M069
41 |             4: None,  # blank m1
42 |         },
43 |     )
44 | 
45 | 
46 | def test_contains_any_of_codelist(spec_test):  # list mixing an ICD10Code and a plain string
47 |     spec_test(
48 |         table_data,
49 |         p.m1.contains_any_of([ICD10Code("M069"), "A429"]),
50 |         {
51 |             1: True,  # has M069
52 |             2: True,  # has A429
53 |             3: False,
54 |             4: None,  # blank m1
55 |         },
56 |     )
57 | 


--------------------------------------------------------------------------------
/tests/spec/population/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Defining the dataset population"  # chapter title for this spec package
2 | 


--------------------------------------------------------------------------------
/tests/spec/population/test_population.py:
--------------------------------------------------------------------------------
 1 | from ehrql import case, when
 2 | 
 3 | from ..tables import e, p
 4 | 
 5 | 
 6 | title = "Defining a population"  # section title for this spec module
 7 | text = """
 8 | `define_population` is used to limit the population from which data is extracted.
 9 | """
10 | 
11 | 
12 | def test_population_with_single_table(spec_test):
13 |     """
14 |     Extract a column from a patient table after limiting the population by another column.
15 |     """
16 |     table_data = {
17 |         p: """
18 |               | b1 | i1
19 |             --+----+---
20 |             1 | F  | 10
21 |             2 | T  | 20
22 |             3 | F  | 30
23 |         """,
24 |     }
25 | 
26 |     spec_test(
27 |         table_data,
28 |         p.i1,
29 |         {
30 |             1: 10,
31 |             3: 30,
32 |         },  # patient 2 (b1 = T) is absent because the population is ~p.b1
33 |         population=~p.b1,
34 |     )
35 | 
36 | 
37 | def test_population_with_multiple_tables(spec_test):
38 |     """
39 |     Limit the patient population by a column in one table, and return values from another
40 |     table.
41 |     """
42 |     table_data = {
43 |         p: """
44 |               | i1
45 |             --+----
46 |             1 | 10
47 |             2 | 20
48 |             3 | 0
49 |         """,
50 |         e: """
51 |               | i1
52 |             --+-----
53 |             1 | 101
54 |             1 | 102
55 |             3 | 301
56 |             4 | 401
57 |         """,
58 |     }
59 | 
60 |     spec_test(
61 |         table_data,
62 |         e.exists_for_patient(),
63 |         {
64 |             1: True,
65 |             2: False,  # in the population but has no rows in e
66 |         },  # patient 3 (i1 = 0) and patient 4 (no row in p) are outside the population
67 |         population=p.i1 > 0,
68 |     )
69 | 
70 | 
71 | def test_case_with_case_expression(spec_test):
72 |     """
73 |     Limit the patient population by a case expression.
74 |     """
75 |     table_data = {
76 |         p: """
77 |               | i1
78 |             --+---
79 |             1 | 6
80 |             2 | 7
81 |             3 | 9
82 |             4 |
83 |             """,
84 |     }
85 | 
86 |     spec_test(
87 |         table_data,
88 |         p.i1,
89 |         {
90 |             1: 6,
91 |             2: 7,
92 |         },  # patient 3 maps to False; patient 4 (blank i1) matches neither branch and is also left out
93 |         population=case(
94 |             when(p.i1 <= 8).then(True),
95 |             when(p.i1 > 8).then(False),
96 |         ),
97 |     )
98 | 


--------------------------------------------------------------------------------
/tests/spec/series_ops/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Operations on all series"  # chapter title for this spec package
2 | 


--------------------------------------------------------------------------------
/tests/spec/series_ops/test_containment.py:
--------------------------------------------------------------------------------
 1 | from ..tables import p
 2 | 
 3 | 
 4 | title = "Testing for containment"  # section title for this spec module
 5 | 
 6 | table_data = {  # patient 4 has a blank i1
 7 |     p: """
 8 |           |  i1
 9 |         --+-----
10 |         1 | 101
11 |         2 | 201
12 |         3 | 301
13 |         4 |
14 |         """,
15 | }
16 | 
17 | 
18 | def test_is_in(spec_test):  # membership of an integer column in a Python list
19 |     spec_test(
20 |         table_data,
21 |         p.i1.is_in([101, 301]),
22 |         {
23 |             1: True,
24 |             2: False,
25 |             3: True,
26 |             4: None,  # blank i1 gives a null result against a non-empty list
27 |         },
28 |     )
29 | 
30 | 
31 | def test_is_not_in(spec_test):  # negated membership against the same list
32 |     spec_test(
33 |         table_data,
34 |         p.i1.is_not_in([101, 301]),
35 |         {
36 |             1: False,
37 |             2: True,
38 |             3: False,
39 |             4: None,  # blank i1 gives a null result against a non-empty list
40 |         },
41 |     )
42 | 
43 | 
44 | def test_is_in_empty_list(spec_test):  # nothing is a member of an empty list
45 |     spec_test(
46 |         table_data,
47 |         p.i1.is_in([]),
48 |         {
49 |             1: False,
50 |             2: False,
51 |             3: False,
52 |             4: False,  # a blank i1 also yields False here, not null
53 |         },
54 |     )
55 | 
56 | 
57 | def test_is_not_in_empty_list(spec_test):  # everything is outside an empty list
58 |     spec_test(
59 |         table_data,
60 |         p.i1.is_not_in([]),
61 |         {
62 |             1: True,
63 |             2: True,
64 |             3: True,
65 |             4: True,  # a blank i1 also yields True here, not null
66 |         },
67 |     )
68 | 


--------------------------------------------------------------------------------
/tests/spec/series_ops/test_containment_with_series.py:
--------------------------------------------------------------------------------
 1 | from ..tables import e, p
 2 | 
 3 | 
 4 | title = "Testing for containment in another series"  # section title for this spec module
 5 | 
 6 | 
 7 | table_data = {  # patient 6 has a blank p.i1 and no rows in e; patients 4 and 5 each have one blank event value
 8 |     p: """
 9 |           |  i1
10 |         --+-----
11 |         1 | 101
12 |         2 | 201
13 |         3 | 301
14 |         4 |
15 |         5 | 501
16 |         6 |
17 |         """,
18 |     e: """
19 |           |  i1
20 |         --+-----
21 |         1 | 101
22 |         2 | 201
23 |         2 | 203
24 |         2 | 301
25 |         3 | 333
26 |         3 | 334
27 |         4 |
28 |         4 | 401
29 |         5 |
30 |         5 | 101
31 |         """,
32 | }
33 | 
34 | 
35 | def test_is_in_series(spec_test):  # membership of a patient value among that patient's event values
36 |     spec_test(
37 |         table_data,
38 |         p.i1.is_in(e.i1),
39 |         {
40 |             1: True,
41 |             2: True,
42 |             3: False,
43 |             4: None,  # blank p.i1, with event rows present
44 |             5: False,  # 501 is not among the event values, even though one of them is blank
45 |             6: False,  # blank p.i1 and no event rows at all
46 |         },
47 |     )
48 | 
49 | 
50 | def test_is_not_in_series(spec_test):  # negated membership among the patient's event values
51 |     spec_test(
52 |         table_data,
53 |         p.i1.is_not_in(e.i1),
54 |         {
55 |             1: False,
56 |             2: False,
57 |             3: True,
58 |             4: None,  # blank p.i1, with event rows present
59 |             5: True,  # 501 is not among the event values, even though one of them is blank
60 |             6: True,  # blank p.i1 and no event rows at all
61 |         },
62 |     )
63 | 


--------------------------------------------------------------------------------
/tests/spec/series_ops/test_equality.py:
--------------------------------------------------------------------------------
 1 | from ..tables import p
 2 | 
 3 | 
 4 | title = "Testing for equality"  # section title for this spec module
 5 | 
 6 | table_data = {  # patient 3 has a blank i2; patient 4 has both columns blank
 7 |     p: """
 8 |           |  i1 |  i2
 9 |         --+-----+-----
10 |         1 | 101 | 101
11 |         2 | 201 | 202
12 |         3 | 301 |
13 |         4 |     |
14 |         """,
15 | }
16 | 
17 | 
18 | def test_equals(spec_test):  # == between two integer columns
19 |     spec_test(
20 |         table_data,
21 |         p.i1 == p.i2,
22 |         {
23 |             1: True,
24 |             2: False,
25 |             3: None,  # i2 is blank
26 |             4: None,  # both blank: the result is null, not True
27 |         },
28 |     )
29 | 
30 | 
31 | def test_not_equals(spec_test):  # != between two integer columns
32 |     spec_test(
33 |         table_data,
34 |         p.i1 != p.i2,
35 |         {
36 |             1: False,
37 |             2: True,
38 |             3: None,  # i2 is blank
39 |             4: None,  # both blank: the result is null, not False
40 |         },
41 |     )
42 | 
43 | 
44 | def test_is_null(spec_test):  # is_null always returns a definite True/False
45 |     spec_test(
46 |         table_data,
47 |         p.i1.is_null(),
48 |         {
49 |             1: False,
50 |             2: False,
51 |             3: False,
52 |             4: True,  # blank i1
53 |         },
54 |     )
55 | 
56 | 
57 | def test_is_not_null(spec_test):  # is_not_null always returns a definite True/False
58 |     spec_test(
59 |         table_data,
60 |         p.i1.is_not_null(),
61 |         {
62 |             1: True,
63 |             2: True,
64 |             3: True,
65 |             4: False,  # blank i1
66 |         },
67 |     )
68 | 


--------------------------------------------------------------------------------
/tests/spec/series_ops/test_map_values.py:
--------------------------------------------------------------------------------
 1 | from ..tables import p
 2 | 
 3 | 
 4 | title = "Map from one set of values to another"
 5 | 
 6 | table_data = {
 7 |     p: """
 8 |           |  i1
 9 |         --+-----
10 |         1 | 101
11 |         2 | 201
12 |         3 | 301
13 |         4 |
14 |         """,
15 | }
16 | 
17 | 
18 | def test_map_values(spec_test):
19 |     spec_test(
20 |         table_data,
21 |         p.i1.map_values({101: "a", 201: "b", 301: "a"}, default="c"),
22 |         {
23 |             1: "a",
24 |             2: "b",
25 |             3: "a",
26 |             4: "c",
27 |         },
28 |     )
29 | 


--------------------------------------------------------------------------------
/tests/spec/series_ops/test_when_null_then.py:
--------------------------------------------------------------------------------
 1 | from ..tables import p
 2 | 
 3 | 
 4 | title = "Replace missing values"
 5 | 
 6 | table_data = {
 7 |     p: """
 8 |           |  i1
 9 |         --+-----
10 |         1 | 101
11 |         2 | 201
12 |         3 | 301
13 |         4 |
14 |         """,
15 | }
16 | 
17 | 
18 | def test_when_null_then_integer_column(spec_test):
19 |     spec_test(
20 |         table_data,
21 |         p.i1.when_null_then(0),
22 |         {
23 |             1: 101,
24 |             2: 201,
25 |             3: 301,
26 |             4: 0,
27 |         },
28 |     )
29 | 
30 | 
31 | def test_when_null_then_boolean_column(spec_test):
32 |     spec_test(
33 |         table_data,
34 |         p.i1.is_in([101, 201]).when_null_then(False),
35 |         {
36 |             1: True,
37 |             2: True,
38 |             3: False,
39 |             4: False,
40 |         },
41 |     )
42 | 


--------------------------------------------------------------------------------
/tests/spec/sort_and_pick/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Picking one row for each patient from an event frame"
2 | 


--------------------------------------------------------------------------------
/tests/spec/sort_and_pick/test_sort_by_column_and_pick.py:
--------------------------------------------------------------------------------
 1 | from ..tables import e
 2 | 
 3 | 
 4 | title = "Picking the first or last row for each patient"
 5 | 
 6 | table_data = {
 7 |     e: """
 8 |           |  i1
 9 |         --+----
10 |         1 | 101
11 |         1 | 102
12 |         1 | 103
13 |         2 | 203
14 |         2 | 202
15 |         2 | 201
16 |         """,
17 | }
18 | 
19 | 
20 | def test_sort_by_column_pick_first(spec_test):
21 |     spec_test(
22 |         table_data,
23 |         e.sort_by(e.i1).first_for_patient().i1,
24 |         {
25 |             1: 101,
26 |             2: 201,
27 |         },
28 |     )
29 | 
30 | 
31 | def test_sort_by_column_pick_last(spec_test):
32 |     spec_test(
33 |         table_data,
34 |         e.sort_by(e.i1).last_for_patient().i1,
35 |         {
36 |             1: 103,
37 |             2: 203,
38 |         },
39 |     )
40 | 


--------------------------------------------------------------------------------
/tests/spec/sort_and_pick/test_sort_by_column_with_nulls_and_pick.py:
--------------------------------------------------------------------------------
 1 | from ..tables import e
 2 | 
 3 | 
 4 | title = "Picking the first or last row for each patient where a column contains NULLs"
 5 | 
 6 | table_data = {
 7 |     e: """
 8 |           |  i1
 9 |         --+-----
10 |         1 |
11 |         1 | 102
12 |         1 | 103
13 |         2 | 203
14 |         2 | 202
15 |         2 |
16 |         """,
17 | }
18 | 
19 | 
20 | def test_sort_by_column_with_nulls_and_pick_first(spec_test):
21 |     spec_test(
22 |         table_data,
23 |         e.sort_by(e.i1).first_for_patient().i1,
24 |         {
25 |             1: None,
26 |             2: None,
27 |         },
28 |     )
29 | 
30 | 
31 | def test_sort_by_column_with_nulls_and_pick_last(spec_test):
32 |     spec_test(
33 |         table_data,
34 |         e.sort_by(e.i1).last_for_patient().i1,
35 |         {
36 |             1: 103,
37 |             2: 203,
38 |         },
39 |     )
40 | 


--------------------------------------------------------------------------------
/tests/spec/sort_and_pick/test_sort_by_interleaved_with_where.py:
--------------------------------------------------------------------------------
 1 | from ..tables import e
 2 | 
 3 | 
 4 | title = "Mixing the order of `sort_by` and `where` operations"
 5 | 
 6 | table_data = {
 7 |     e: """
 8 |           |  i1 | i2
 9 |         --+-----+---
10 |         1 | 101 | 1
11 |         1 | 102 | 2
12 |         1 | 103 | 2
13 |         2 | 203 | 1
14 |         2 | 202 | 2
15 |         2 | 201 | 2
16 |         """,
17 | }
18 | 
19 | 
20 | def test_sort_by_before_where(spec_test):
21 |     spec_test(
22 |         table_data,
23 |         e.sort_by(e.i1).where(e.i1 > 102).first_for_patient().i1,
24 |         {
25 |             1: 103,
26 |             2: 201,
27 |         },
28 |     )
29 | 
30 | 
31 | def test_sort_by_interleaved_with_where(spec_test):
32 |     spec_test(
33 |         table_data,
34 |         e.sort_by(e.i1).where(e.i2 > 1).sort_by(e.i2).first_for_patient().i1,
35 |         {
36 |             1: 102,
37 |             2: 201,
38 |         },
39 |     )
40 | 


--------------------------------------------------------------------------------
/tests/spec/sort_and_pick/test_sort_by_multiple_columns_and_pick.py:
--------------------------------------------------------------------------------
 1 | from ..tables import e
 2 | 
 3 | 
 4 | title = "Sort by more than one column and pick the first or last row for each patient"
 5 | 
 6 | table_data = {
 7 |     e: """
 8 |           |  i1 | i2
 9 |         --+-----+---
10 |         1 | 101 | 3
11 |         1 | 102 | 2
12 |         1 | 102 | 1
13 |         2 | 203 | 1
14 |         2 | 202 | 2
15 |         2 | 202 | 3
16 |         """,
17 | }
18 | 
19 | 
20 | def test_sort_by_multiple_columns_pick_first(spec_test):
21 |     spec_test(
22 |         table_data,
23 |         e.sort_by(e.i1, e.i2).first_for_patient().i2,
24 |         {
25 |             1: 3,
26 |             2: 2,
27 |         },
28 |     )
29 | 
30 | 
31 | def test_sort_by_multiple_columns_pick_last(spec_test):
32 |     spec_test(
33 |         table_data,
34 |         e.sort_by(e.i1, e.i2).last_for_patient().i2,
35 |         {
36 |             1: 2,
37 |             2: 1,
38 |         },
39 |     )
40 | 


--------------------------------------------------------------------------------
/tests/spec/sort_and_pick/test_sort_extends_to_all_columns_when_underspecified.py:
--------------------------------------------------------------------------------
 1 | from ..tables import e
 2 | 
 3 | 
 4 | title = "Sort extends to all columns when underspecified to ensure that sort order is consistent"
 5 | 
 6 | table_data = {
 7 |     e: """
 8 |           |  i1 | i2 |  i3
 9 |         --+----------------
10 |         1 | 100 |  2 | 101
11 |         1 | 100 |  1 | 102
12 |         1 | 100 |  1 | 103
13 |         2 | 100 |  0 | 500
14 |         2 | 100 |  1 |   1
15 |         2 | 101 |  0 |   1
16 |         """,
17 | }
18 | 
19 | 
20 | def test_sorting_extends_to_selected_column(spec_test):
21 |     spec_test(
22 |         table_data,
23 |         e.sort_by(e.i1, e.i2).first_for_patient().i3,
24 |         {1: 102, 2: 500},
25 |     )
26 | 


--------------------------------------------------------------------------------
/tests/spec/str_series_ops/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Operations on all series containing strings"
2 | 


--------------------------------------------------------------------------------
/tests/spec/table_from_rows/__init__.py:
--------------------------------------------------------------------------------
1 | title = "Defining a table using inline data"
2 | 


--------------------------------------------------------------------------------
/tests/spec/table_from_rows/test_table_from_rows.py:
--------------------------------------------------------------------------------
 1 | from ehrql.tables import PatientFrame, Series, table_from_rows
 2 | 
 3 | from ..tables import p
 4 | 
 5 | 
 6 | title = "Defining a table using inline data"
 7 | 
 8 | table_data = {
 9 |     p: """
10 |           | i1
11 |         --+----
12 |         1 | 10
13 |         2 | 20
14 |         3 | 30
15 |         """,
16 | }
17 | 
18 | 
19 | def test_table_from_rows(spec_test):
20 |     inline_data = [
21 |         (1, 100),
22 |         (3, 300),
23 |     ]
24 | 
25 |     @table_from_rows(inline_data)
26 |     class t(PatientFrame):
27 |         n = Series(int)
28 | 
29 |     spec_test(
30 |         table_data,
31 |         p.i1 + t.n,
32 |         {
33 |             1: 10 + 100,
34 |             2: None,
35 |             3: 30 + 300,
36 |         },
37 |     )
38 | 


--------------------------------------------------------------------------------
/tests/spec/tables.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | 
 3 | from ehrql.codes import ICD10MultiCodeString, OPCS4MultiCodeString, SNOMEDCTCode
 4 | from ehrql.tables import EventFrame, PatientFrame, Series, table
 5 | 
 6 | 
 7 | @table
 8 | class patient_level_table(PatientFrame):
 9 |     i1 = Series(int)
10 |     i2 = Series(int)
11 |     b1 = Series(bool)
12 |     b2 = Series(bool)
13 |     c1 = Series(SNOMEDCTCode)
14 |     m1 = Series(ICD10MultiCodeString)
15 |     m2 = Series(OPCS4MultiCodeString)
16 |     d1 = Series(datetime.date)
17 |     d2 = Series(datetime.date)
18 |     s1 = Series(str)
19 |     s2 = Series(str)
20 |     f1 = Series(float)
21 |     f2 = Series(float)
22 | 
23 | 
24 | @table
25 | class event_level_table(EventFrame):
26 |     i1 = Series(int)
27 |     i2 = Series(int)
28 |     i3 = Series(int)
29 |     b1 = Series(bool)
30 |     b2 = Series(bool)
31 |     c1 = Series(SNOMEDCTCode)
32 |     m1 = Series(ICD10MultiCodeString)
33 |     m2 = Series(OPCS4MultiCodeString)
34 |     d1 = Series(datetime.date)
35 |     d2 = Series(datetime.date)
36 |     s1 = Series(str)
37 |     s2 = Series(str)
38 |     f1 = Series(float)
39 |     f2 = Series(float)
40 | 
41 | 
42 | # Define short aliases for terser tests
43 | p = patient_level_table
44 | e = event_level_table
45 | 


--------------------------------------------------------------------------------
/tests/spec/test_conftest.py:
--------------------------------------------------------------------------------
 1 | from ehrql.query_model.nodes import Column, TableSchema
 2 | 
 3 | from .conftest import parse_row, parse_table
 4 | 
 5 | 
 6 | def test_parse_table():
 7 |     assert parse_table(
 8 |         TableSchema(i1=Column(int), i2=Column(int)),
 9 |         """
10 |           |  i1 |  i2
11 |         --+-----+-----
12 |         1 | 101 | 111
13 |         2 | 201 |
14 |         """,
15 |     ) == [
16 |         {"patient_id": 1, "i1": 101, "i2": 111},
17 |         {"patient_id": 2, "i1": 201, "i2": None},
18 |     ]
19 | 
20 | 
21 | def test_parse_row():
22 |     assert parse_row(
23 |         {"patient_id": int, "i1": int, "i2": int},
24 |         ["patient_id", "i1", "i2"],
25 |         "1 | 101 | 111",
26 |     ) == {"patient_id": 1, "i1": 101, "i2": 111}
27 | 


--------------------------------------------------------------------------------
/tests/spec/toc.py:
--------------------------------------------------------------------------------
 1 | # Table of contents for documentation generated from specs
 2 | 
 3 | contents = {
 4 |     "filter": [
 5 |         "test_where",
 6 |         "test_except_where",
 7 |     ],
 8 |     "sort_and_pick": [
 9 |         "test_sort_by_column_and_pick",
10 |         "test_sort_by_multiple_columns_and_pick",
11 |         "test_sort_by_column_with_nulls_and_pick",
12 |         "test_sort_by_interleaved_with_where",
13 |     ],
14 |     "aggregate_frame": [
15 |         "test_exists_for_patient",
16 |         "test_count_for_patient",
17 |     ],
18 |     "aggregate_series": [
19 |         "test_minimum_and_maximum_for_patient",
20 |         "test_sum_for_patient",
21 |         "test_mean_for_patient",
22 |         "test_count_distinct_for_patient",
23 |     ],
24 |     "combine_series": [
25 |         "test_patient_series_and_patient_series",
26 |         "test_patient_series_and_value",
27 |         "test_event_series_and_event_series",
28 |         "test_event_series_and_patient_series",
29 |         "test_event_series_and_value",
30 |     ],
31 |     "series_ops": [
32 |         "test_equality",
33 |         "test_containment",
34 |         "test_containment_with_series",
35 |         "test_map_values",
36 |         "test_when_null_then",
37 |         "test_maximum_of_and_minimum_of_patient_series",
38 |         "test_maximum_of_and_minimum_of_event_series",
39 |     ],
40 |     "bool_series_ops": [
41 |         "test_logical_ops",
42 |         "test_conversion",
43 |     ],
44 |     "int_series_ops": [
45 |         "test_arithmetic_ops",
46 |         "test_comparison_ops",
47 |     ],
48 |     "code_series_ops": [
49 |         "test_containment",
50 |         "test_map_codes_to_categories",
51 |     ],
52 |     "multi_code_string_series_ops": [
53 |         "test_containment",
54 |     ],
55 |     "case_expressions": [
56 |         "test_case",
57 |         "test_when",
58 |     ],
59 |     "date_series_ops": [
60 |         "test_date_series_ops",
61 |         "test_date_comparisons",
62 |         "test_date_comparison_types",
63 |         "test_date_aggregations",
64 |     ],
65 |     "str_series_ops": [
66 |         "test_contains",
67 |     ],
68 |     "population": [
69 |         "test_population",
70 |     ],
71 |     "table_from_rows": [
72 |         "test_table_from_rows",
73 |     ],
74 | }
75 | 


--------------------------------------------------------------------------------
/tests/support/mssql/setup.sql:
--------------------------------------------------------------------------------
1 | CREATE DATABASE test
2 | ALTER DATABASE test SET COMPATIBILITY_LEVEL = 100;
3 | 
4 | CREATE DATABASE temp_tables
5 | ALTER DATABASE temp_tables SET COMPATIBILITY_LEVEL = 100;
6 | 
7 | SET QUOTED_IDENTIFIER ON;
8 | GO
9 | 


--------------------------------------------------------------------------------
/tests/support/trino/entrypoint.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -euo pipefail
 4 | 
 5 | if [ "$1" = '/usr/lib/trino/bin/run-trino' ]; then
 6 |   # If this is the container's first run, initialize the application
 7 |   # database
 8 |   if [ ! -f /tmp/app-initialized ]; then
 9 |     # Initialize the application database asynchronously in a
10 |     # background process. This allows a) the trino process to be
11 |     # the main process in the container, which allows graceful
12 |     # shutdown and other goodies, and b) us to only start the trino
13 |     # process once, as opposed to starting, stopping, then
14 |     # starting it again.
15 |     function initialize_app_database() {
16 |         timeout=20
17 |         limit="$((SECONDS + timeout))"
18 | 
19 |         # Note that the container has been initialized so future
20 |         # starts won't wipe changes to the data
21 |         touch /tmp/app-initialized
22 |     }
23 | 
24 |     initialize_app_database &
25 |   fi
26 | fi
27 | 
28 | # The Docker library we're using hides stdout from us if the container exits with an error, so send everything to
29 | # stderr.
30 | exec "$@" 1>&2
31 | 


--------------------------------------------------------------------------------
/tests/support/trino/etc/catalog/trino.properties:
--------------------------------------------------------------------------------
1 | connector.name=memory
2 | memory.max-data-per-node=128MB
3 | 


--------------------------------------------------------------------------------
/tests/support/trino/etc/config.properties:
--------------------------------------------------------------------------------
 1 | # Single-node install config
 2 | coordinator=true
 3 | node-scheduler.include-coordinator=true
 4 | http-server.http.port=8080
 5 | discovery.uri=http://localhost:8080
 6 | 
 7 | # Attempt to retry failed queries. It's possible this will get around the
 8 | # transient "No nodes available to run query" errors that we're seeing in the
 9 | # long-running generative tests. See:
10 | # https://trino.io/docs/current/admin/properties-query-management.html#retry-policy
11 | # https://trino.io/docs/current/admin/fault-tolerant-execution.html
12 | retry-policy=QUERY
13 | 


--------------------------------------------------------------------------------
/tests/support/trino/etc/jvm.config:
--------------------------------------------------------------------------------
 1 | -server
 2 | -agentpath:/usr/lib/trino/bin/libjvmkill.so
 3 | # Reduced both the below values from 80% so Trino doesn't try to hog all the
 4 | # RAM which we need to run other database containers
 5 | -XX:InitialRAMPercentage=40
 6 | -XX:MaxRAMPercentage=40
 7 | -XX:G1HeapRegionSize=32M
 8 | -XX:+ExplicitGCInvokesConcurrent
 9 | -XX:+HeapDumpOnOutOfMemoryError
10 | -XX:+ExitOnOutOfMemoryError
11 | -XX:-OmitStackTraceInFastThrow
12 | -XX:ReservedCodeCacheSize=256M
13 | -XX:PerMethodRecompilationCutoff=10000
14 | -XX:PerBytecodeRecompilationCutoff=10000
15 | -Djdk.attach.allowAttachSelf=true
16 | -Djdk.nio.maxCachedBufferSize=2000000
17 | # Improve AES performance for S3, etc. on ARM64 (JDK-8271567)
18 | -XX:+UnlockDiagnosticVMOptions
19 | -XX:+UseAESCTRIntrinsics
20 | # Disable Preventive GC for performance reasons (JDK-8293861)
21 | -XX:-G1UsePreventiveGC
22 | 


--------------------------------------------------------------------------------
/tests/support/trino/etc/log.properties:
--------------------------------------------------------------------------------
1 | # Enable verbose logging from Trino
2 | #io.trino=DEBUG
3 | 


--------------------------------------------------------------------------------
/tests/support/trino/etc/node.properties:
--------------------------------------------------------------------------------
1 | node.environment=docker
2 | node.data-dir=/data/trino
3 | 


--------------------------------------------------------------------------------
/tests/unit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/__init__.py


--------------------------------------------------------------------------------
/tests/unit/backends/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/backends/__init__.py


--------------------------------------------------------------------------------
/tests/unit/backends/test_emis.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from trino import exceptions as trino_exceptions
 3 | 
 4 | from ehrql.backends.emis import EMISBackend
 5 | 
 6 | 
 7 | @pytest.mark.parametrize(
 8 |     "exception",
 9 |     [
10 |         # These are trino errors that we may want to support in future with
11 |         # custom exit codes, but currently inherit from the base method
12 |         # Database errors
13 |         trino_exceptions.DatabaseError,
14 |         # OperationalError is a subclass of DatabaseError
15 |         trino_exceptions.OperationalError,
16 |         # TrinoQueryError is encountered for over-complex/over-nested queries
17 |         trino_exceptions.TrinoQueryError,
18 |         # TrinoUserError is encountered for out of range numbers
19 |         trino_exceptions.TrinoUserError,
20 |         # TrinoDataError is encountered for bad/out of range dates
21 |         trino_exceptions.TrinoDataError,
22 |     ],
23 | )
24 | def test_backend_exceptions(exception):
25 |     backend = EMISBackend()
26 |     assert backend.get_exit_status_for_exception(exception) is None
27 | 


--------------------------------------------------------------------------------
/tests/unit/docs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/docs/__init__.py


--------------------------------------------------------------------------------
/tests/unit/docs/test_common.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ehrql.docs.common import get_docstring, get_function_body
 4 | 
 5 | 
 6 | class ExampleClass:
 7 |     # Comments above
 8 |     @staticmethod
 9 |     def example_method_with_docstring(
10 |         arg1: int,
11 |         arg2: str,
12 |     ) -> str:  # pragma: no cover
13 |         """
14 |         Docstring goes here
15 |         """
16 |         # Make it bigger
17 |         arg1 = arg1 + 100
18 |         # Make it smaller
19 |         arg1 = arg1 // 2
20 |         return arg2 + str(arg1)
21 | 
22 |     def example_method_no_docstring(self):  # pragma: no cover
23 |         # Return the thing
24 |         return "foo"
25 | 
26 | 
27 | EXPECTED_WITH_DOCSTRING = """\
28 | # Make it bigger
29 | arg1 = arg1 + 100
30 | # Make it smaller
31 | arg1 = arg1 // 2
32 | return arg2 + str(arg1)
33 | """
34 | 
35 | 
36 | EXPECTED_NO_DOCSTRING = """\
37 | # Return the thing
38 | return "foo"
39 | """
40 | 
41 | 
42 | @pytest.mark.parametrize(
43 |     "method,expected",
44 |     [
45 |         (ExampleClass.example_method_with_docstring, EXPECTED_WITH_DOCSTRING),
46 |         (ExampleClass.example_method_no_docstring, EXPECTED_NO_DOCSTRING),
47 |     ],
48 | )
49 | def test_get_function_body(method, expected):
50 |     assert get_function_body(method) == expected
51 | 
52 | 
53 | def test_get_docstring():
54 |     assert (
55 |         get_docstring(ExampleClass.example_method_with_docstring)
56 |         == "Docstring goes here"
57 |     )
58 | 
59 | 
60 | def test_get_docstring_with_default():
61 |     assert (
62 |         get_docstring(ExampleClass.example_method_no_docstring, default="foo") == "foo"
63 |     )
64 | 
65 | 
66 | def test_get_docstring_with_error():
67 |     with pytest.raises(ValueError, match="No docstring defined for public object"):
68 |         get_docstring(ExampleClass.example_method_no_docstring)
69 | 


--------------------------------------------------------------------------------
/tests/unit/docs/test_language.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ehrql.docs.language import is_included_attr
 4 | from ehrql.utils.docs_utils import exclude_from_docs
 5 | 
 6 | 
 7 | class Example:
 8 |     some_attr = "some_value"
 9 | 
10 |     def some_method(self):
11 |         raise NotImplementedError()
12 | 
13 |     @property
14 |     def some_property(self):
15 |         raise NotImplementedError()
16 | 
17 |     def _some_internal_method(self):
18 |         raise NotImplementedError()
19 | 
20 |     @exclude_from_docs
21 |     def some_excluded_method(self):
22 |         raise NotImplementedError()
23 | 
24 | 
25 | @pytest.mark.parametrize(
26 |     "name,expected",
27 |     [
28 |         ("some_attr", False),
29 |         ("some_method", True),
30 |         ("some_property", True),
31 |         ("_some_internal_method", False),
32 |         ("some_excluded_method", False),
33 |     ],
34 | )
35 | def test_is_included_attr(name, expected):
36 |     value = getattr(Example, name)
37 |     assert is_included_attr(name, value) == expected
38 | 


--------------------------------------------------------------------------------
/tests/unit/docs/test_schemas.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ehrql.docs.schemas import get_table_docstring
 4 | from ehrql.tables import EventFrame, Series, table
 5 | 
 6 | 
 7 | def test_get_table_docstring():
 8 |     @table
 9 |     class parent_table(EventFrame):
10 |         "I have a docstring"
11 | 
12 |         col_a = Series(str)
13 | 
14 |     @table
15 |     class child_table(parent_table.__class__):
16 |         """
17 |         I have a docstring
18 | 
19 |         With some extra stuff
20 |         """
21 | 
22 |         col_b = Series(str)
23 | 
24 |     assert (
25 |         get_table_docstring(child_table.__class__)
26 |         == "I have a docstring\n\nWith some extra stuff"
27 |     )
28 | 
29 | 
30 | def test_get_table_docstring_with_mismatch():
31 |     @table
32 |     class parent_table(EventFrame):
33 |         "I have a docstring"
34 | 
35 |         col_a = Series(str)
36 | 
37 |     @table
38 |     class child_table(parent_table.__class__):
39 |         "I have a different docstring"
40 | 
41 |         col_b = Series(str)
42 | 
43 |     with pytest.raises(ValueError):
44 |         get_table_docstring(child_table.__class__)
45 | 


--------------------------------------------------------------------------------
/tests/unit/dummy_data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/dummy_data/__init__.py


--------------------------------------------------------------------------------
/tests/unit/dummy_data/test_dependencies.py:
--------------------------------------------------------------------------------
 1 | import pathlib
 2 | 
 3 | import pytest
 4 | 
 5 | import ehrql.dummy_data as dummy_data
 6 | 
 7 | 
 8 | PY_FILES = list(pathlib.Path(dummy_data.__file__).parent.glob("*.py"))
 9 | 
10 | 
11 | @pytest.mark.parametrize("file", PY_FILES, ids=[f.name for f in PY_FILES])
12 | def test_dummy_data_does_not_refer_to_nextgen(file):
13 |     with open(file) as i:
14 |         source = i.read()
15 | 
16 |     namespace = {}
17 |     exec(source, namespace, namespace)
18 |     for name, value in namespace.items():
19 |         if hasattr(value, "__module__") and value.__module__ is not None:
20 |             assert "dummy_data_nextgen" not in value.__module__, name
21 | 


--------------------------------------------------------------------------------
/tests/unit/dummy_data_nextgen/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/dummy_data_nextgen/__init__.py


--------------------------------------------------------------------------------
/tests/unit/file_formats/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/file_formats/__init__.py


--------------------------------------------------------------------------------
/tests/unit/file_formats/test_base.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ehrql.file_formats.base import FileValidationError, validate_columns
 4 | from ehrql.query_model.column_specs import ColumnSpec
 5 | 
 6 | 
 7 | def test_validate_columns():
 8 |     # Column order is not significant, and neither is the presence of additional columns,
 9 |     # so long as all required columns are present
10 |     validate_columns(
11 |         ["a", "b", "c", "d"],
12 |         {
13 |             "c": ColumnSpec(int),
14 |             "b": ColumnSpec(int),
15 |             "a": ColumnSpec(int),
16 |         },
17 |     )
18 | 
19 | 
20 | def test_validate_columns_fails_on_missing_columns_by_default():
21 |     with pytest.raises(FileValidationError, match="Missing columns: b, d"):
22 |         validate_columns(
23 |             ["c", "a"],
24 |             {
25 |                 "a": ColumnSpec(int),
26 |                 "b": ColumnSpec(int),
27 |                 "c": ColumnSpec(int),
28 |                 "d": ColumnSpec(int),
29 |             },
30 |         )
31 | 
32 | 
33 | def test_validate_columns_allows_missing_columns():
34 |     validate_columns(
35 |         ["c", "a"],
36 |         {
37 |             "a": ColumnSpec(int),
38 |             "b": ColumnSpec(int),
39 |             "c": ColumnSpec(int),
40 |         },
41 |         allow_missing_columns=True,
42 |     )
43 | 
44 | 
45 | def test_validate_columns_does_not_allow_missing_nonnullable_columns():
46 |     with pytest.raises(FileValidationError, match="Missing columns: b"):
47 |         validate_columns(
48 |             ["c", "a"],
49 |             {
50 |                 "a": ColumnSpec(int),
51 |                 "b": ColumnSpec(int, nullable=False),
52 |                 "c": ColumnSpec(int),
53 |             },
54 |             allow_missing_columns=True,
55 |         )
56 | 


--------------------------------------------------------------------------------
/tests/unit/measures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/measures/__init__.py


--------------------------------------------------------------------------------
/tests/unit/measures/test_disclosure_control.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ehrql.measures.disclosure_control import apply_sdc
 4 | 
 5 | 
 6 | @pytest.mark.parametrize("i,expected", [(6, 0), (7, 0), (8, 10)])
 7 | def test_apply_sdc(i, expected):
 8 |     assert apply_sdc(i) == expected
 9 | 
10 | 
11 | @pytest.mark.parametrize("bad_value", [-1, 7.1])
12 | def test_apply_sdc_with_bad_value(bad_value):
13 |     with pytest.raises(AssertionError):
14 |         apply_sdc(bad_value)
15 | 


--------------------------------------------------------------------------------
/tests/unit/query_engines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/query_engines/__init__.py


--------------------------------------------------------------------------------
/tests/unit/query_engines/test_in_memory.py:
--------------------------------------------------------------------------------
 1 | from datetime import date
 2 | 
 3 | from ehrql.query_engines.in_memory import InMemoryQueryEngine
 4 | from ehrql.query_engines.in_memory_database import InMemoryDatabase
 5 | from ehrql.query_language import EventFrame, Series, table
 6 | 
 7 | 
 8 | @table
 9 | class events(EventFrame):
10 |     date = Series(date)
11 | 
12 | 
13 | def test_pick_one_row_per_patient():
14 |     # This test verifies that picking one row per patient works without first having
15 |     # applied QM transformations to all variables in a dataset.
16 |     database = InMemoryDatabase(
17 |         {
18 |             events._qm_node: [
19 |                 (1, date(2023, 1, 1)),
20 |             ],
21 |         }
22 |     )
23 |     engine = InMemoryQueryEngine(database)
24 |     engine.cache = {}
25 |     frame = events.sort_by(events.date).first_for_patient()
26 |     engine.visit(frame._qm_node)
27 | 


--------------------------------------------------------------------------------
/tests/unit/query_model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/query_model/__init__.py


--------------------------------------------------------------------------------
/tests/unit/query_model/test_constraints.py:
--------------------------------------------------------------------------------
 1 | from datetime import date
 2 | 
 3 | from ehrql.tables import Constraint
 4 | 
 5 | 
 6 | def test_categorical_validation():
 7 |     c = Constraint.Categorical((1, "a"))
 8 |     assert c.validate("a")
 9 |     assert c.validate(None)
10 |     assert not c.validate("")
11 |     assert not c.validate("b")
12 | 
13 | 
def test_not_null_validation():
    """A not-null constraint accepts a value and rejects None."""
    constraint = Constraint.NotNull()
    present_ok = constraint.validate(1)
    assert present_ok
    missing_ok = constraint.validate(None)
    assert not missing_ok
18 | 
19 | 
def test_unique_validation():
    """A unique constraint validates a single value in isolation."""
    constraint = Constraint.Unique()
    is_valid = constraint.validate(1)
    assert is_valid
23 | 
24 | 
def test_first_of_month_validation():
    """A first-of-month constraint accepts the 1st and None, but not the 2nd."""
    constraint = Constraint.FirstOfMonth()
    for accepted in (date(2024, 1, 1), None):
        assert constraint.validate(accepted)
    assert not constraint.validate(date(2024, 1, 2))
30 | 
31 | 
def test_regex_validation():
    """A regex constraint accepts matching strings and None, and rejects others."""
    constraint = Constraint.Regex("E020[0-9]{5}")
    for accepted in ("E02012345", None):
        assert constraint.validate(accepted)
    # Neither the empty string nor a bare prefix satisfies the pattern
    for rejected in ("", "E020"):
        assert not constraint.validate(rejected)
38 | 
39 | 
def test_closed_range_validation():
    """A closed range accepts an in-range value and None, and rejects values outside."""
    constraint = Constraint.ClosedRange(1, 3)
    for accepted in (2, None):
        assert constraint.validate(accepted)
    # One value below the range and one above it
    for rejected in (0, 4):
        assert not constraint.validate(rejected)
46 | 


--------------------------------------------------------------------------------
/tests/unit/query_model/test_graphs.py:
--------------------------------------------------------------------------------
 1 | from ehrql import Dataset
 2 | from ehrql.query_model.graphs import build_graph
 3 | from ehrql.tables.tpp import patients
 4 | 
 5 | 
 6 | def test_build_graph():
 7 |     dataset = Dataset()
 8 |     year = patients.date_of_birth.year
 9 |     dataset.define_population(year >= 1940)
10 |     dataset.year = year
11 | 
12 |     # We just want to check that nothing blows up
13 |     build_graph(dataset._compile())
14 | 


--------------------------------------------------------------------------------
/tests/unit/test_example_data.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import pytest
 4 | 
 5 | import ehrql
 6 | from ehrql.query_engines.local_file import LocalFileQueryEngine
 7 | from ehrql.query_language import BaseFrame
 8 | from ehrql.tables import core
 9 | 
10 | 
# Example CSV files are given for all core tables, so collect every table the
# `core` module exports
EXAMPLE_TABLES = [getattr(core, table_name) for table_name in core.__all__]

# The example data lives in a directory alongside the installed `ehrql` package
EXAMPLE_DATA_DIR = Path(ehrql.__file__).parent.joinpath("example-data")
15 | 
16 | 
@pytest.mark.parametrize(
    "ql_table",
    EXAMPLE_TABLES,
    ids=lambda tbl: ".".join([tbl.__module__, tbl.__class__.__qualname__]),
)
def test_populate_database_using_example_data(ql_table: BaseFrame):
    """Load the example data for one table, requiring every column to be present."""
    # The engine populates the database with the example data and validates the column
    # specs in the process
    query_engine = LocalFileQueryEngine(EXAMPLE_DATA_DIR)
    table_nodes = [ql_table._qm_node]
    query_engine.populate_database(table_nodes, allow_missing_columns=False)
27 | 


--------------------------------------------------------------------------------
/tests/unit/test_pyproject_minimal.py:
--------------------------------------------------------------------------------
 1 | import toml
 2 | 
 3 | 
 4 | def test_pyproject_minimal_is_subset_of_pyproject():
 5 |     with open("pyproject.toml") as f:
 6 |         pyproject = toml.load(f)
 7 |     with open("pyproject.minimal.toml") as f:
 8 |         minimal = toml.load(f)
 9 | 
10 |     # `pyproject.minimal.toml` doesn't need to contain everything `pyproject.toml`
11 |     # contains, but whatever it does contain should agree with `pyproject.toml`
12 |     assert minimal.keys() == {"project"}
13 |     for key, value in minimal["project"].items():
14 |         assert value == pyproject["project"][key]
15 | 


--------------------------------------------------------------------------------
/tests/unit/test_sqlalchemy_types.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | 
 3 | import pytest
 4 | from sqlalchemy import types
 5 | 
 6 | from ehrql.codes import CTV3Code
 7 | from ehrql.sqlalchemy_types import type_from_python_type
 8 | 
 9 | 
@pytest.mark.parametrize(
    "type_,expected",
    [
        (bool, types.Boolean),
        (datetime.date, types.Date),
        (float, types.Float),
        (int, types.Integer),
        (str, types.String),
        (CTV3Code, types.String),
    ],
)
def test_type_from_python_type(type_, expected):
    """Each supported Python type maps to the expected SQLAlchemy type."""
    sqlalchemy_type = type_from_python_type(type_)
    assert sqlalchemy_type == expected
23 | 
24 | 
class UnknownType:
    """Bare class used to exercise `type_from_python_type` with an unrecognised type."""
26 | 
27 | 
def test_type_from_python_type_raises_error_on_unknown_type():
    """An unrecognised type is rejected with a `TypeError`."""
    unsupported = UnknownType
    with pytest.raises(TypeError):
        type_from_python_type(unsupported)
31 | 
32 | 
class TypeWithMethod:
    """Custom type which declares its underlying primitive type via a classmethod."""

    @classmethod
    def _primitive_type(cls):
        """Return the primitive type this class should be treated as."""
        return int
37 | 
38 | 
def test_type_from_python_type_respects_primitive_type_method():
    """A type exposing `_primitive_type` is mapped via the primitive type it returns."""
    sqlalchemy_type = type_from_python_type(TypeWithMethod)
    assert sqlalchemy_type == types.Integer
41 | 


--------------------------------------------------------------------------------
/tests/unit/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensafely-core/ehrql/6753ca2a4c3170d9246dbc1f8b066844da708948/tests/unit/utils/__init__.py


--------------------------------------------------------------------------------
/tests/unit/utils/test_functools_utils.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ehrql.utils.functools_utils import singledispatchmethod_with_cache
 4 | 
 5 | 
 6 | @pytest.fixture
 7 | def TestClass():
 8 |     COUNTER = 0
 9 | 
10 |     class TestClass:
11 |         @singledispatchmethod_with_cache
12 |         def test(self, value):
13 |             assert False
14 | 
15 |         @test.register(str)
16 |         def test_str(self, value):
17 |             # Use a shared counter to give different results for each call
18 |             nonlocal COUNTER
19 |             COUNTER += 1
20 |             return value, COUNTER
21 | 
22 |     return TestClass
23 | 
24 | 
def test_results_are_cached(TestClass):
    """Calling twice with the same argument returns the very same object."""
    instance = TestClass()
    first_result = instance.test("hello")
    second_result = instance.test("hello")
    assert first_result is second_result
28 | 
29 | 
def test_cache_is_unique_to_instances(TestClass):
    """Two instances do not share cached results for the same argument."""
    first_instance = TestClass()
    second_instance = TestClass()
    first_result = first_instance.test("hello")
    second_result = second_instance.test("hello")
    assert first_result is not second_result
34 | 
35 | 
def test_cache_can_be_cleared(TestClass):
    """After `cache_clear()` the method is re-evaluated and returns a new object."""
    instance = TestClass()
    cached_result = instance.test("hello")
    instance.test.cache_clear()
    fresh_result = instance.test("hello")
    assert cached_result is not fresh_result
41 | 
42 | 
def test_clearing_cache_only_affects_single_instance(TestClass):
    """Clearing one instance's cache leaves another instance's cache intact."""
    cleared = TestClass()
    untouched = TestClass()
    cleared_before = cleared.test("hello")
    untouched_before = untouched.test("hello")

    cleared.test.cache_clear()

    # The cleared instance recomputes; the other still serves its cached object
    cleared_after = cleared.test("hello")
    assert cleared_before is not cleared_after
    untouched_after = untouched.test("hello")
    assert untouched_before is untouched_after
51 | 


--------------------------------------------------------------------------------
/tests/unit/utils/test_log_utils.py:
--------------------------------------------------------------------------------
 1 | from ehrql.utils import log_utils
 2 | 
 3 | 
 4 | def test_kv():
 5 |     assert log_utils.kv({}) == ""
 6 |     assert (
 7 |         log_utils.kv({"foo": "foo", "bar": 1, "baz": [1, 2, 3]})
 8 |         == "foo=foo bar=1 baz=[1, 2, 3]"
 9 |     )
10 | 


--------------------------------------------------------------------------------
/tests/unit/utils/test_math_utils.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ehrql.utils.math_utils import get_grouping_level_as_int
 4 | 
 5 | 
 6 | @pytest.mark.parametrize(
 7 |     "all_groups, group_subset,expected",
 8 |     [
 9 |         ([], [], 0),
10 |         (["a", "b"], ["a"], 1),
11 |         (["a", "b"], ["b"], 2),
12 |         (["a", "b"], ["a", "b"], 0),
13 |         (["d", "e", "f"], ["d"], 3),
14 |         (["d", "e", "f"], ["f", "d"], 2),
15 |         (["d", "e", "f"], ["d", "f"], 2),
16 |         (["a", "b", "c", "d", "e", "f"], ["a"], 31),
17 |         (["a", "b", "c", "d", "e", "f"], ["a", "b", "c", "d", "e", "f"], 0),
18 |     ],
19 | )
20 | def test_get_grouping_level_as_int(all_groups, group_subset, expected):
21 |     assert get_grouping_level_as_int(all_groups, group_subset) == expected
22 | 


--------------------------------------------------------------------------------
/tests/unit/utils/test_regex_utils.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | 
 3 | import pytest
 4 | 
 5 | from ehrql.utils import regex_utils
 6 | 
 7 | 
 8 | @pytest.mark.parametrize(
 9 |     "re_str,examples",
10 |     [
11 |         # Branches
12 |         (
13 |             "abc(foo|bar)",
14 |             ["abcbar", "abcfoo"],
15 |         ),
16 |         # Ranges
17 |         (
18 |             "[A-Z][0-9]",
19 |             ["D1", "V1", "H0", "L9", "E2"],
20 |         ),
21 |         # Repeats
22 |         (
23 |             "A{2,4}_?B{2}",
24 |             ["AAABB", "AABB", "AA_BB", "AABB", "AAA_BB"],
25 |         ),
26 |         # Unbounded repeats
27 |         (
28 |             "a+b*",
29 |             ["aaaaaaaab", "ab", "aaaaaaaaaa", "aab", "aaaaaabbb"],
30 |         ),
31 |         # All together now ...
32 |         (
33 |             "(none|alpha[A-Z]{3,5}|digit[0-9]{3,5})",
34 |             ["alphaCVD", "alphaALT", "alphaFAH", "none", "digit18445"],
35 |         ),
36 |     ],
37 | )
38 | def test_create_regex_generator(re_str, examples):
39 |     generator = regex_utils.create_regex_generator(re_str)
40 |     rnd = random.Random(1234)
41 |     assert [generator(rnd) for _ in examples] == examples
42 | 
43 | 
def test_validate_regex():
    """A regex using only supported constructs validates successfully."""
    supported_pattern = "E[A-Z]{3}-(foo|bar)"
    assert regex_utils.validate_regex(supported_pattern)
46 | 
47 | 
@pytest.mark.parametrize(
    "re_str,error",
    [
        # Parse errors from Python's regex engine are bubbled up
        ("abc(123", r"missing \), unterminated subpattern at position 3"),
        # Valid regexes which use unhandled constructs (e.g. non-greedy matches) should
        # raise an "unsupported" error
        ("t+?test", "unsupported"),
        # Subpattern groups are supported, but attempting to set flags inside the group
        # is not
        ("(?i:TEST)", "unsupported"),
        # And neither is unsetting flags
        ("(?-i:TEST)", "unsupported"),
    ],
)
def test_validate_regex_error(re_str, error):
    """Invalid or unsupported regexes raise `RegexError` with a matching message."""
    expected_failure = pytest.raises(regex_utils.RegexError, match=error)
    with expected_failure:
        regex_utils.validate_regex(re_str)
66 | 


--------------------------------------------------------------------------------
/tests/unit/utils/test_sequence_utils.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ehrql.utils.sequence_utils import ordered_set
 4 | 
 5 | 
 6 | @pytest.mark.parametrize(
 7 |     "input_list,expected",
 8 |     [
 9 |         ([4, 3, 2, 3, 5, 5, 2, 2, 1, 4], [4, 3, 2, 5, 1]),
10 |         ([4, -1, 3, 3, 2], [4, -1, 3, 2]),
11 |         (["f", "d", "f", "f", "d", "e", "f"], ["f", "d", "e"]),
12 |         ([1, "d", 2, "d", 3, "d"], [1, "d", 2, 3]),
13 |     ],
14 | )
15 | def test_ordered_set(input_list, expected):
16 |     assert ordered_set(input_list) == expected
17 | 


--------------------------------------------------------------------------------
/tests/unit/utils/test_string_utils.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ehrql.utils.string_utils import strip_indent
 4 | 
 5 | 
 6 | @pytest.mark.parametrize(
 7 |     "s,expected",
 8 |     [
 9 |         (
10 |             "Should\nbe\nuntouched",
11 |             "Should\nbe\nuntouched",
12 |         ),
13 |         (
14 |             """
15 |             Leading newline and indent should be stripped:
16 | 
17 |               But nested indent retained
18 | 
19 |             Like this.
20 |             """,
21 |             (
22 |                 "Leading newline and indent should be stripped:\n"
23 |                 "\n"
24 |                 "  But nested indent retained\n"
25 |                 "\n"
26 |                 "Like this."
27 |             ),
28 |         ),
29 |     ],
30 | )
31 | def test_strip_indent(s, expected):
32 |     assert strip_indent(s) == expected
33 | 


--------------------------------------------------------------------------------
/tests/unit/utils/test_traceback_utils.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ehrql.utils.traceback_utils import get_trimmed_traceback, walk_traceback
 4 | 
 5 | 
 6 | # NOTE
 7 | #
 8 | # These tests exist purely to exercise some edge cases of the module and keep coverage
 9 | # happy. The actual behaviour of the module is covered in:
10 | # tests/integration/utils/test_traceback_utils.py
11 | 
12 | 
def test_walk_to_end_of_traceback():
    """Walking the traceback of a freshly raised exception yields a single entry."""
    raised = exception_with_traceback()
    entries = list(walk_traceback(raised.__traceback__))
    assert len(entries) == 1
17 | 
18 | 
def test_get_trimmed_traceback_with_incorrect_filename():
    """Trimming to a filename absent from the traceback raises `StopIteration`."""
    raised = exception_with_traceback()
    missing_filename = "no_such_file"
    with pytest.raises(StopIteration):
        get_trimmed_traceback(raised, missing_filename)
23 | 
24 | 
def exception_with_traceback():
    """Return a `ValueError` that has really been raised, so it carries a traceback."""
    try:
        raise ValueError
    except ValueError as raised:
        return raised
30 | 


--------------------------------------------------------------------------------