├── tests ├── unit │ └── nhp │ │ ├── model │ │ ├── __init__.py │ │ ├── test___init__.py │ │ ├── test_helpers.py │ │ ├── data │ │ │ ├── test_data.py │ │ │ ├── test_reference.py │ │ │ └── test_local.py │ │ ├── test_params.py │ │ ├── test__main__.py │ │ ├── test_run.py │ │ ├── test_aae.py │ │ ├── test_inpatient_efficiencies.py │ │ └── test_outpatients.py │ │ └── docker │ │ ├── test_config.py │ │ └── test___main__.py ├── conftest.py └── integration │ └── nhp │ └── model │ ├── test_params_validation.py │ └── test_run_model.py ├── .coveragerc ├── src └── nhp │ ├── docker │ ├── __init__.py │ ├── config.py │ ├── __main__.py │ └── run.py │ └── model │ ├── data │ ├── __init__.py │ ├── reference │ │ ├── variant_lookup.json │ │ ├── __init__.py │ │ └── hsa_split_normal_params.csv │ ├── data.py │ └── local.py │ ├── __init__.py │ ├── helpers.py │ ├── params │ ├── __main__.py │ └── __init__.py │ ├── __main__.py │ ├── run.py │ ├── aae.py │ ├── health_status_adjustment.py │ ├── outpatients.py │ ├── model_iteration.py │ ├── activity_resampling.py │ └── results.py ├── .vscode ├── extensions.json ├── settings.json ├── tasks.json └── launch.json ├── codecov.yml ├── CODEOWNERS ├── .github ├── workflows │ ├── deploy_dev.yaml │ ├── codecov.yaml │ ├── linting.yaml │ ├── deploy_release.yaml │ ├── remove_untagged_container_images.yaml │ ├── removed_closed_prs.yaml │ ├── build_container.yaml │ ├── deploy_docs.yaml │ ├── deploy_pr.yaml │ ├── build_schema.yaml │ └── build_app.yaml ├── dependabot.yml └── copilot-instructions.md ├── docs ├── gen_ref_pages.py └── index.md ├── LICENSE ├── mkdocs.yml ├── Dockerfile ├── pyproject.toml ├── readme.md └── .gitignore /tests/unit/nhp/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | tests/* -------------------------------------------------------------------------------- /src/nhp/docker/__init__.py: -------------------------------------------------------------------------------- 1 | """NHP Demand Model - Docker runtime.""" 2 | -------------------------------------------------------------------------------- /tests/unit/nhp/model/test___init__.py: -------------------------------------------------------------------------------- 1 | """Test __init__.py.""" 2 | 3 | import nhp.model as mdl 4 | 5 | # no tests other than ability to import 6 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "ryanluker.vscode-coverage-gutters", 4 | "ms-python.python", 5 | "ms-toolsai.jupyter", 6 | "ms-python.pylint" 7 | ] 8 | } -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | target: 100% 6 | threshold: 0% 7 | patch: 8 | default: 9 | target: 100% 10 | threshold: 0% 11 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | def pytest_addoption(parser): 5 | parser.addoption("--data-dir", help="Directory containing data", default="data/synth") 6 | 7 | 8 
| @pytest.fixture 9 | def data_dir(request): 10 | return request.config.getoption("--data-dir") 11 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | # These owners will be the default owners for everything in 2 | # the repo. Unless a later match takes precedence, 3 | # @primary-owner and @secondary-owner will be requested for 4 | # review when someone opens a pull request. 5 | * @tomjemmett @The-Strategy-Unit/nhp_model_devs 6 | -------------------------------------------------------------------------------- /src/nhp/model/data/__init__.py: -------------------------------------------------------------------------------- 1 | """NHP Data Loaders. 2 | 3 | Classes for loading data for the NHP model. Each class supports loading data from different sources, 4 | such as from local storage or directly from DataBricks. 5 | """ 6 | 7 | from nhp.model.data.data import Data 8 | from nhp.model.data.local import Local 9 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "pylint.args": [ 3 | "--max-args", 4 | "6" 5 | ], 6 | "python.testing.pytestArgs": [ 7 | "tests", 8 | "--data-dir=data/synth" 9 | ], 10 | "python.testing.unittestEnabled": false, 11 | "python.testing.pytestEnabled": true, 12 | "[python]": { 13 | "editor.defaultFormatter": "charliermarsh.ruff" 14 | }, 15 | "azurite.location": "../.azurite", 16 | "nhp.data_path": "data/synth" 17 | } -------------------------------------------------------------------------------- /.github/workflows/deploy_dev.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | workflow_dispatch: 6 | 7 | name: Deploy Dev 8 | 9 | jobs: 10 | deploy-ghcr-dev: 11 | uses: ./.github/workflows/build_container.yaml 12 | with: 13 | docker-tag: ghcr.io/the-strategy-unit/nhp_model:dev 14 | app-version: dev 15 | data-version: dev 16 | secrets: inherit 17 | 18 | deploy-dev-schema: 19 | uses: ./.github/workflows/build_schema.yaml 20 | with: 21 | schema-tag: dev 22 | secrets: inherit -------------------------------------------------------------------------------- /tests/integration/nhp/model/test_params_validation.py: -------------------------------------------------------------------------------- 1 | """Test params-sample.""" 2 | 3 | import pytest 4 | 5 | from nhp.model.params import load_sample_params 6 | 7 | 8 | def test_sample_params_are_valid(): 9 | load_sample_params(dataset="dev", scenario="unit-test") 10 | # assert: no exception raised 11 | 12 | 13 | def test_load_sample_params_validation_fails(): 14 | from jsonschema.exceptions import ValidationError 15 | 16 | with pytest.raises(ValidationError): 17 | load_sample_params(demographic_factors="invalid-factor") 18 | -------------------------------------------------------------------------------- /src/nhp/model/__init__.py: -------------------------------------------------------------------------------- 1 | """New Hospitals Programme Model.""" 2 | 3 | # re-export anything useful 4 | from nhp.model.aae import AaEModel 5 | from nhp.model.activity_resampling import ActivityResampling 6 | from nhp.model.health_status_adjustment import HealthStatusAdjustmentInterpolated 7 | from nhp.model.inpatients import InpatientEfficiencies, InpatientsModel 8 | from nhp.model.model import Model 9 | 
from nhp.model.model_iteration import ModelIteration 10 | from nhp.model.outpatients import OutpatientsModel 11 | from nhp.model.params import load_params, load_sample_params 12 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "uv" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "type": "shell", 6 | "label": "Run Code Coverage", 7 | "command": "${command:python.interpreterPath} -m pytest --cov=. tests/unit --cov-branch --cov-report xml:coverage.xml --cov-report term", 8 | "problemMatcher": [] 9 | }, 10 | { 11 | "type": "shell", 12 | "label": "Download synth data", 13 | "command": "az storage blob download-batch -d data -s data --pattern 'synth/**.parquet' --account-name nhpsa --auth-mode login --overwrite", 14 | "problemMatcher": [] 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /src/nhp/model/data/reference/variant_lookup.json: -------------------------------------------------------------------------------- 1 | { 2 | "migration_category": "ppp", 3 | "var_proj_5_year_migration": "ppp", 4 | "var_proj_10_year_migration": "ppp", 5 | "var_proj_high_intl_migration": "ppp", 6 | "var_proj_low_intl_migration": "ppp", 7 | "var_proj_zero_net_migration": "ppp", 8 | "high_population": "hle", 9 | "young_age_structure": "lle", 10 | "high_fertility": "ppp", 11 | "old_age_structure": "hle", 12 | "low_population": "lle", 13 | "low_fertility": "ppp", 14 | "high_life_expectancy": "hle", 15 | "low_life_expectancy": "lle", 16 | "no_mortality_improvement": "ppp", 17 | "zero_net_migration": "ppp", 18 | "replacement_fertility": "ppp", 19 | "custom_projection_R0A66": "ppp", 20 | "custom_projection_RD8": "ppp" 21 | } -------------------------------------------------------------------------------- /.github/workflows/codecov.yaml: -------------------------------------------------------------------------------- 1 | name: CodeCov 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | run: 11 | runs-on: ubuntu-latest 12 | 13 | defaults: 14 | run: 15 | shell: bash -l {0} 16 | 17 | steps: 18 | - uses: actions/checkout@v5 19 | 20 | - name: Install the latest version of uv 21 | uses: astral-sh/setup-uv@v6 22 | with: 23 | version: "latest" 24 | activate-environment: true 25 | 26 | - name: Install dependencies 27 | run: uv pip install -e ".[dev]" 28 | 29 | - name: Generate Report 30 | run: uv run pytest --cov=. 
tests/unit --ignore=tests --cov-branch --cov-report xml:coverage.xml 31 | 32 | - name: Upload Coverage to Codecov 33 | uses: codecov/codecov-action@v5 34 | with: 35 | token: ${{ secrets.CODECOV_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/linting.yaml: -------------------------------------------------------------------------------- 1 | name: Linting and Type checking 2 | 3 | on: pull_request 4 | 5 | jobs: 6 | ruff-check: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v5 10 | 11 | - uses: astral-sh/ruff-action@v3 12 | 13 | ruff-format-check: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v5 17 | 18 | - uses: astral-sh/ruff-action@v3 19 | with: 20 | args: format --check --diff 21 | 22 | ty-check: 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/checkout@v5 26 | 27 | - name: Install the latest version of uv 28 | uses: astral-sh/setup-uv@v6 29 | with: 30 | version: "latest" 31 | activate-environment: true 32 | 33 | - name: Install dependencies 34 | run: uv pip install -e ".[dev,databricks]" 35 | 36 | - name: Generate Report 37 | run: uvx ty check . -------------------------------------------------------------------------------- /tests/unit/nhp/model/test_helpers.py: -------------------------------------------------------------------------------- 1 | """Test helper methods.""" 2 | 3 | from unittest.mock import Mock 4 | 5 | import pytest 6 | 7 | from nhp.model.helpers import inrange, rnorm 8 | 9 | 10 | @pytest.mark.parametrize("value, expected", [(-1.1, 0), (1.1, 1), (0, 0), (1, 1), (0.5, 0.5)]) 11 | def test_inrange(value, expected): 12 | """Test that the inrange function returns expected values.""" 13 | assert inrange(value) == expected 14 | 15 | 16 | @pytest.mark.parametrize( 17 | "value, low, high, expected", [(0, 0.25, 0.75, 0.25), (1, 0.25, 0.75, 0.75)] 18 | ) 19 | def test_inrange_lo_hi(value, low, high, expected): 20 | """Test that the inrange function returns expected values.""" 21 | assert inrange(value, low, high) == expected 22 | 23 | 24 | def test_rnorm(): 25 | """Test that the rnorm function returns random values.""" 26 | rng = Mock() 27 | rng.normal.return_value = 1.5 28 | assert rnorm(rng, 1, 2) == 1.5 29 | rng.normal.assert_called_once_with(1.5, 0.3901520929904105) 30 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Run Model", 9 | "type": "debugpy", 10 | "request": "launch", 11 | "module": "nhp.model", 12 | "args": [ 13 | "${input:params_file}", 14 | "-d=${config:nhp.data_path}", 15 | "--type=${input:type}" 16 | ], 17 | "console": "integratedTerminal" 18 | } 19 | ], 20 | "inputs": [ 21 | { 22 | "id": "params_file", 23 | "type": "promptString", 24 | "description": "Path to parameters file (leave empty to use sample parameters)", 25 | "default": "" 26 | }, 27 | { 28 | "id": "type", 29 | "type": "pickString", 30 | "description": "Model Run Type", 31 | "options": [ 32 | "ip", 33 | "op", 34 | "aae", 35 | "all" 36 | ] 37 | } 38 | ] 39 | } -------------------------------------------------------------------------------- /.github/workflows/deploy_release.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | tags: 4 | - 'v*.*.*' 5 | 6 | name: Deploy Production 7 | 8 | jobs: 9 | 10 | set-tag: 11 | runs-on: ubuntu-latest 12 | outputs: 13 | tag: ${{ steps.create-tag.outputs.TAG }} 14 | 15 | steps: 16 | - name: Create tag 17 | id: create-tag 18 | run: | 19 | TAG=`echo ${{ github.ref_name }} | awk 'BEGIN { FS="."; } { print ""$1"."$2; }'` 20 | echo "TAG=$TAG" >> $GITHUB_OUTPUT 21 | 22 | deploy-ghcr-production: 23 | needs: [set-tag] 24 | uses: ./.github/workflows/build_container.yaml 25 | with: 26 | docker-tag: ghcr.io/the-strategy-unit/nhp_model:${{ needs.set-tag.outputs.tag }},ghcr.io/the-strategy-unit/nhp_model:latest 27 | app-version: ${{ github.ref_name }} 28 | data-version: ${{vars.data_version}} 29 | secrets: inherit 30 | 31 | deploy-schema: 32 | needs: [set-tag] 33 | 34 | uses: ./.github/workflows/build_schema.yaml 35 | with: 36 | schema-tag: ${{ needs.set-tag.outputs.tag }} 37 | -------------------------------------------------------------------------------- /docs/gen_ref_pages.py: -------------------------------------------------------------------------------- 1 | """Generate the code reference pages and navigation.""" 2 | 3 | from pathlib import Path 4 | 5 | import mkdocs_gen_files 6 | 7 | nav = mkdocs_gen_files.Nav() 8 | 9 | src = Path(__file__).parent.parent / "src" 10 | for path in sorted(src.rglob("*.py")): 11 | module_path = path.relative_to(src).with_suffix("") 12 | doc_path = path.relative_to(src).with_suffix(".md") 13 | full_doc_path = Path("reference", doc_path) 14 | 15 | parts = tuple(module_path.parts) 16 | 17 | if parts[-1] == "__init__": 18 | parts = parts[:-1] 19 | doc_path = doc_path.with_name("index.md") 20 | full_doc_path = full_doc_path.with_name("index.md") 21 | elif parts[-1] == "__main__": 22 | continue 23 | 24 | nav[parts] = doc_path.as_posix() 25 | 26 | with mkdocs_gen_files.open(full_doc_path, "w") as fd: 27 | ident = ".".join(parts) 28 | fd.write(f"::: {ident}") 29 | 30 | mkdocs_gen_files.set_edit_path(full_doc_path, path) 31 | 32 | with mkdocs_gen_files.open("reference/SUMMARY.md", "w") as nav_file: 33 | nav_file.writelines(nav.build_literate_nav()) 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 NHS England 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without 
restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/nhp/docker/config.py: -------------------------------------------------------------------------------- 1 | """config values for docker container.""" 2 | 3 | import os 4 | 5 | import dotenv 6 | 7 | 8 | class Config: 9 | """Configuration class for Docker container.""" 10 | 11 | def __init__(self): 12 | """Configuration settings for the Docker container.""" 13 | dotenv.load_dotenv() 14 | 15 | self._app_version = os.environ.get("APP_VERSION", "dev") 16 | self._data_version = os.environ.get("DATA_VERSION", "dev") 17 | self._storage_account = os.environ.get("STORAGE_ACCOUNT") 18 | 19 | @property 20 | def APP_VERSION(self) -> str: 21 | """What is the version of the app?""" 22 | return self._app_version 23 | 24 | @property 25 | def DATA_VERSION(self) -> str: 26 | """What version of the data are we using?""" 27 | return self._data_version 28 | 29 | @property 30 | def STORAGE_ACCOUNT(self) -> str: 31 | """What is the name of the storage account?""" 32 | if self._storage_account is None: 33 | raise ValueError("STORAGE_ACCOUNT environment variable must be set") 34 | return self._storage_account 35 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: NHP Model Documentation 2 | site_description: Documentation for the NHP Model project 3 | site_url: https://connect.strategyunitwm.nhs.uk/nhp/model_documentation/ 4 | 5 | repo_url: https://github.com/the-strategy-unit/nhp_model 6 | repo_name: the-strategy-unit/nhp_model 7 | 8 | theme: 9 | name: material 10 | features: 11 | - navigation.tabs 12 | - navigation.sections 13 | - navigation.expand 14 | - navigation.top 15 | - search.highlight 16 | - content.code.copy 17 | 18 | plugins: 19 | - search 20 | - gen-files: 21 | scripts: 22 | - docs/gen_ref_pages.py 23 | - literate-nav: 24 | nav_file: SUMMARY.md 25 | - section-index 26 | - mkdocstrings: 27 | handlers: 28 | python: 29 | options: 30 | docstring_style: google 31 | show_source: true 32 | show_root_heading: true 33 | show_root_toc_entry: false 34 | merge_init_into_class: true 35 | filters: 36 | - "!_version" 37 | - "!__main__" 38 | 39 | nav: 40 | - Home: index.md 41 | - API Reference: reference/ 42 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # NHP Model Documentation 2 | 3 | Welcome to the NHP Model documentation. 
This project provides modeling capabilities for healthcare activity prediction. 4 | 5 | ## Features 6 | 7 | - Multiple model types (inpatients, outpatients, A&E) 8 | - Support for loading data from different sources 9 | - Docker containerization 10 | 11 | ## Quick Start 12 | 13 | Download and install [`uv`](https://docs.astral.sh/uv/getting-started/installation/), then run `uv sync`. Download data locally, e.g., download a synthetic dataset to `data/synth`. Then, run the model with: 14 | 15 | ``` bash 16 | uv run python -m nhp.model -d data/synth --type all 17 | ``` 18 | 19 | to run the model with the sample parameters. 20 | 21 | ### Generating Sample Parameters 22 | 23 | you can generate sample parameters using the CLI command: 24 | 25 | ``` bash 26 | uv run python -m nhp.model.params --dataset [dataset] --scenario [scenario] --app-version dev > params.json 27 | ``` 28 | 29 | replacing the values as needed. This will generate a file `params.json` with the sample parameters. 30 | 31 | ## API Reference 32 | 33 | See the [Model Reference](reference/nhp/model/index.md) for detailed documentation of all classes and functions. 34 | -------------------------------------------------------------------------------- /.github/workflows/remove_untagged_container_images.yaml: -------------------------------------------------------------------------------- 1 | name: Clean up untagged container images 2 | 3 | on: 4 | workflow_dispatch: # allows manual triggering via GitHub UI 5 | schedule: 6 | - cron: '0 1 * * *' # runs at 01:00 UTC every day 7 | 8 | jobs: 9 | remove-untagged-images: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | # you must create a classic PAT with `delete:packages` scope and add it as a secret named `PAT_DELETE_PACKAGES` 14 | - name: Authenticate with PAT 15 | run: echo "${{ secrets.PAT_DELETE_PACKAGES }}" | gh auth login --with-token 16 | - name: "Remove untagged images" 17 | run: | 18 | VERSION_IDS=$(gh api /orgs/the-strategy-unit/packages/container/nhp_model/versions \ 19 | -H "Accept: application/vnd.github+json" \ 20 | --paginate | \ 21 | jq -r '.[] | select(.metadata.container.tags | length == 0) | .id') 22 | 23 | for VERSION_ID in $VERSION_IDS; do 24 | echo "Deleting version ID: $VERSION_ID" 25 | gh api "/orgs/the-strategy-unit/packages/container/nhp_model/versions/${VERSION_ID}" \ 26 | -X DELETE \ 27 | -H "Accept: application/vnd.github+json" 28 | done -------------------------------------------------------------------------------- /src/nhp/model/helpers.py: -------------------------------------------------------------------------------- 1 | """Helper methods for the model package.""" 2 | 3 | import json 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | 9 | def inrange(value: float, low: float = 0, high: float = 1) -> float: 10 | """Force a value to be in the interval [low, high]. 11 | 12 | Args: 13 | value: The value we want to constrain to the interval. 14 | low: The minimum that `value` can be. Defaults to 0. 15 | high: The maximum that `value` can be. Defaults to 1. 16 | 17 | Returns: 18 | `value` constrained to the interval. 19 | """ 20 | return max(low, min(high, value)) 21 | 22 | 23 | def rnorm(rng: np.random.Generator, low: float, high: float) -> float: 24 | """Create a single random normal value from a 80% confidence interval. 25 | 26 | Args: 27 | rng: A random number generator. 28 | low: The low estimate of our 80% confidence interval. 29 | high: The high estimate of our 80% confidence interval. 30 | 31 | Returns: 32 | A random normal value. 
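Example:
    An illustrative sketch (the interval values here are arbitrary; `np` is the numpy import at the top of this module):

        rng = np.random.default_rng(seed=42)
        value = rnorm(rng, 0.9, 1.1)  # roughly 80% of draws fall in [0.9, 1.1]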
33 | """ 34 | q = 2.563103 # generated by: 2 * norm.ppf(1 - (1 - 0.8) / 2) 35 | mean = (high + low) / 2 36 | stdev = (high - low) / q 37 | return rng.normal(mean, stdev) 38 | -------------------------------------------------------------------------------- /src/nhp/model/data/reference/__init__.py: -------------------------------------------------------------------------------- 1 | """Reference Data. 2 | 3 | Any reference data needed for the model should be stored in this folder. 4 | 5 | Helper methods for loading the reference data should be created here. 6 | """ 7 | 8 | import json 9 | import pathlib 10 | 11 | import pandas as pd 12 | 13 | 14 | def _ref_path(filename): 15 | path = pathlib.Path(__file__).parent.resolve() 16 | return path.joinpath(filename) 17 | 18 | 19 | def variant_lookup() -> dict: 20 | """Variant Lookup (Health Status Adjustment). 21 | 22 | Returns: 23 | A dictionary of the variant lookups. 24 | """ 25 | with _ref_path("variant_lookup.json").open("r", encoding="UTF-8") as vlup_file: 26 | return json.load(vlup_file) 27 | 28 | 29 | def life_expectancy() -> pd.DataFrame: 30 | """Life Expectancy (Health Status Adjustment). 31 | 32 | Returns: 33 | A pandas DataFrame containing life expectancy data. 34 | """ 35 | return pd.read_csv(_ref_path("life_expectancy.csv")) 36 | 37 | 38 | def split_normal_params() -> pd.DataFrame: 39 | """Split Normal Parameters (Health Status Adjustment). 40 | 41 | Returns: 42 | A pandas DataFrame containing split normal parameters. 43 | """ 44 | return pd.read_csv(_ref_path("hsa_split_normal_params.csv")) 45 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/astral-sh/uv:python3.13-alpine 2 | 3 | # Create user 4 | RUN addgroup -g 1000 nhp && adduser -u 1000 -G nhp -s /bin/sh -h /app -D nhp 5 | WORKDIR /app 6 | USER nhp 7 | 8 | # Create directories with proper permissions (as root) 9 | RUN for DIR in data queue results; do mkdir -p $DIR; done 10 | 11 | # Copy dependency files first (optimal caching) 12 | COPY --chown=nhp:nhp pyproject.toml uv.lock ./ 13 | 14 | # Install dependencies only (skip local package) 15 | RUN uv sync --frozen --no-dev --no-install-project 16 | 17 | # Ensure Python can find installed packages and local model 18 | ENV PATH="/app/.venv/bin:$PATH" 19 | 20 | # Copy application code (changes most frequently) 21 | COPY --chown=nhp:nhp src/nhp/ /app/src/nhp/ 22 | RUN uv pip install . 
23 | 24 | # define build arguments, these will set the environment variables in the container 25 | ARG app_version 26 | ARG data_version 27 | ARG storage_account 28 | 29 | ENV APP_VERSION=$app_version 30 | ENV DATA_VERSION=$data_version 31 | ENV STORAGE_ACCOUNT=$storage_account 32 | 33 | # Define static environment variables 34 | ENV BATCH_SIZE=16 35 | 36 | # temporary patch until we update the api 37 | USER root 38 | RUN printf '#!/bin/sh\n/app/.venv/bin/python -m nhp.docker "$@"\n' > /opt/docker_run.py && \ 39 | chmod +x /opt/docker_run.py 40 | USER nhp 41 | 42 | ENTRYPOINT ["python", "-m", "nhp.docker"] 43 | -------------------------------------------------------------------------------- /tests/unit/nhp/docker/test_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest.mock import patch 3 | 4 | import pytest 5 | 6 | from nhp.docker.config import Config 7 | 8 | 9 | def test_config_sets_values_from_envvars(mocker): 10 | # arrange 11 | mocker.patch("dotenv.load_dotenv") 12 | 13 | # act 14 | with patch.dict( 15 | os.environ, 16 | { 17 | "APP_VERSION": "app version", 18 | "DATA_VERSION": "data version", 19 | "STORAGE_ACCOUNT": "storage account", 20 | }, 21 | ): 22 | config = Config() 23 | 24 | # assert 25 | assert config.APP_VERSION == "app version" 26 | assert config.DATA_VERSION == "data version" 27 | assert config.STORAGE_ACCOUNT == "storage account" 28 | 29 | 30 | def test_config_uses_default_values(mocker): 31 | # arrange 32 | mocker.patch("dotenv.load_dotenv") 33 | 34 | # act 35 | config = Config() 36 | 37 | # assert 38 | assert config.APP_VERSION == "dev" 39 | assert config.DATA_VERSION == "dev" 40 | 41 | with pytest.raises(ValueError, match="STORAGE_ACCOUNT environment variable must be set"): 42 | config.STORAGE_ACCOUNT 43 | 44 | 45 | def test_config_calls_dotenv_load(mocker): 46 | # arrange 47 | m = mocker.patch("dotenv.load_dotenv") 48 | 49 | # act 50 | config = Config() 51 | 52 | # assert 53 | m.assert_called_once() 54 | -------------------------------------------------------------------------------- /.github/workflows/removed_closed_prs.yaml: -------------------------------------------------------------------------------- 1 | name: Clean up closed pull requests 2 | 3 | on: 4 | pull_request: 5 | types: 6 | - closed 7 | jobs: 8 | remove-pr-image: 9 | runs-on: ubuntu-latest 10 | steps: 11 | # you must create a classic PAT with `delete:packages` scope and add it as a secret named `PAT_DELETE_PACKAGES` 12 | - name: Authenticate with PAT 13 | run: echo "${{ secrets.PAT_DELETE_PACKAGES }}" | gh auth login --with-token 14 | - name: "Remove PR image" 15 | env: 16 | TAG_TO_DELETE: "pr-${{ github.event.pull_request.number }}" 17 | run: | 18 | ALL_VERSIONS=$(gh api /orgs/the-strategy-unit/packages/container/nhp_model/versions \ 19 | -H "Accept: application/vnd.github+json" \ 20 | --paginate) 21 | 22 | VERSION_ID=$(jq -r --arg tag $TAG_TO_DELETE \ 23 | '.[] | select(.metadata.container.tags[] == $tag) | .id' \ 24 | <<< "$ALL_VERSIONS") 25 | 26 | if [ -n "$VERSION_ID" ]; then 27 | echo "Deleting version ID: $VERSION_ID" 28 | gh api \ 29 | -X DELETE \ 30 | /orgs/the-strategy-unit/packages/container/nhp_model/versions/${VERSION_ID} \ 31 | -H "Accept: application/vnd.github+json" 32 | else 33 | echo "Tag '$TAG_TO_DELETE' not found — skipping delete" 34 | fi -------------------------------------------------------------------------------- /tests/unit/nhp/model/data/test_data.py: 
-------------------------------------------------------------------------------- 1 | """test nhp data (data).""" 2 | 3 | import pytest 4 | 5 | from nhp.model.data import Data 6 | 7 | 8 | def test_get_ip(): 9 | d = Data() 10 | with pytest.raises(NotImplementedError): 11 | d.get_ip() 12 | 13 | 14 | def test_get_ip_strategies(): 15 | d = Data() 16 | with pytest.raises(NotImplementedError): 17 | d.get_ip_strategies() 18 | 19 | 20 | def test_get_op(): 21 | d = Data() 22 | with pytest.raises(NotImplementedError): 23 | d.get_op() 24 | 25 | 26 | def test_get_aae(): 27 | d = Data() 28 | with pytest.raises(NotImplementedError): 29 | d.get_aae() 30 | 31 | 32 | def test_get_birth_factors(): 33 | d = Data() 34 | with pytest.raises(NotImplementedError): 35 | d.get_birth_factors() 36 | 37 | 38 | def test_get_demographic_factors(): 39 | d = Data() 40 | with pytest.raises(NotImplementedError): 41 | d.get_demographic_factors() 42 | 43 | 44 | def test_get_hsa_activity_table(): 45 | d = Data() 46 | with pytest.raises(NotImplementedError): 47 | d.get_hsa_activity_table() 48 | 49 | 50 | def test_get_hsa_gams(): 51 | d = Data() 52 | with pytest.raises(NotImplementedError): 53 | d.get_hsa_gams() 54 | 55 | 56 | def test_get_inequalities(): 57 | d = Data() 58 | with pytest.raises(NotImplementedError): 59 | d.get_inequalities() 60 | -------------------------------------------------------------------------------- /.github/workflows/build_container.yaml: -------------------------------------------------------------------------------- 1 | name: Build Container Image 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | docker-tag: 7 | required: true 8 | default: dev 9 | type: string 10 | app-version: 11 | required: true 12 | default: dev 13 | type: string 14 | data-version: 15 | required: true 16 | default: dev 17 | type: string 18 | 19 | jobs: 20 | 21 | build-container: 22 | runs-on: ubuntu-latest 23 | steps: 24 | - name: 'Checkout GitHub Action' 25 | uses: actions/checkout@v5 26 | 27 | - name: 'Login to GitHub Container Registry' 28 | uses: docker/login-action@v3 29 | with: 30 | registry: ghcr.io 31 | username: ${{github.actor}} 32 | password: ${{secrets.GITHUB_TOKEN}} 33 | - name: Set up Docker Buildx 34 | uses: docker/setup-buildx-action@v3 35 | 36 | - name: "Build image" 37 | uses: docker/build-push-action@v6 38 | with: 39 | context: .
40 | tags: ${{ inputs.docker-tag }} 41 | push: true 42 | cache-from: type=gha 43 | cache-to: type=gha,mode=max 44 | platforms: linux/amd64 45 | provenance: false 46 | sbom: false 47 | build-args: | 48 | app_version=${{ inputs.app-version }} 49 | data_version=${{ inputs.data-version }} 50 | -------------------------------------------------------------------------------- /.github/workflows/deploy_docs.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy Documentation 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | 7 | permissions: 8 | contents: read 9 | pages: write 10 | id-token: write 11 | 12 | concurrency: 13 | group: "pages" 14 | cancel-in-progress: false 15 | 16 | jobs: 17 | build: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - name: Install uv 23 | uses: astral-sh/setup-uv@v6 24 | 25 | - name: Set up Python 26 | run: uv python install 27 | 28 | - name: Install dependencies 29 | run: uv sync --extra docs 30 | 31 | - name: Build documentation 32 | run: uv run mkdocs build --clean 33 | 34 | - name: Upload artifact 35 | uses: actions/upload-artifact@v4 36 | with: 37 | name: site 38 | path: ./site 39 | 40 | deploy: 41 | runs-on: ubuntu-latest 42 | needs: build 43 | steps: 44 | - name: Download artifact 45 | uses: actions/download-artifact@v4 46 | with: 47 | name: site 48 | path: ./site 49 | 50 | - name: Install uv (for rsconnect) 51 | uses: astral-sh/setup-uv@v6 52 | 53 | - name: Configure Connect 54 | run: uvx rsconnect add -s ${{ secrets.RSCONNECT_URL }} -n connect -k ${{ secrets.RSCONNECT_API_KEY }} 55 | 56 | - name: Deploy to Connect 57 | run: uvx rsconnect deploy html site -a ${{ vars.CONNECT_DOCS_APP_ID }} 58 | -------------------------------------------------------------------------------- /.github/workflows/deploy_pr.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | pull_request: 3 | types: [opened, synchronize, reopened] 4 | 5 | name: Deploy PR 6 | 7 | jobs: 8 | 9 | deploy-ghcr-pr: 10 | uses: ./.github/workflows/build_container.yaml 11 | with: 12 | docker-tag: ghcr.io/the-strategy-unit/nhp_model:pr-${{ github.event.number }} 13 | app-version: dev 14 | data-version: dev 15 | secrets: inherit 16 | 17 | add-comment-to-pr: 18 | runs-on: ubuntu-latest 19 | needs: ["deploy-ghcr-pr"] 20 | steps: 21 | - name: Find Comment 22 | uses: peter-evans/find-comment@v3 23 | id: fc 24 | with: 25 | issue-number: ${{ github.event.pull_request.number }} 26 | comment-author: 'github-actions[bot]' 27 | body-includes: "## ✅ A new build is available" 28 | 29 | - name: Comment with container image link 30 | if: github.event_name == 'pull_request' 31 | uses: peter-evans/create-or-update-comment@v4 32 | with: 33 | token: ${{ secrets.GITHUB_TOKEN }} 34 | comment-id: ${{ steps.fc.outputs.comment-id }} 35 | issue-number: ${{ github.event.pull_request.number }} 36 | body: | 37 | ## ✅ A new build is available. 
38 | 39 | You can use the following to pull the image into your local environment: 40 | 41 | ``` bash 42 | docker pull ghcr.io/the-strategy-unit/nhp_model:pr-${{ github.event.number }} 43 | ``` 44 | edit-mode: replace 45 | -------------------------------------------------------------------------------- /tests/unit/nhp/model/test_params.py: -------------------------------------------------------------------------------- 1 | """Test params-sample.""" 2 | 3 | from unittest.mock import mock_open, patch 4 | 5 | from nhp.model.params import load_params, load_sample_params, validate_params 6 | 7 | 8 | def test_validate_params(mocker): 9 | # arrange 10 | m_validate = mocker.patch("jsonschema.validate") 11 | m_json_load = mocker.patch("json.load", return_value="schema") 12 | 13 | # act 14 | validate_params("params") # ty: ignore[invalid-argument-type] 15 | 16 | # assert 17 | m_validate.assert_called_once_with(instance="params", schema="schema") 18 | assert m_json_load.call_args[0][0].name.endswith("params-schema.json") 19 | 20 | 21 | def test_load_params(mocker): 22 | """Test that load_params opens the params file.""" 23 | # arrange 24 | m_vp = mocker.patch("nhp.model.params.validate_params") 25 | 26 | # act 27 | with patch("builtins.open", mock_open(read_data='{"params": 0}')) as mock_file: 28 | assert load_params("filename.json") == {"params": 0} 29 | 30 | # assert 31 | mock_file.assert_called_with("filename.json", "r", encoding="UTF-8") 32 | m_vp.assert_called_once_with({"params": 0}) 33 | 34 | 35 | def test_load_sample_params(mocker): 36 | # arrange 37 | m_validate = mocker.patch("nhp.model.params.validate_params") 38 | 39 | # act 40 | actual = load_sample_params(dataset="dev", scenario="unit-test") 41 | 42 | # assert 43 | assert actual["dataset"] == "dev" 44 | assert actual["scenario"] == "unit-test" 45 | m_validate.assert_called_once_with(actual) 46 | -------------------------------------------------------------------------------- /tests/unit/nhp/model/data/test_reference.py: -------------------------------------------------------------------------------- 1 | """test nhp data (reference).""" 2 | 3 | from nhp.model.data import reference 4 | 5 | expected_hsa_variants = {"lle", "hle", "ppp"} 6 | 7 | 8 | def test_variants(): 9 | # arrange 10 | 11 | # act 12 | vl = reference.variant_lookup() 13 | 14 | # assert 15 | assert len(vl) == 19 16 | assert set(vl.values()) == expected_hsa_variants 17 | 18 | 19 | def test_life_expectancy(): 20 | # arrange 21 | 22 | # act 23 | le = reference.life_expectancy() 24 | 25 | # assert 26 | assert len(le) == 276 27 | assert list(le.columns) == ["var", "sex", "age"] + [str(i) for i in range(2018, 2044)] 28 | assert set(le["var"]) == expected_hsa_variants 29 | assert set(le["sex"]) == {1, 2} 30 | assert list(le["age"]) == list(range(55, 101)) * 6 31 | assert le[[str(i) for i in range(2018, 2043)]].sum().sum() == 89323.6 32 | 33 | 34 | def test_split_normal_params(): 35 | # arrange 36 | 37 | # act 38 | snp = reference.split_normal_params() 39 | 40 | # assert 41 | assert len(snp) == 144 42 | assert list(snp.columns) == [ 43 | "var", 44 | "sex", 45 | "year", 46 | "mode", 47 | "sd1", 48 | "sd2", 49 | ] 50 | assert set(snp["var"]) == expected_hsa_variants 51 | assert set(snp["sex"]) == {"f", "m"} 52 | assert snp["year"].to_list() == list(range(2020, 2044)) * 6 53 | assert snp[["mode", "sd1", "sd2"]].sum().to_list() == [ 54 | 12.159496878354162, 55 | 55.57842646603717, 56 | 140.31508181965998, 57 | ] 58 | -------------------------------------------------------------------------------- /src/nhp/model/params/__main__.py: -------------------------------------------------------------------------------- 1 | """Generate sample parameters.""" 2 | 3 | import argparse 4 | import json 5 | import random 6 | from datetime import datetime 7 | 8 | from . import load_sample_params 9 | 10 | 11 | def _parse_args(): 12 | parser = argparse.ArgumentParser(description="CLI for loading sample parameters.") 13 | parser.add_argument("--dataset", required=True, help="Dataset name") 14 | parser.add_argument("--scenario", required=True, help="Scenario name") 15 | parser.add_argument("--app-version", default="dev", help="App version (default: dev)") 16 | parser.add_argument("--model-runs", type=int, default=256, help="Model Runs (default: 256)") 17 | parser.add_argument("--start-year", type=int, default=2023, help="Start year (default: 2023)") 18 | parser.add_argument("--end-year", type=int, default=2041, help="End year (default: 2041)") 19 | parser.add_argument( 20 | "--seed", 21 | type=int, 22 | default=None, 23 | help="Random seed (default: a random integer between 0 and 10000)", 24 | ) 25 | 26 | return parser.parse_args() 27 | 28 | 29 | def main(): 30 | """Generate sample parameters and print them to the console.""" 31 | args = _parse_args() 32 | 33 | if args.seed is None: 34 | args.seed = random.randint(0, 10000) 35 | 36 | params = load_sample_params( 37 | dataset=args.dataset, 38 | scenario=args.scenario, 39 | app_version=args.app_version, 40 | model_runs=args.model_runs, 41 | start_year=args.start_year, 42 | end_year=args.end_year, 43 | seed=args.seed, 44 | ) 45 | 46 | params["create_datetime"] = datetime.now().strftime("%Y%m%d_%H%M%S") 47 | 48 | print(json.dumps(params, indent=2)) 49 | 50 | 51 | def _init(): 52 | main() 53 | 54 | 55 | if __name__ == "__main__": 56 | _init() 57 | -------------------------------------------------------------------------------- /.github/workflows/build_schema.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy schema.json to GitHub Pages 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | schema-tag: 7 | required: true 8 | default: dev 9 | type: string 10 | 11 | permissions: 12 | pages: write 13 | id-token: write 14 | contents: write 15 | 16 | jobs: 17 | build: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - name: Checkout repository 21 | uses: actions/checkout@v5 22 | 23 | - name: Clone existing schemas branch content 24 | run: | 25 | git fetch --depth=1 origin schemas 26 | git worktree add schemas schemas 27 | 28 | - name: Copy schema to app version path 29 | run: | 30 | mkdir -p schemas/${{ inputs.schema-tag }} 31 | sed '/$id/ s/dev/${{ inputs.schema-tag }}/' src/nhp/model/params/params-schema.json > schemas/${{ inputs.schema-tag }}/params-schema.json 32 | 33 | - name: Commit the schema 34 | run: | 35 | git config user.name "github-actions[bot]" 36 | git config user.email "41898282+github-actions[bot]@users.noreply.github.com" 37 | pushd schemas 38 | git add ${{ inputs.schema-tag }}/params-schema.json 39 | git commit -m "adding schema for ${{ inputs.schema-tag }}" || echo "No changes to commit" 40 | git push origin schemas 41 | popd 42 | 43 | - name: Upload to GitHub Pages 44 | uses: actions/upload-pages-artifact@v4 45 | with: 46 | path: schemas 47 | 48 | deploy: 49 | needs: build 50 | runs-on: ubuntu-latest 51 | environment: 52 | name: github-pages 53 | url: ${{ steps.deployment.outputs.page_url }} 54 | steps: 55 | - name: Deploy to GitHub Pages 56 | id:
deployment 57 | uses: actions/deploy-pages@v4 58 | -------------------------------------------------------------------------------- /src/nhp/model/params/__init__.py: -------------------------------------------------------------------------------- 1 | """Module for working with model parameter files.""" 2 | 3 | import json 4 | 5 | from importlib_resources import files 6 | 7 | from nhp.model import params as params_module 8 | 9 | 10 | def validate_params(params: dict) -> None: 11 | """Validate model parameters. 12 | 13 | Args: 14 | params: The model parameters to validate. 15 | 16 | Raises: 17 | jsonschema.ValidationError: If the parameters are not valid. 18 | """ 19 | # lazy load for test collection performance 20 | import jsonschema # noqa: PLC0415 21 | 22 | with ( 23 | files(params_module) 24 | .joinpath("params-schema.json") 25 | .open("r", encoding="UTF-8") as schema_file 26 | ): 27 | schema = json.load(schema_file) 28 | 29 | jsonschema.validate(instance=params, schema=schema) 30 | 31 | 32 | def load_params(filename: str) -> dict: 33 | """Load a params file. 34 | 35 | Args: 36 | filename: The full name of the file that we wish to load. 37 | 38 | Raises: 39 | jsonschema.ValidationError: If the parameters are not valid. 40 | 41 | Returns: 42 | The model parameters. 43 | """ 44 | with open(filename, "r", encoding="UTF-8") as prf: 45 | params = json.load(prf) 46 | 47 | validate_params(params) 48 | 49 | return params 50 | 51 | 52 | def load_sample_params(**kwargs) -> dict: 53 | """Load a sample params file. 54 | 55 | Args: 56 | **kwargs: Any parameters to override in the sample params. 57 | 58 | Raises: 59 | jsonschema.ValidationError: If the parameters are not valid. 60 | 61 | Returns: 62 | The model parameters. 63 | """ 64 | with files(params_module).joinpath("params-sample.json").open("r", encoding="UTF-8") as prf: 65 | params = json.load(prf) 66 | 67 | params.update(kwargs) 68 | 69 | validate_params(params) 70 | 71 | return params 72 | -------------------------------------------------------------------------------- /.github/workflows/build_app.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | tags: 6 | - 'v*' 7 | 8 | jobs: 9 | 10 | build-app: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: "Checkout GitHub Action" 14 | uses: actions/checkout@v5 15 | 16 | - name: Install the latest version of uv 17 | uses: astral-sh/setup-uv@v6 18 | with: 19 | version: "latest" 20 | enable-cache: true 21 | cache-dependency-glob: "uv.lock" 22 | 23 | - name: Build (release) 24 | if: github.ref != 'refs/heads/main' 25 | run: uv build 26 | 27 | - name: Build (dev) 28 | if: github.ref == 'refs/heads/main' 29 | env: 30 | SETUPTOOLS_SCM_PRETEND_VERSION: 0.dev0 31 | run: uv build 32 | 33 | - name: Generate artifact 34 | uses: actions/upload-artifact@v4 35 | with: 36 | name: dist-whl 37 | path: dist/*.whl 38 | 39 | upload-build-to-storage-account: 40 | runs-on: ubuntu-latest 41 | needs: ["build-app"] 42 | 43 | steps: 44 | - name: Download build artifact 45 | uses: actions/download-artifact@v4 46 | with: 47 | name: dist-whl 48 | path: . 
49 | 50 | - name: Install Azure CLI 51 | uses: Azure/setup-azd@v2 52 | 53 | - name: Upload to blob storage 54 | run: | 55 | az storage blob upload \ 56 | --account-name ${{ secrets.NHP_STORAGE_ACCOUNT }} \ 57 | --container-name app \ 58 | --file $(ls *.whl) \ 59 | --sas-token "${{ secrets.APP_CONTAINER_SAS }}" \ 60 | --overwrite 61 | 62 | add-build-to-release: 63 | runs-on: ubuntu-latest 64 | needs: ["build-app"] 65 | permissions: 66 | contents: write 67 | 68 | steps: 69 | - name: Download build artifact 70 | uses: actions/download-artifact@v4 71 | with: 72 | name: dist-whl 73 | path: . 74 | - name: Upload artifact to the GitHub Release 75 | uses: softprops/action-gh-release@v2 76 | if: github.ref_type == 'tag' 77 | with: 78 | files: "*.whl" 79 | env: 80 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /src/nhp/model/data/data.py: -------------------------------------------------------------------------------- 1 | """NHP Data Loaders. 2 | 3 | Classes for loading data for the NHP model. Each class supports loading data from different sources, 4 | such as from local storage or directly from DataBricks. 5 | """ 6 | 7 | from typing import Any 8 | 9 | import pandas as pd 10 | 11 | 12 | class Data: 13 | """Load NHP data. 14 | 15 | Interface for loading data for the NHP model. This interface should have no concrete 16 | implementations, instead other classes should derive from this interface. 17 | """ 18 | 19 | def __init__(self): 20 | """Initialise Data data loader class.""" 21 | pass 22 | 23 | def get_ip(self) -> pd.DataFrame: 24 | """Get the inpatients dataframe. 25 | 26 | Returns: 27 | The inpatients dataframe. 28 | """ 29 | raise NotImplementedError() 30 | 31 | def get_ip_strategies(self) -> dict[str, pd.DataFrame]: 32 | """Get the inpatients strategies dataframe. 33 | 34 | Returns: 35 | The inpatients strategies dataframe. 36 | """ 37 | raise NotImplementedError() 38 | 39 | def get_op(self) -> pd.DataFrame: 40 | """Get the outpatients dataframe. 41 | 42 | Returns: 43 | The outpatients dataframe. 44 | """ 45 | raise NotImplementedError() 46 | 47 | def get_aae(self) -> pd.DataFrame: 48 | """Get the A&E dataframe. 49 | 50 | Returns: 51 | The A&E dataframe. 52 | """ 53 | raise NotImplementedError() 54 | 55 | def get_birth_factors(self) -> pd.DataFrame: 56 | """Get the birth factors dataframe. 57 | 58 | Returns: 59 | The birth factors dataframe. 60 | """ 61 | raise NotImplementedError() 62 | 63 | def get_demographic_factors(self) -> pd.DataFrame: 64 | """Get the demographic factors dataframe. 65 | 66 | Returns: 67 | The demographic factors dataframe. 68 | """ 69 | raise NotImplementedError() 70 | 71 | def get_hsa_activity_table(self) -> pd.DataFrame: 72 | """Get the demographic factors dataframe. 73 | 74 | Returns: 75 | The demographic factors dataframe. 76 | """ 77 | raise NotImplementedError() 78 | 79 | def get_hsa_gams(self) -> Any: 80 | """Get the health status adjustment gams. 81 | 82 | Returns: 83 | The health status adjustment gams. 84 | """ 85 | raise NotImplementedError() 86 | 87 | def get_inequalities(self) -> pd.DataFrame: 88 | """Get the inequalities dataframe. 89 | 90 | Returns: 91 | The inequalities dataframe. 
92 | """ 93 | raise NotImplementedError() 94 | -------------------------------------------------------------------------------- /tests/integration/nhp/model/test_run_model.py: -------------------------------------------------------------------------------- 1 | """Test single model runs for the NHP model.""" 2 | 3 | import pandas as pd 4 | import pytest 5 | 6 | from nhp.model import ( 7 | AaEModel, 8 | InpatientsModel, 9 | ModelIteration, 10 | OutpatientsModel, 11 | load_sample_params, 12 | ) 13 | from nhp.model.data import Local 14 | from nhp.model.run import run_all 15 | 16 | 17 | @pytest.mark.parametrize( 18 | "model_class, expected_aggregations", 19 | [ 20 | ( 21 | InpatientsModel, 22 | {"sex+tretspef_grouped", "tretspef", "tretspef+los_group", "delivery_episode_in_spell"}, 23 | ), 24 | ( 25 | OutpatientsModel, 26 | { 27 | "sex+tretspef_grouped", 28 | "tretspef", 29 | }, 30 | ), 31 | ( 32 | AaEModel, 33 | { 34 | "acuity", 35 | "attendance_category", 36 | }, 37 | ), 38 | ], 39 | ) 40 | def test_single_model_run(model_class, expected_aggregations, data_dir): 41 | # arrange 42 | params = load_sample_params() 43 | data = Local.create(data_dir) 44 | model = model_class(params, data) 45 | expected_aggregations |= { 46 | "default", 47 | "sex+age_group", 48 | "age", 49 | "avoided_activity", 50 | } 51 | 52 | # act 53 | # rather than using the run_single_model_run function, we directly instantiate ModelIteration 54 | # this is so we can work with the results. run_single_model_run is used to print some output to 55 | # the console. 56 | m_run = ModelIteration(model, 1) 57 | model_results, step_counts = m_run.get_aggregate_results() 58 | 59 | # assert 60 | assert {isinstance(v, pd.Series) for v in model_results.values()} == {True} 61 | assert set(model_results.keys()) == expected_aggregations 62 | assert isinstance(step_counts, pd.Series) 63 | 64 | 65 | def test_all_model_runs(data_dir): 66 | # arrange 67 | params = load_sample_params(model_runs=4) 68 | nhp_data = Local.create(data_dir) 69 | res_path = "results/synthetic/test/20220101_000000" 70 | 71 | # act 72 | actual = run_all(params, nhp_data) 73 | 74 | # assert 75 | assert actual == ( 76 | [ 77 | f"{res_path}/{i}.parquet" 78 | for i in [ 79 | "acuity", 80 | "age", 81 | "attendance_category", 82 | "avoided_activity", 83 | "default", 84 | "delivery_episode_in_spell", 85 | "sex+age_group", 86 | "sex+tretspef_grouped", 87 | "tretspef", 88 | "tretspef+los_group", 89 | "step_counts", 90 | ] 91 | ] 92 | + [ 93 | f"{res_path}/params.json", 94 | ], 95 | "synthetic/test-20220101_000000", 96 | ) 97 | -------------------------------------------------------------------------------- /src/nhp/docker/__main__.py: -------------------------------------------------------------------------------- 1 | """Methods for running the NHP model in a Docker container.""" 2 | 3 | import argparse 4 | import logging 5 | from datetime import datetime 6 | 7 | from nhp.docker.config import Config 8 | from nhp.docker.run import RunWithAzureStorage, RunWithLocalStorage 9 | from nhp.model.data import Local 10 | from nhp.model.run import run_all 11 | 12 | 13 | def parse_args(): 14 | """Parse command line arguments.""" 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument( 17 | "params_file", 18 | help="Name of the parameters file stored in Azure", 19 | ) 20 | 21 | parser.add_argument( 22 | "--local-storage", 23 | "-l", 24 | action="store_true", 25 | help="Use local storage (instead of Azure)", 26 | ) 27 | 28 | parser.add_argument("--save-full-model-results", 
action="store_true") 29 | 30 | return parser.parse_args() 31 | 32 | 33 | def main(config: Config = Config()): 34 | """The main method.""" 35 | args = parse_args() 36 | 37 | logging.basicConfig( 38 | format="%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s", 39 | level=logging.INFO, 40 | datefmt="%Y-%m-%d %H:%M:%S", 41 | ) 42 | 43 | if args.local_storage: 44 | runner = RunWithLocalStorage(args.params_file) 45 | else: 46 | runner = RunWithAzureStorage(args.params_file, config) 47 | 48 | logging.info("running model for: %s", args.params_file) 49 | logging.info("submitted by: %s", runner.params.get("user")) 50 | logging.info("model_runs: %s", runner.params["model_runs"]) 51 | logging.info("start_year: %s", runner.params["start_year"]) 52 | logging.info("end_year: %s", runner.params["end_year"]) 53 | logging.info("app_version: %s", runner.params["app_version"]) 54 | 55 | start_time = datetime.now() 56 | 57 | saved_files, results_file = run_all( 58 | runner.params, 59 | Local.create("data"), 60 | runner.progress_callback(), 61 | args.save_full_model_results, 62 | ) 63 | 64 | end_time = datetime.now() 65 | elapsed_time = end_time - start_time 66 | 67 | additional_metadata = { 68 | "model_run_start_time": start_time.isoformat(), 69 | "model_run_end_time": end_time.isoformat(), 70 | "model_run_elapsed_time_seconds": elapsed_time.total_seconds(), 71 | } 72 | 73 | runner.finish(results_file, saved_files, args.save_full_model_results, additional_metadata) 74 | 75 | logging.info("complete") 76 | 77 | 78 | def init(): 79 | """Method for calling main.""" 80 | if __name__ == "__main__": 81 | # run the model in a try catch block - ensures any exceptions that occur in the 82 | # multiprocessing pool are handled and logged correctly. 83 | # this prevents the docker container from hanging indefinitely. 84 | try: 85 | config = Config() 86 | main(config) 87 | except Exception as e: 88 | logging.error("An error occurred: %s", str(e)) 89 | raise e 90 | 91 | 92 | init() 93 | -------------------------------------------------------------------------------- /src/nhp/model/__main__.py: -------------------------------------------------------------------------------- 1 | """Functions to run the model. 2 | 3 | This module allows you to run the various models. It allows you to run a single model run of one of 4 | the different types of models for debugging purposes, or it allows you to run all of the models in 5 | parallel saving the results to disk. 6 | 7 | There are existing launch profiles for vscode that use this file, or you can use it directly in the 8 | console, e.g. 9 | 10 | python -m nhp.model -d data --model-run 1 -t ip 11 | 12 | will run a single run of the inpatients model, returning the results to display. 
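Alternatively,

    python -m nhp.model params.json -d data -t all

will run all of the model types in parallel and save the results to disk. Here `params.json` is an illustrative path to a parameters file; omit it to fall back to the bundled sample parameters.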
13 | """ 14 | 15 | import argparse 16 | import logging 17 | 18 | from nhp.model.aae import AaEModel 19 | from nhp.model.data import Local 20 | from nhp.model.inpatients import InpatientsModel 21 | from nhp.model.outpatients import OutpatientsModel 22 | from nhp.model.params import load_params, load_sample_params 23 | from nhp.model.run import run_all, run_single_model_run 24 | 25 | 26 | def _parse_args() -> argparse.Namespace: # pragma: no cover 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument( 29 | "params_file", 30 | nargs="?", 31 | default="", 32 | help="Path to the params.json file (leave empty to use sample parameters).", 33 | ) 34 | parser.add_argument("-d", "--data-path", help="Path to the data", default="data") 35 | parser.add_argument( 36 | "-r", "--model-run", help="Which model iteration to run", default=1, type=int 37 | ) 38 | parser.add_argument( 39 | "-t", 40 | "--type", 41 | default="all", 42 | choices=["all", "aae", "ip", "op"], 43 | help="Model type, either: all, ip, op, aae", 44 | type=str, 45 | ) 46 | parser.add_argument("--save-full-model-results", action="store_true") 47 | return parser.parse_args() 48 | 49 | 50 | def main() -> None: 51 | """Main method. 52 | 53 | Runs when __name__ == "__main__" 54 | """ 55 | # Grab the Arguments 56 | args = _parse_args() 57 | if args.params_file == "": 58 | params = load_sample_params() 59 | else: 60 | params = load_params(args.params_file) 61 | # define the model to run 62 | match args.type: 63 | case "all": 64 | logging.basicConfig( 65 | format="%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s", 66 | level=logging.INFO, 67 | datefmt="%Y-%m-%d %H:%M:%S", 68 | ) 69 | 70 | run_all( 71 | params, 72 | Local.create(args.data_path), 73 | lambda _: lambda _: None, 74 | args.save_full_model_results, 75 | ) 76 | return 77 | case "aae": 78 | model_type = AaEModel 79 | case "ip": 80 | model_type = InpatientsModel 81 | case "op": 82 | model_type = OutpatientsModel 83 | case _: 84 | raise ValueError(f"Unknown model type: {args.type}") 85 | 86 | run_single_model_run(params, args.data_path, model_type, args.model_run) 87 | 88 | 89 | def init(): 90 | """Method for calling main.""" 91 | if __name__ == "__main__": 92 | main() 93 | 94 | 95 | init() 96 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "nhp-model" 3 | dynamic = ["version"] 4 | description = "New Hospital Programme demand model" 5 | 6 | requires-python = ">=3.11,<3.14" 7 | 8 | dependencies = [ 9 | "azure-identity>=1.12", 10 | "azure-storage-blob>=12.15", 11 | "azure-storage-file-datalake>=12.10", 12 | "importlib-resources>=6.5.2", 13 | "jsonschema>=4.23.0", 14 | "numpy>=1.23", 15 | "pandas>=1.5", 16 | "pandas-flavor<0.8", 17 | "pandas-stubs>=2.3.2.250926", 18 | "pyarrow>=20.0", 19 | "pyjanitor>=0.23", 20 | "python-dotenv>=1.0", 21 | "scipy>=1.10", 22 | "tqdm>=4.65", 23 | ] 24 | 25 | 26 | # Explicitly specify which packages to include 27 | [tool.setuptools] 28 | package-dir = { "" = "src" } 29 | 30 | [tool.setuptools.packages.find] 31 | where = ["src"] 32 | 33 | [tool.setuptools.package-data] 34 | "nhp.model.data.reference" = ["*.csv", "*.json"] 35 | "nhp.model.params" = ["*.json"] 36 | 37 | [project.optional-dependencies] 38 | dev = [ 39 | "coverage>=7.2", 40 | "ipykernel>=6.21", 41 | "ipython>=8.11", 42 | "ipywidgets>=8.0", 43 | "jupyter_client>=8.0", 44 | "jupyter_core>=5.2", 45 | "jupyterlab_pygments>=0.2", 46 | 
"jupyterlab_widgets>=3.0", 47 | "matplotlib>=3.7", 48 | "nbconvert>=7.2", 49 | "nbformat>=5.7", 50 | "notebook>=6.5", 51 | "pygam>=0.8", 52 | "pytest>=7.2", 53 | "pytest-cov>=4.0", 54 | "pytest-mock>=3.10", 55 | "ruff>=0.11.10", 56 | "setuptools-scm>=8.3.1", 57 | "snakeviz>=2.1", 58 | "widgetsnbextension>=4.0", 59 | ] 60 | databricks = [ 61 | "pyspark", 62 | "databricks-connect" 63 | ] 64 | docs = [ 65 | "mkdocs", 66 | "mkdocs-material", 67 | "mkdocstrings[python]", 68 | "mkdocs-gen-files", 69 | "mkdocs-literate-nav", 70 | "mkdocs-section-index" 71 | ] 72 | 73 | [build-system] 74 | requires = ["setuptools>=80", "setuptools-scm>=8", "wheel"] 75 | build-backend = "setuptools.build_meta" 76 | 77 | # Ruff configuration 78 | [tool.ruff] 79 | line-length = 100 80 | target-version = "py311" 81 | indent-width = 4 82 | exclude = ["docs"] 83 | 84 | [tool.ruff.lint.per-file-ignores] 85 | "tests/**.py" = [ 86 | "D", # pydocstyle 87 | "PLC0415", # `import` should be at the top-level of a file 88 | "PLR2004", # Magic value used in comparison 89 | "PD901", # Avoid using the generic variable name `df` 90 | "RUF005", # list concatenation 91 | ] 92 | 93 | [tool.ruff.lint] 94 | # Simple rules: pylint + isort 95 | select = [ 96 | "D", # pydocstyle 97 | "E", # pycodestyle errors 98 | "W", # pycodestyle warnings 99 | "I", # isort (import sorting) 100 | "PL", # pylint rules, 101 | "PD", # pandas-vet 102 | "NPY", # NumPy-specific rules 103 | "RUF", # Ruff-specific rules 104 | ] 105 | 106 | [tool.ruff.lint.isort] 107 | # isort configuration 108 | force-single-line = false 109 | combine-as-imports = true 110 | 111 | [tool.ruff.lint.pydocstyle] 112 | convention = "google" 113 | 114 | [tool.ruff.lint.pylint] 115 | max-args = 7 116 | 117 | [tool.ruff.format] 118 | quote-style = "double" 119 | indent-style = "space" 120 | skip-magic-trailing-comma = false 121 | line-ending = "auto" 122 | 123 | [tool.setuptools_scm] 124 | write_to = "src/nhp/model/_version.py" 125 | fallback_version = "0.0.0" 126 | 127 | [tool.ty.src] 128 | exclude = ["docs"] 129 | 130 | [tool.pytest.ini_options] 131 | testpaths=["tests/unit", "tests/integration"] 132 | python_files=["test_*.py"] 133 | norecursedirs=["docs", "*.egg-info", ".git", "appdir", ".tox", "__pycache__"] 134 | -------------------------------------------------------------------------------- /src/nhp/model/data/local.py: -------------------------------------------------------------------------------- 1 | """NHP Data Loaders. 2 | 3 | Classes for loading data for the NHP model. Each class supports loading data from different sources, 4 | such as from local storage or directly from DataBricks. 5 | """ 6 | 7 | import pickle 8 | from typing import Any, Callable 9 | 10 | import pandas as pd 11 | 12 | from nhp.model.data import Data 13 | 14 | 15 | class Local(Data): 16 | """Load NHP data from local storage.""" 17 | 18 | def __init__(self, data_path: str, year: int, dataset: str): 19 | """Initialise Local data loader class.""" 20 | self._data_path = data_path 21 | self._year = str(year) 22 | self._dataset = dataset 23 | 24 | def _file_path(self, file): 25 | return "/".join([self._data_path, file, f"fyear={self._year}", f"dataset={self._dataset}"]) 26 | 27 | @staticmethod 28 | def create(data_path: str) -> Callable[[int, str], Any]: 29 | """Create Local Data object. 30 | 31 | Args: 32 | data_path: The path to where the data is stored locally. 33 | 34 | Returns: 35 | A function to initialise the object. 
36 | """ 37 | return lambda year, dataset: Local(data_path, year, dataset) 38 | 39 | def get_ip(self) -> pd.DataFrame: 40 | """Get the inpatients dataframe. 41 | 42 | Returns: 43 | The inpatients dataframe. 44 | """ 45 | return self._get_parquet("ip") 46 | 47 | def get_ip_strategies(self) -> dict[str, pd.DataFrame]: 48 | """Get the inpatients strategies dataframe. 49 | 50 | Returns: 51 | The inpatients strategies dataframes. 52 | """ 53 | return { 54 | i: self._get_parquet(f"ip_{i}_strategies") 55 | for i in ["activity_avoidance", "efficiencies"] 56 | } 57 | 58 | def get_op(self) -> pd.DataFrame: 59 | """Get the outpatients dataframe. 60 | 61 | Returns: 62 | The outpatients dataframe. 63 | """ 64 | return self._get_parquet("op").rename(columns={"index": "rn"}) 65 | 66 | def get_aae(self) -> pd.DataFrame: 67 | """Get the A&E dataframe. 68 | 69 | Returns: 70 | The A&E dataframe. 71 | """ 72 | return self._get_parquet("aae").rename(columns={"index": "rn"}) 73 | 74 | def get_birth_factors(self) -> pd.DataFrame: 75 | """Get the birth factors dataframe. 76 | 77 | Returns: 78 | The birth factors dataframe. 79 | """ 80 | return self._get_parquet("birth_factors") 81 | 82 | def get_demographic_factors(self) -> pd.DataFrame: 83 | """Get the demographic factors dataframe. 84 | 85 | Returns: 86 | The demographic factors dataframe. 87 | """ 88 | return self._get_parquet("demographic_factors") 89 | 90 | def get_hsa_activity_table(self) -> pd.DataFrame: 91 | """Get the demographic factors dataframe. 92 | 93 | Returns: 94 | The demographic factors dataframe. 95 | """ 96 | return self._get_parquet("hsa_activity_tables") 97 | 98 | def get_hsa_gams(self) -> Any: 99 | """Get the health status adjustment gams. 100 | 101 | Returns: 102 | The health status adjustment gams. 103 | """ 104 | with open(f"{self._data_path}/hsa_gams.pkl", "rb") as hsa_pkl: 105 | return pickle.load(hsa_pkl) 106 | 107 | def get_inequalities(self) -> pd.DataFrame: 108 | """Get the inequalities dataframe. 109 | 110 | Returns: 111 | The inequalities dataframe. 112 | """ 113 | return self._get_parquet("inequalities") 114 | 115 | def _get_parquet(self, file: str) -> pd.DataFrame: 116 | """Load specific parquet file using Pandas. 117 | 118 | Args: 119 | file: Specific parquet filename to open. 120 | 121 | Returns: 122 | DataFrame containing the data. 
123 | """ 124 | inequalities_df = pd.read_parquet(self._file_path(file)) 125 | return inequalities_df 126 | -------------------------------------------------------------------------------- /tests/unit/nhp/model/test__main__.py: -------------------------------------------------------------------------------- 1 | """Test run_model.py.""" 2 | 3 | from unittest.mock import Mock, patch 4 | 5 | import pytest 6 | 7 | from nhp.model.__main__ import main 8 | from nhp.model.aae import AaEModel 9 | from nhp.model.inpatients import InpatientsModel 10 | from nhp.model.outpatients import OutpatientsModel 11 | 12 | 13 | @pytest.mark.parametrize( 14 | "activity_type, model_class", 15 | [("aae", AaEModel), ("ip", InpatientsModel), ("op", OutpatientsModel)], 16 | ) 17 | def test_main_debug_runs_model(mocker, activity_type, model_class): 18 | # arrange 19 | args = Mock() 20 | args.type = activity_type 21 | args.data_path = "data" 22 | args.model_run = 0 23 | args.params_file = "params.json" 24 | mocker.patch("nhp.model.__main__._parse_args", return_value=args) 25 | ldp_mock = mocker.patch("nhp.model.__main__.load_params", return_value="params") 26 | 27 | run_all_mock = mocker.patch("nhp.model.__main__.run_all") 28 | run_single_mock = mocker.patch("nhp.model.__main__.run_single_model_run") 29 | 30 | # act 31 | main() 32 | 33 | # assert 34 | run_all_mock.assert_not_called() 35 | run_single_mock.assert_called_once_with("params", "data", model_class, 0) 36 | ldp_mock.assert_called_once_with("params.json") 37 | 38 | 39 | def test_main_can_use_sample_params(mocker): 40 | # arrange 41 | args = Mock() 42 | args.type = "ip" 43 | args.data_path = "data" 44 | args.model_run = 0 45 | args.params_file = "" 46 | mocker.patch("nhp.model.__main__._parse_args", return_value=args) 47 | ldp_mock = mocker.patch("nhp.model.__main__.load_params", return_value="params") 48 | ldsp_mock = mocker.patch("nhp.model.__main__.load_sample_params", return_value="params") 49 | 50 | run_all_mock = mocker.patch("nhp.model.__main__.run_all") 51 | run_single_mock = mocker.patch("nhp.model.__main__.run_single_model_run") 52 | 53 | # act 54 | main() 55 | 56 | # assert 57 | run_all_mock.assert_not_called() 58 | run_single_mock.assert_called_once_with("params", "data", InpatientsModel, 0) 59 | ldp_mock.assert_not_called() 60 | ldsp_mock.assert_called_once() 61 | 62 | 63 | def test_main_debug_runs_model_invalid_type(mocker): 64 | # arrange 65 | args = Mock() 66 | args.type = "invalid" 67 | args.data_path = "data" 68 | args.model_run = 0 69 | args.params_file = "queue/params.json" 70 | mocker.patch("nhp.model.__main__._parse_args", return_value=args) 71 | mocker.patch("nhp.model.__main__.load_params", return_value="params") 72 | 73 | run_all_mock = mocker.patch("nhp.model.__main__.run_all") 74 | run_single_mock = mocker.patch("nhp.model.__main__.run_single_model_run") 75 | 76 | # act 77 | with pytest.raises(ValueError): 78 | main() 79 | 80 | # assert 81 | run_all_mock.assert_not_called() 82 | run_single_mock.assert_not_called() 83 | 84 | 85 | def test_main_all_runs(mocker): 86 | # arrange 87 | args = Mock() 88 | args.type = "all" 89 | args.data_path = "data" 90 | args.params_file = "queue/params.json" 91 | args.save_full_model_results = False 92 | mocker.patch("nhp.model.__main__._parse_args", return_value=args) 93 | ldp_mock = mocker.patch("nhp.model.__main__.load_params", return_value="params") 94 | local_data_mock = mocker.patch("nhp.model.__main__.Local") 95 | local_data_mock.create.return_value = "data" 96 | 97 | run_all_mock = 
mocker.patch("nhp.model.__main__.run_all") 98 | run_single_mock = mocker.patch("nhp.model.__main__.run_single_model_run") 99 | 100 | # act 101 | main() 102 | 103 | # assert 104 | run_all_mock.assert_called_once() 105 | assert run_all_mock.call_args[0][0] == "params" 106 | assert run_all_mock.call_args[0][1] == "data" 107 | assert run_all_mock.call_args[0][2]("a")(0) is None 108 | 109 | run_single_mock.assert_not_called() 110 | ldp_mock.assert_called_once_with("queue/params.json") 111 | local_data_mock.create.assert_called_once_with("data") 112 | 113 | 114 | def test_init(mocker): 115 | """It should run the main method if __name__ is __main__.""" 116 | import nhp.model.__main__ as r 117 | 118 | main_mock = mocker.patch("nhp.model.__main__.main") 119 | 120 | r.init() # should't call main 121 | main_mock.assert_not_called() 122 | 123 | with patch.object(r, "__name__", "__main__"): 124 | r.init() # should call main 125 | main_mock.assert_called_once() 126 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # The New Hospital Programme Demand Model 2 | 3 | 4 | 5 | [![Project Status: Active – The project has reached a stable, usable 6 | state and is being actively 7 | developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![codecov](https://codecov.io/gh/The-Strategy-Unit/nhp_model/branch/main/graph/badge.svg?token=uGmRhc1n9C)](https://codecov.io/gh/The-Strategy-Unit/nhp_model) 8 | 9 | 10 | 11 | ## Welcome 12 | 13 | Welcome to the [New Hospital Programme demand and capacity modelling tool](https://www.strategyunitwm.nhs.uk/new-hospital-programme-demand-model). 14 | 15 | Smarter Hospital Planning 16 | 17 | This repository contains the model code but there are several other repositories which contain useful tools to [explore the data underpinning and set the parameters for the model](https://github.com/The-Strategy-Unit/nhp_inputs), as well as to [explore model outputs](https://github.com/The-Strategy-Unit/nhp_outputs). [An overview of how the different tools interact with each other is available](https://connect.strategyunitwm.nhs.uk/nhp/project_information/project_plan_and_summary/components-overview.html). 18 | 19 | The methodology underpinning this model is outlined in this [simple one page explainer](https://connect.strategyunitwm.nhs.uk/nhp_model_explainer/). We have a more technical [project information site](https://connect.strategyunitwm.nhs.uk/nhp/project_information/) which includes further details about the model and the data that the model was built on. 20 | 21 | ## Running the model 22 | 23 | ### For external users 24 | 25 | Although all the code is available openly, it is challenging to run the model if you do not have access to the data and infrastructure at the Strategy Unit. 26 | 27 | We use national [Hospital Episode Statistics](https://digital.nhs.uk/services/data-access-request-service-dars/dars-products-and-services/data-set-catalogue/hospital-episode-statistics) data which goes through extensive processing, as detailed in the [nhp_data repository](https://github.com/The-Strategy-Unit/nhp_data). 28 | Some of the types of potentially mitigable activity rely on having access to the full national dataset, not just a local dataset. 29 | Without this data and infrastructure, your data will not be correctly formatted to run in the model. 
30 | 31 | [We are working on providing synthetic data](https://github.com/The-Strategy-Unit/nhp_model/issues/347) so that interested parties can run the model locally to see how it works. 32 | 33 | Prospective users of the model should [contact the Strategy Unit](mailto:strategy.unit@nhs.net) to enquire about using the model on our existing infrastructure. 34 | 35 | Please note that it is important that the parameters of the model are set with great care and with proper support. It is also important that healthcare system partners are appropriately involved in parameter setting. For a description of the full process and support provision that is necessary to ensure the model functions well, please see the [NHS Futures workspace](https://future.nhs.uk/NewHospitalProgrammeDigital/browseFolder?fid=53572528&done=OBJChangesSaved). 36 | 37 | ### For internal users with full access to correctly formatted data 38 | 39 | Assuming you have your data in the correct format, store it in the `data` folder. [Further details on the correct formatting for the data to follow](https://github.com/The-Strategy-Unit/nhp_model/issues/419). 40 | 41 | The model runs using parameters that are set in a [JSON file](#json-schema). 42 | 43 | ### Running the model using `uv` 44 | 45 | This package is built using [`uv`](https://docs.astral.sh/uv/). If you have `uv` installed, run the model using: `uv run -m nhp.model path/to/params.json -d path/to/data` 46 | 47 | ### Running the model without `uv` 48 | 49 | 1. Install the `nhp_model` package using `pip install .` 50 | 1. Run the model using: `python -m nhp.model path/to/params.json -d path/to/data` 51 | 52 | ## Deployment 53 | 54 | The model is deployed to Azure Container Registry and GitHub Container Registry on pull requests, tagging the container as `nhp_model:dev`, and on releases it is deployed to `nhp_model:v*.*.*` and `nhp_model:latest`. 55 | 56 | ## JSON Schema 57 | 58 | Parameters for the model are set in JSON format; an example can be seen in `src/nhp/model/params/params-sample.json`. As the model develops, requirements for this JSON file change over time. We use [JSON schema](https://json-schema.org/understanding-json-schema/about) to manage changes to the parameters file.
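For debugging or quick experiments it can also be convenient to drive the model from Python rather than the command line. The snippet below is an illustrative sketch only, not part of the repository: it assumes the package is installed and that a populated `data` directory in the expected layout is available, and it mirrors `python -m nhp.model -t ip -r 1 -d data` with the bundled sample parameters.

```python
# Illustrative sketch: run a single inpatients model iteration from Python.
# Assumes the nhp_model package is installed and a local `data` directory exists.
from nhp.model.inpatients import InpatientsModel
from nhp.model.params import load_sample_params
from nhp.model.run import run_single_model_run

# Use the bundled sample parameters; load_params("path/to/params.json") works too.
params = load_sample_params()

# Runs model iteration 1 and prints the change factors and aggregated results.
run_single_model_run(params, "data", InpatientsModel, 1)
```

Whichever way the model is invoked, the keys accepted in the parameters file are governed by the versioned JSON schemas.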
From model v3.5 onwards, these are deployed to GitHub pages, following this pattern: 59 | 60 | - on merge to `main`, the schema is deployed to `https://the-strategy-unit.github.io/nhp_model/dev/params-schema.json` 61 | - on release of new model version vX.X, the schema is deployed to `https://the-strategy-unit.github.io/nhp_model/vX.X/params-schema.json` 62 | -------------------------------------------------------------------------------- /tests/unit/nhp/model/data/test_local.py: -------------------------------------------------------------------------------- 1 | """test nhp data (local).""" 2 | 3 | from unittest.mock import call, mock_open, patch 4 | 5 | import pandas as pd 6 | 7 | from nhp.model.data import Local 8 | 9 | 10 | def test_init_sets_values(): 11 | # arrange 12 | 13 | # act 14 | d = Local("data", 2019, "synthetic") 15 | 16 | # assert 17 | assert d._data_path == "data" 18 | 19 | 20 | def test_file_path(): 21 | # arrange 22 | 23 | # act 24 | d = Local("data", 2019, "synthetic") 25 | 26 | # assert 27 | assert d._file_path("ip") == "data/ip/fyear=2019/dataset=synthetic" 28 | 29 | 30 | def test_create_returns_lambda(): 31 | # arrange 32 | 33 | # act 34 | d = Local.create("data")(2019, "synthetic") 35 | 36 | # assert 37 | assert d._data_path == "data" 38 | 39 | 40 | def test_get_ip(mocker): 41 | # arrange 42 | m = mocker.patch("nhp.model.data.Local._get_parquet", return_value="data") 43 | d = Local("data", 2019, "synthetic") 44 | 45 | # act 46 | actual = d.get_ip() 47 | 48 | # assert 49 | assert actual == "data" 50 | m.assert_called_once_with("ip") 51 | 52 | 53 | def test_get_ip_strategies(mocker): 54 | # arrange 55 | m = mocker.patch("nhp.model.data.Local._get_parquet", return_value="data") 56 | d = Local("data", 2019, "synthetic") 57 | 58 | # act 59 | actual = d.get_ip_strategies() 60 | 61 | # assert 62 | assert actual == {"activity_avoidance": "data", "efficiencies": "data"} 63 | assert m.call_count == 2 64 | assert list(m.call_args_list) == [ 65 | call("ip_activity_avoidance_strategies"), 66 | call("ip_efficiencies_strategies"), 67 | ] 68 | 69 | 70 | def test_get_op(mocker): 71 | # arrange 72 | op_data = pd.DataFrame({"col_1": [1, 2], "col_2": [3, 4], "index": [5, 6]}, index=[2, 1]) 73 | m = mocker.patch("nhp.model.data.Local._get_parquet", return_value=op_data) 74 | d = Local("data", 2019, "synthetic") 75 | 76 | # act 77 | actual = d.get_op() 78 | 79 | # assert 80 | assert actual.col_1.to_list() == [1, 2] 81 | assert actual.col_2.to_list() == [3, 4] 82 | assert actual.rn.to_list() == [5, 6] 83 | m.assert_called_once_with("op") 84 | 85 | 86 | def test_get_aae(mocker): 87 | # arrange 88 | ae_data = pd.DataFrame({"col_1": [1, 2], "col_2": [3, 4], "index": [5, 6]}, index=[2, 1]) 89 | m = mocker.patch("nhp.model.data.Local._get_parquet", return_value=ae_data) 90 | d = Local("data", 2019, "synthetic") 91 | 92 | # act 93 | actual = d.get_aae() 94 | 95 | # assert 96 | assert actual.col_1.to_list() == [1, 2] 97 | assert actual.col_2.to_list() == [3, 4] 98 | assert actual.rn.to_list() == [5, 6] 99 | m.assert_called_once_with("aae") 100 | 101 | 102 | def test_get_birth_factors(mocker): 103 | # arrange 104 | m = mocker.patch("nhp.model.data.Local._get_parquet", return_value="data") 105 | d = Local("data", 2019, "synthetic") 106 | 107 | # act 108 | actual = d.get_birth_factors() 109 | 110 | # assert 111 | assert actual == "data" 112 | m.assert_called_once_with("birth_factors") 113 | 114 | 115 | def test_get_demographic_factors(mocker): 116 | # arrange 117 | m = 
mocker.patch("nhp.model.data.Local._get_parquet", return_value="data") 118 | d = Local("data", 2019, "synthetic") 119 | 120 | # act 121 | actual = d.get_demographic_factors() 122 | 123 | # assert 124 | assert actual == "data" 125 | m.assert_called_once_with("demographic_factors") 126 | 127 | 128 | def test_get_hsa_activity_table(mocker): 129 | # arrange 130 | m = mocker.patch("nhp.model.data.Local._get_parquet", return_value="data") 131 | d = Local("data", 2019, "synthetic") 132 | 133 | # act 134 | actual = d.get_hsa_activity_table() 135 | 136 | # assert 137 | assert actual == "data" 138 | m.assert_called_once_with("hsa_activity_tables") 139 | 140 | 141 | def test_get_hsa_gams(mocker): 142 | # arrange 143 | m = mocker.patch("pickle.load", return_value="data") 144 | d = Local("data", 2019, "synthetic") 145 | 146 | # act 147 | with patch("builtins.open", mock_open(read_data="hsa_gams")) as mock_file: 148 | actual = d.get_hsa_gams() 149 | 150 | # assert 151 | assert actual == "data" 152 | mock_file.assert_called_with("data/hsa_gams.pkl", "rb") 153 | m.assert_called_once_with(mock_file()) 154 | 155 | 156 | def test_get_inequalities(mocker): 157 | # arrange 158 | m = mocker.patch("nhp.model.data.Local._get_parquet", return_value="data") 159 | d = Local("data", 2019, "synthetic") 160 | 161 | # act 162 | actual = d.get_inequalities() 163 | 164 | # assert 165 | assert actual == "data" 166 | m.assert_called_once_with("inequalities") 167 | 168 | 169 | def test_get_parquet(mocker): 170 | # arrange 171 | fp = mocker.patch("nhp.model.data.Local._file_path", return_value="file_path") 172 | m = mocker.patch("pandas.read_parquet", return_value="data") 173 | d = Local("data", 2019, "synthetic") 174 | 175 | # act 176 | actual = d._get_parquet("file") 177 | 178 | # assert 179 | assert actual == "data" 180 | fp.assert_called_once_with("file") 181 | m.assert_called_once_with("file_path") 182 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | .RDataTmp 8 | 9 | # User-specific files 10 | .Ruserdata 11 | 12 | # Example code in package build process 13 | *-Ex.R 14 | 15 | # Output files from R CMD build 16 | /*.tar.gz 17 | 18 | # Output files from R CMD check 19 | /*.Rcheck/ 20 | 21 | # RStudio files 22 | .Rproj.user/ 23 | 24 | # produced vignettes 25 | vignettes/*.html 26 | vignettes/*.pdf 27 | 28 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 29 | .httr-oauth 30 | 31 | # knitr and R markdown default cache directories 32 | *_cache/ 33 | /cache/ 34 | 35 | # Temporary files created by R markdown 36 | *.utf8.md 37 | *.knit.md 38 | 39 | # Environment Variables 40 | .Renviron 41 | .env 42 | 43 | # translation temp files 44 | po/*~ 45 | 46 | # RStudio Connect folder 47 | rsconnect/ 48 | .Rproj.user 49 | 50 | # ignore test/data/results folders 51 | test/ 52 | data/ 53 | results/ 54 | run_results/ 55 | 56 | # ignore any potential data/artifacts 57 | *.zip 58 | *.parquet 59 | 60 | # ignore shiny cache directory 61 | outputs/.cache 62 | 63 | # pyinstaller folders 64 | build/ 65 | dist/ 66 | 67 | # covergage db 68 | .coverage 69 | coverage.xml 70 | 71 | docs/_build 72 | 73 | # ignore targets meta data 74 | _targets*/ 75 | 76 | # override the data rule for model/data 77 | !src/nhp/model/data 78 | !tests/unit/nhp/model/data 79 | 80 | # ignore schemas/ this is a worktree 81 | schemas/ 
82 | 83 | ## Python 84 | .git 85 | 86 | # Byte-compiled / optimized / DLL files 87 | __pycache__/ 88 | __PYCACHE__ 89 | *.py[cod] 90 | *$py.class 91 | 92 | # C extensions 93 | *.so 94 | 95 | # Distribution / packaging 96 | .Python 97 | build/ 98 | develop-eggs/ 99 | dist/ 100 | downloads/ 101 | eggs/ 102 | .eggs/ 103 | lib/ 104 | lib64/ 105 | parts/ 106 | sdist/ 107 | var/ 108 | wheels/ 109 | share/python-wheels/ 110 | *.egg-info/ 111 | .installed.cfg 112 | *.egg 113 | MANIFEST 114 | 115 | # PyInstaller 116 | # Usually these files are written by a python script from a template 117 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 118 | *.manifest 119 | *.spec 120 | 121 | # Installer logs 122 | pip-log.txt 123 | pip-delete-this-directory.txt 124 | 125 | # Unit test / coverage reports 126 | htmlcov/ 127 | .tox/ 128 | .nox/ 129 | .coverage 130 | .coverage.* 131 | .cache 132 | nosetests.xml 133 | coverage.xml 134 | *.cover 135 | *.py,cover 136 | .hypothesis/ 137 | .pytest_cache/ 138 | cover/ 139 | 140 | # Translations 141 | *.mo 142 | *.pot 143 | 144 | # Django stuff: 145 | *.log 146 | local_settings.py 147 | db.sqlite3 148 | db.sqlite3-journal 149 | 150 | # Flask stuff: 151 | instance/ 152 | .webassets-cache 153 | 154 | # Scrapy stuff: 155 | .scrapy 156 | 157 | # Sphinx documentation 158 | docs/_build/ 159 | 160 | # PyBuilder 161 | .pybuilder/ 162 | target/ 163 | 164 | # Jupyter Notebook 165 | .ipynb_checkpoints 166 | 167 | # IPython 168 | profile_default/ 169 | ipython_config.py 170 | 171 | # pyenv 172 | # For a library or package, you might want to ignore these files since the code is 173 | # intended to run in multiple environments; otherwise, check them in: 174 | # .python-version 175 | 176 | # pipenv 177 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 178 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 179 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 180 | # install all needed dependencies. 181 | #Pipfile.lock 182 | 183 | # UV 184 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 185 | # This is especially recommended for binary packages to ensure reproducibility, and is more 186 | # commonly ignored for libraries. 187 | #uv.lock 188 | 189 | # poetry 190 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 191 | # This is especially recommended for binary packages to ensure reproducibility, and is more 192 | # commonly ignored for libraries. 193 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 194 | #poetry.lock 195 | 196 | # pdm 197 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 198 | #pdm.lock 199 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 200 | # in version control. 201 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 202 | .pdm.toml 203 | .pdm-python 204 | .pdm-build/ 205 | 206 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 207 | __pypackages__/ 208 | 209 | # Celery stuff 210 | celerybeat-schedule 211 | celerybeat.pid 212 | 213 | # SageMath parsed files 214 | *.sage.py 215 | 216 | # Environments 217 | .env 218 | .venv 219 | env/ 220 | venv/ 221 | ENV/ 222 | env.bak/ 223 | venv.bak/ 224 | 225 | # Spyder project settings 226 | .spyderproject 227 | .spyproject 228 | 229 | # Rope project settings 230 | .ropeproject 231 | 232 | # mkdocs documentation 233 | /site 234 | 235 | # mypy 236 | .mypy_cache/ 237 | .dmypy.json 238 | dmypy.json 239 | 240 | # Pyre type checker 241 | .pyre/ 242 | 243 | # pytype static type analyzer 244 | .pytype/ 245 | 246 | # Cython debug symbols 247 | cython_debug/ 248 | 249 | # PyCharm 250 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 251 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 252 | # and can be added to the global gitignore or merged into this file. For a more nuclear 253 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 254 | #.idea/ 255 | 256 | # PyPI configuration file 257 | .pypirc 258 | src/nhp/model/_version.py 259 | -------------------------------------------------------------------------------- /tests/unit/nhp/docker/test___main__.py: -------------------------------------------------------------------------------- 1 | """test docker run.""" 2 | 3 | from datetime import datetime 4 | from unittest.mock import Mock, patch 5 | 6 | import pytest 7 | 8 | from nhp.docker.__main__ import main, parse_args 9 | 10 | 11 | @pytest.mark.parametrize( 12 | "args, expected_file, expected_local_storage, expected_save_full_model_results", 13 | [ 14 | (["test.json"], "test.json", False, False), 15 | (["test.json", "-l"], "test.json", True, False), 16 | (["test.json"], "test.json", False, False), 17 | (["test.json", "-l"], "test.json", True, False), 18 | (["test.json", "--save-full-model-results"], "test.json", False, True), 19 | ], 20 | ) 21 | def test_parse_args( 22 | mocker, 23 | args, 24 | expected_file, 25 | expected_local_storage, 26 | expected_save_full_model_results, 27 | ): 28 | # arrange 29 | mocker.patch("sys.argv", ["nhp.docker.run.py"] + args) 30 | 31 | # act 32 | actual = parse_args() 33 | 34 | # assert 35 | assert actual.params_file == expected_file 36 | assert actual.local_storage == expected_local_storage 37 | assert actual.save_full_model_results == expected_save_full_model_results 38 | 39 | 40 | def test_main_local(mocker): 41 | # arrange 42 | m = mocker.patch("nhp.docker.__main__.parse_args") 43 | m().params_file = "params.json" 44 | m().local_storage = True 45 | m().save_full_model_results = False 46 | 47 | m_start_time = datetime(2025, 1, 1, 12, 0, 0) 48 | m_end_time = datetime(2025, 1, 1, 12, 0, 2) 49 | m_datetime = mocker.patch("nhp.docker.__main__.datetime") 50 | m_datetime.now.side_effect = [m_start_time, m_end_time] 51 | 52 | rwls = mocker.patch("nhp.docker.__main__.RunWithLocalStorage") 53 | rwas = mocker.patch("nhp.docker.__main__.RunWithAzureStorage") 54 | 55 | local_data_mock = mocker.patch("nhp.docker.__main__.Local") 56 | local_data_mock.create.return_value = "data" 57 | 58 | params = { 59 | "model_runs": 256, 60 | "start_year": 2019, 61 | "end_year": 2035, 62 | "app_version": "dev", 63 | } 64 | 65 | rwls().params = params 66 | rwls.reset_mock() 67 | 68 | ru_m = mocker.patch( 69 | "nhp.docker.__main__.run_all", return_value=("list_of_results", "results.json") 70 | ) 71 | 
72 | expected_additional_metadata = { 73 | "model_run_start_time": m_start_time.isoformat(), 74 | "model_run_end_time": m_end_time.isoformat(), 75 | "model_run_elapsed_time_seconds": 2.0, 76 | } 77 | 78 | # act 79 | main() 80 | 81 | # assert 82 | rwls.assert_called_once_with("params.json") 83 | rwas.assert_not_called() 84 | 85 | s = rwls() 86 | ru_m.assert_called_once_with(params, "data", s.progress_callback(), False) 87 | s.finish.assert_called_once_with( 88 | "results.json", "list_of_results", False, expected_additional_metadata 89 | ) 90 | 91 | local_data_mock.create.assert_called_once_with("data") 92 | 93 | 94 | def test_main_azure(mocker): 95 | # arrange 96 | m = mocker.patch("nhp.docker.__main__.parse_args") 97 | m().params_file = "params.json" 98 | m().local_storage = False 99 | m().save_full_model_results = False 100 | 101 | m_start_time = datetime(2025, 1, 1, 12, 0, 0) 102 | m_end_time = datetime(2025, 1, 1, 12, 0, 2) 103 | m_datetime = mocker.patch("nhp.docker.__main__.datetime") 104 | m_datetime.now.side_effect = [m_start_time, m_end_time] 105 | 106 | rwls = mocker.patch("nhp.docker.__main__.RunWithLocalStorage") 107 | rwas = mocker.patch("nhp.docker.__main__.RunWithAzureStorage") 108 | 109 | local_data_mock = mocker.patch("nhp.docker.__main__.Local") 110 | local_data_mock.create.return_value = "data" 111 | 112 | config = Mock() 113 | config.APP_VERSION = "dev" 114 | config.DATA_VERSION = "dev" 115 | config.STORAGE_ACCOUNT = "sa" 116 | 117 | params = { 118 | "model_runs": 256, 119 | "start_year": 2019, 120 | "end_year": 2035, 121 | "app_version": "dev", 122 | } 123 | 124 | rwas().params = params 125 | rwas.reset_mock() 126 | 127 | ru_m = mocker.patch( 128 | "nhp.docker.__main__.run_all", return_value=("list_of_results", "results.json") 129 | ) 130 | 131 | expected_additional_metadata = { 132 | "model_run_start_time": m_start_time.isoformat(), 133 | "model_run_end_time": m_end_time.isoformat(), 134 | "model_run_elapsed_time_seconds": 2.0, 135 | } 136 | 137 | # act 138 | main(config) 139 | 140 | # assert 141 | rwls.assert_not_called() 142 | rwas.assert_called_once_with("params.json", config) 143 | 144 | s = rwas() 145 | ru_m.assert_called_once_with(params, "data", s.progress_callback(), False) 146 | s.finish.assert_called_once_with( 147 | "results.json", "list_of_results", False, expected_additional_metadata 148 | ) 149 | 150 | local_data_mock.create.assert_called_once_with("data") 151 | 152 | 153 | def test_init(mocker): 154 | """It should run the main method if __name__ is __main__.""" 155 | config = mocker.patch("nhp.docker.__main__.Config") 156 | 157 | import nhp.docker.__main__ as r 158 | 159 | main_mock = mocker.patch("nhp.docker.__main__.main") 160 | 161 | r.init() # should't call main 162 | main_mock.assert_not_called() 163 | 164 | with patch.object(r, "__name__", "__main__"): 165 | r.init() # should call main 166 | main_mock.assert_called_once_with(config()) 167 | 168 | 169 | def test_init_catches_exception(mocker): 170 | # arrange 171 | mocker.patch("nhp.docker.__main__.main", side_effect=Exception("Test error")) 172 | import nhp.docker.__main__ as r 173 | 174 | m = mocker.patch("logging.error") 175 | 176 | # act 177 | with patch.object(r, "__name__", "__main__"): 178 | with pytest.raises(Exception, match="Test error"): 179 | r.init() 180 | 181 | # assert 182 | m.assert_called_once_with("An error occurred: %s", "Test error") 183 | -------------------------------------------------------------------------------- /tests/unit/nhp/model/test_run.py: 
-------------------------------------------------------------------------------- 1 | """Test run_model.py.""" 2 | 3 | from unittest.mock import Mock, call 4 | 5 | import pandas as pd 6 | 7 | from nhp.model.aae import AaEModel 8 | from nhp.model.inpatients import InpatientsModel 9 | from nhp.model.outpatients import OutpatientsModel 10 | from nhp.model.run import ( 11 | _run_model, 12 | noop_progress_callback, 13 | run_all, 14 | run_single_model_run, 15 | timeit, 16 | tqdm, 17 | ) 18 | 19 | 20 | def test_tqdm(): 21 | tqdm.progress_callback = Mock() # type: ignore 22 | t = tqdm() 23 | t.update(5) 24 | tqdm.progress_callback.assert_called_once_with(5) # type: ignore 25 | 26 | 27 | def test_tqdm_no_callback(): 28 | tqdm.progress_callback = None 29 | t = tqdm() 30 | t.update(5) 31 | 32 | 33 | def test_timeit(mocker, capsys): 34 | """It should evaluate a function and print how long it took to run it.""" 35 | # arrange 36 | m = Mock(return_value="function") 37 | mocker.patch("time.time", return_value=0) 38 | # act 39 | actual = timeit(m, 1, 2, 3) 40 | # assert 41 | assert actual == "function" 42 | assert capsys.readouterr().out == "elapsed: 0.000s\n" 43 | 44 | 45 | def test_run_model(mocker): 46 | # arrange 47 | model_m = Mock() 48 | model_m.__name__ = "InpatientsModel" 49 | 50 | params = {"start_year": 2020, "end_year": 2022, "model_runs": 2} 51 | mocker.patch("os.cpu_count", return_value=2) 52 | 53 | pool_ctx_mock = mocker.patch("multiprocessing.get_context") 54 | pool_mock = pool_ctx_mock().Pool 55 | pool_ctm = pool_mock.return_value.__enter__.return_value 56 | pool_ctm.name = "pool" 57 | pool_ctm.imap = Mock(wraps=lambda f, i, **kwargs: map(f, i)) 58 | 59 | pc_m = Mock() 60 | 61 | pool_ctx_mock.reset_mock() 62 | 63 | # act 64 | actual = _run_model(model_m, params, "data", "hsa", "run_params", pc_m, False) # type: ignore 65 | 66 | # assert 67 | pool_ctm.imap.assert_called_once_with(model_m().go, [1, 2], chunksize=1) 68 | assert actual == [model_m().go()] * 3 69 | pc_m.assert_called_once_with(2) 70 | 71 | pool_ctx_mock.assert_called_once_with("spawn") 72 | pool_mock.assert_called_once_with(2) 73 | 74 | 75 | def test_noop_progress_callback(): 76 | # arrange, act & assert 77 | assert not noop_progress_callback("a")("b") 78 | 79 | 80 | def test_run_all(mocker): 81 | # arrange 82 | grp_m = mocker.patch( 83 | "nhp.model.run.Model.generate_run_params", 84 | return_value={"variant": "variants"}, 85 | ) 86 | hsa_m = mocker.patch("nhp.model.run.HealthStatusAdjustmentInterpolated", return_value="hsa") 87 | 88 | rm_m = mocker.patch("nhp.model.run._run_model", side_effect=["ip", "op", "aae"]) 89 | cr_m = mocker.patch( 90 | "nhp.model.run.combine_results", 91 | return_value=({"default": "combined_results"}, "combined_step_counts"), 92 | ) 93 | gr_m = mocker.patch("nhp.model.run.generate_results_json", return_value="results_json_path") 94 | sr_m = mocker.patch("nhp.model.run.save_results_files", return_value="results_paths") 95 | 96 | pc_m = Mock() 97 | pc_m().return_value = "progress callback" 98 | pc_m.reset_mock() 99 | 100 | params = { 101 | "id": "1", 102 | "dataset": "synthetic", 103 | "scenario": "test", 104 | "start_year": 2020, 105 | "end_year": 2025, 106 | "model_runs": 10, 107 | "create_datetime": "20230123_012345", 108 | } 109 | data_mock = Mock(return_value="nhp_data") 110 | 111 | # act 112 | actual = run_all(params, data_mock, pc_m, False) 113 | 114 | # assert 115 | assert actual == ("results_paths", "results_json_path") 116 | 117 | data_mock.assert_called_once_with(2020, "synthetic") 118 | 119 
| assert pc_m.call_args_list == [ 120 | call("Inpatients"), 121 | call("Outpatients"), 122 | call("AaE"), 123 | ] 124 | 125 | grp_m.assert_called_once_with(params) 126 | hsa_m.assert_called_once_with("nhp_data", 2020) 127 | 128 | assert rm_m.call_args_list == [ 129 | call( 130 | m, 131 | params, 132 | data_mock, 133 | "hsa", 134 | {"variant": "variants"}, 135 | pc_m(), 136 | False, 137 | ) 138 | for m in [InpatientsModel, OutpatientsModel, AaEModel] 139 | ] 140 | 141 | cr_m.assert_called_once_with(["ip", "op", "aae"]) 142 | gr_m.assert_called_once_with( 143 | {"default": "combined_results", "step_counts": "combined_step_counts"}, 144 | "combined_step_counts", 145 | params, 146 | {"variant": "variants"}, 147 | ) 148 | sr_m.assert_called_once_with( 149 | {"default": "combined_results", "step_counts": "combined_step_counts"}, params 150 | ) 151 | 152 | 153 | def test_run_single_model_run(mocker, capsys): 154 | """It should run the model and display outputs.""" 155 | # arrange 156 | mr_mock = Mock() 157 | ndl_mock = mocker.patch("nhp.model.run.Local") 158 | ndl_mock.create.return_value = "nhp_data" 159 | 160 | results_m = { 161 | "default": pd.DataFrame( 162 | { 163 | "pod": ["a", "b"] * 4 + ["c"], 164 | "measure": [i for i in ["x", "y"] for _ in [1, 2]] * 2 + ["x"], 165 | "value": range(9), 166 | } 167 | ) 168 | } 169 | step_counts_m = pd.DataFrame( 170 | { 171 | "change_factor": ["a", "b"] * 4 + ["c"], 172 | "measure": [i for i in ["x", "y"] for _ in [1, 2]] * 2 + ["x"], 173 | "value": range(9), 174 | } 175 | ) 176 | 177 | timeit_mock = mocker.patch( 178 | "nhp.model.run.timeit", 179 | side_effect=[None, mr_mock, (results_m, step_counts_m)], 180 | ) 181 | params = {"dataset": "synthetic", "start_year": 2020, "end_year": 2025} 182 | 183 | # act 184 | run_single_model_run(params, "data", "model_type", 0) # type: ignore 185 | 186 | # assert 187 | ndl_mock.create.assert_called_once_with("data") 188 | 189 | assert timeit_mock.call_count == 3 190 | assert timeit_mock.call_args_list[0] == call("model_type", params, "nhp_data") 191 | assert timeit_mock.call_args_list[2] == call(mr_mock.get_aggregate_results) 192 | 193 | assert capsys.readouterr().out == "\n".join( 194 | [ 195 | "initialising model... running model... aggregating results... 
", 196 | "change factors:", 197 | " value ", 198 | "measure x y", 199 | "change_factor ", 200 | "a 4 8", 201 | "b 6 10", 202 | "c 8 0", 203 | "total 18 18", 204 | "", 205 | "aggregated (default) results:", 206 | " value ", 207 | "measure x y", 208 | "pod ", 209 | "a 4.0 8.0", 210 | "b 6.0 10.0", 211 | "c 8.0 0.0", 212 | "total 18.0 18.0", 213 | "", 214 | ] 215 | ) 216 | -------------------------------------------------------------------------------- /src/nhp/model/run.py: -------------------------------------------------------------------------------- 1 | """Run the model.""" 2 | 3 | import logging 4 | import multiprocessing 5 | import os 6 | import time 7 | from typing import Any, Callable, Tuple, Type 8 | 9 | from tqdm.auto import tqdm as base_tqdm 10 | 11 | from nhp.model.aae import AaEModel 12 | from nhp.model.data import Data, Local 13 | from nhp.model.health_status_adjustment import HealthStatusAdjustmentInterpolated 14 | from nhp.model.inpatients import InpatientsModel 15 | from nhp.model.model import Model 16 | from nhp.model.model_iteration import ModelIteration, ModelRunResult 17 | from nhp.model.outpatients import OutpatientsModel 18 | from nhp.model.results import combine_results, generate_results_json, save_results_files 19 | 20 | 21 | class tqdm(base_tqdm): # ty: ignore[unsupported-base] 22 | """Custom tqdm class that provides a callback function on update.""" 23 | 24 | # ideally this would be set in the contstructor, but as this is a pretty 25 | # simple use case just implemented as a static variable. this does mean that 26 | # you need to update the value before using the class (each time) 27 | progress_callback = None 28 | 29 | def update(self, n=1): 30 | """Overide the default tqdm update function to run the callback method.""" 31 | super().update(n) 32 | if tqdm.progress_callback: 33 | tqdm.progress_callback(self.n) 34 | 35 | 36 | def timeit(func: Callable, *args) -> Any: 37 | """Time how long it takes to evaluate function `f` with arguments `*args`.""" 38 | start = time.time() 39 | results = func(*args) 40 | print(f"elapsed: {time.time() - start:.3f}s") 41 | return results 42 | 43 | 44 | def _run_model( 45 | model_type: Type[Model], 46 | params: dict, 47 | data: Callable[[int, str], Data], 48 | hsa: Any, 49 | run_params: dict, 50 | progress_callback: Callable[[Any], None], 51 | save_full_model_results: bool, 52 | ) -> list[ModelRunResult]: 53 | """Run the model iterations. 54 | 55 | Runs the model for all of the model iterations, returning the aggregated results. 56 | 57 | Args: 58 | model_type: The type of model that we want to run. 59 | params: The parameters to run the model with. 60 | data: A callable that creates a Data instance. 61 | hsa: An instance of the HealthStatusAdjustment class. 62 | run_params: The generated run parameters for the model run. 63 | progress_callback: A callback function for progress updates. 64 | save_full_model_results: Whether to save full model results. 65 | 66 | Returns: 67 | A list containing the aggregated results for all model runs. 
68 | """ 69 | model_class = model_type.__name__[:-5] 70 | logging.info("%s", model_class) 71 | logging.info(" * instantiating") 72 | # ignore type issues here: Model has different arguments to Inpatients/Outpatients/A&E 73 | model = model_type(params, data, hsa, run_params, save_full_model_results) # type: ignore 74 | logging.info(" * running") 75 | 76 | # set the progress callback for this run 77 | tqdm.progress_callback = progress_callback 78 | 79 | # model run 0 is the baseline 80 | # model run 1:n are the monte carlo sims 81 | model_runs = [i + 1 for i in range(params["model_runs"])] 82 | 83 | cpus = os.cpu_count() 84 | batch_size = int(os.getenv("BATCH_SIZE", "1")) 85 | 86 | ctx = multiprocessing.get_context("spawn") 87 | with ctx.Pool(cpus) as pool: 88 | baseline = model.go(0) # baseline 89 | model_results: list[ModelRunResult] = list( 90 | tqdm( 91 | pool.imap( 92 | model.go, 93 | model_runs, 94 | chunksize=batch_size, 95 | ), 96 | f"Running {model.__class__.__name__[:-5].rjust(11)} model", 97 | total=len(model_runs), 98 | ) 99 | ) 100 | logging.info(" * finished") 101 | # ensure that the callback reports all model runs are complete 102 | progress_callback(params["model_runs"]) 103 | 104 | return [baseline, *model_results] 105 | 106 | 107 | def noop_progress_callback(_: Any) -> Callable[[Any], None]: 108 | """A no-op callback.""" 109 | return lambda _: None 110 | 111 | 112 | def run_all( 113 | params: dict, 114 | nhp_data: Callable[[int, str], Data], 115 | progress_callback: Callable[[Any], Callable[[Any], None]] = noop_progress_callback, 116 | save_full_model_results: bool = False, 117 | ) -> Tuple[list, str]: 118 | """Run the model. 119 | 120 | Runs all 3 model types, aggregates and combines the results. 121 | 122 | Args: 123 | params: The parameters to use for this model run. 124 | nhp_data: The Data class to use for loading data. 125 | progress_callback: A callback function for updating progress. 126 | Defaults to noop_progress_callback. 127 | save_full_model_results: Whether to save full model results. Defaults to False. 128 | 129 | Returns: 130 | A tuple containing the list of saved files and the filename of the JSON results. 131 | """ 132 | model_types = [InpatientsModel, OutpatientsModel, AaEModel] 133 | run_params = Model.generate_run_params(params) 134 | 135 | # set the data path in the HealthStatusAdjustment class 136 | hsa = HealthStatusAdjustmentInterpolated( 137 | nhp_data(params["start_year"], params["dataset"]), params["start_year"] 138 | ) 139 | 140 | results, step_counts = combine_results( 141 | [ 142 | _run_model( 143 | m, 144 | params, 145 | nhp_data, 146 | hsa, 147 | run_params, 148 | progress_callback(m.__name__[:-5]), 149 | save_full_model_results, 150 | ) 151 | for m in model_types 152 | ] 153 | ) 154 | 155 | json_filename = generate_results_json(results, step_counts, params, run_params) 156 | 157 | # TODO: once generate_results_json is deperecated this step should be moved into combine_results 158 | results["step_counts"] = step_counts 159 | # TODO: this should be what the model returns once generate_results_json is deprecated 160 | saved_files = save_results_files(results, params) 161 | 162 | return saved_files, json_filename 163 | 164 | 165 | def run_single_model_run( 166 | params: dict, data_path: str, model_type: Type[Model], model_run: int 167 | ) -> None: 168 | """Runs a single model iteration for easier debugging in vscode.""" 169 | data = Local.create(data_path) 170 | 171 | print("initialising model... 
", end="") 172 | model = timeit(model_type, params, data) 173 | print("running model... ", end="") 174 | m_run = timeit(ModelIteration, model, model_run) 175 | print("aggregating results... ", end="") 176 | model_results, step_counts = timeit(m_run.get_aggregate_results) 177 | print() 178 | print("change factors:") 179 | step_counts = ( 180 | step_counts.reset_index() 181 | .groupby(["change_factor", "measure"], as_index=False)["value"] 182 | .sum() 183 | .pivot_table(index="change_factor", columns="measure") 184 | ) 185 | step_counts.loc["total"] = step_counts.sum() 186 | print(step_counts.fillna(0).astype(int)) 187 | print() 188 | print("aggregated (default) results:") 189 | 190 | default_results = ( 191 | model_results["default"] 192 | .reset_index() 193 | .groupby(["pod", "measure"], as_index=False) 194 | .agg({"value": "sum"}) 195 | .pivot_table(index=["pod"], columns="measure") 196 | .fillna(0) 197 | ) 198 | default_results.loc["total"] = default_results.sum() 199 | print(default_results) 200 | -------------------------------------------------------------------------------- /src/nhp/model/aae.py: -------------------------------------------------------------------------------- 1 | """Accident and Emergency Module. 2 | 3 | Implements the A&E model. 4 | """ 5 | 6 | from typing import Any, Callable, Tuple 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from nhp.model.data import Data 12 | from nhp.model.model import Model 13 | from nhp.model.model_iteration import ModelIteration 14 | 15 | 16 | class AaEModel(Model): 17 | """Accident and Emergency Model. 18 | 19 | Implementation of the Model for Accident and Emergency attendances. 20 | 21 | Args: 22 | params: The parameters to run the model with, or the path to a params file to load. 23 | data: A callable that creates a Data instance. 24 | hsa: An instance of the HealthStatusAdjustment class. If left as None an instance is 25 | created. Defaults to None. 26 | run_params: The parameters to use for each model run. Generated automatically if left as 27 | None. Defaults to None. 28 | save_full_model_results: Whether to save the full model results or not. Defaults to False. 29 | """ 30 | 31 | def __init__( 32 | self, 33 | params: dict | str, 34 | data: Callable[[int, str], Data], 35 | hsa: Any = None, 36 | run_params: dict | None = None, 37 | save_full_model_results: bool = False, 38 | ) -> None: 39 | """Initialise the A&E Model. 40 | 41 | Args: 42 | params: The parameters to use. 43 | data: A method to create a Data instance. 44 | hsa: Health Status Adjustment object. Defaults to None. 45 | run_params: The run parameters to use. Defaults to None. 46 | save_full_model_results: Whether to save full model results. Defaults to False. 47 | """ 48 | # call the parent init function 49 | super().__init__( 50 | "aae", 51 | ["arrivals"], 52 | params, 53 | data, 54 | hsa, 55 | run_params, 56 | save_full_model_results, 57 | ) 58 | 59 | def _get_data(self, data_loader: Data) -> pd.DataFrame: 60 | return data_loader.get_aae() 61 | 62 | def get_data_counts(self, data: pd.DataFrame) -> np.ndarray: 63 | """Get row counts of data. 64 | 65 | Args: 66 | data: The data to get the counts of. 67 | 68 | Returns: 69 | The counts of the data, required for activity avoidance steps. 
70 | """ 71 | return np.array([data["arrivals"]]).astype(float) 72 | 73 | def _load_strategies(self, data_loader: Data) -> None: 74 | """Loads the activity mitigation strategies.""" 75 | data = self.data.set_index("rn") 76 | self.strategies = { 77 | "activity_avoidance": pd.concat( 78 | [ 79 | data[data[c]]["hsagrp"].str.replace("aae", n).rename("strategy") 80 | for (c, n) in [ 81 | ("is_frequent_attender", "frequent_attenders"), 82 | ("is_left_before_treatment", "left_before_seen"), 83 | ("is_low_cost_referred_or_discharged", "low_cost_discharged"), 84 | ("is_discharged_no_treatment", "discharged_no_treatment"), 85 | ] 86 | ] 87 | ) 88 | .to_frame() 89 | .assign(sample_rate=1) 90 | } 91 | 92 | def apply_resampling(self, row_samples: np.ndarray, data: pd.DataFrame) -> pd.DataFrame: 93 | """Apply row resampling. 94 | 95 | Called from within `model.activity_resampling.ActivityResampling.apply_resampling`. 96 | 97 | Args: 98 | row_samples: [1xn] array, where n is the number of rows in `data`, containing the new 99 | values for `data["arrivals"]`. 100 | data: The data that we want to update. 101 | 102 | Returns: 103 | The updated data. 104 | """ 105 | data["arrivals"] = row_samples[0] 106 | # return the altered data 107 | return data 108 | 109 | def efficiencies( 110 | self, data: pd.DataFrame, model_iteration: ModelIteration 111 | ) -> tuple[pd.DataFrame, pd.DataFrame | None]: 112 | """Run the efficiencies steps of the model. 113 | 114 | Args: 115 | data: The data to apply efficiencies to. 116 | model_iteration: An instance of the ModelIteration class. 117 | 118 | Returns: 119 | Tuple containing the updated data and step counts (None for A&E). 120 | """ 121 | # A&E doesn't have any efficiencies steps 122 | return data, None 123 | 124 | @staticmethod 125 | def process_results(data: pd.DataFrame) -> pd.DataFrame: 126 | """Process the data into a format suitable for aggregation in results files. 127 | 128 | Args: 129 | data: Data to be processed. Format should be similar to Model.data. 130 | 131 | Returns: 132 | Processed results. 133 | """ 134 | data["measure"] = "walk-in" 135 | data.loc[data["is_ambulance"], "measure"] = "ambulance" 136 | data = data.rename(columns={"arrivals": "value"}) 137 | 138 | # summarise the results to make the create_agg steps quicker 139 | data = ( 140 | data.groupby( # ty: ignore[no-matching-overload] 141 | # note: any columns used in the calls to _create_agg, including pod and measure 142 | # must be included below 143 | [ 144 | "pod", 145 | "sitetret", 146 | "acuity", 147 | "measure", 148 | "sex", 149 | "age", 150 | "age_group", 151 | "attendance_category", 152 | ], 153 | dropna=False, 154 | as_index=False, 155 | ) 156 | .agg({"value": "sum"}) 157 | .fillna("unknown") 158 | ) 159 | return data 160 | 161 | def specific_aggregations(self, model_results: pd.DataFrame) -> dict[str, pd.Series]: 162 | """Create other aggregations specific to the model type. 163 | 164 | Args: 165 | model_results: The results of a model run. 166 | 167 | Returns: 168 | Dictionary containing the specific aggregations. 169 | """ 170 | return { 171 | "acuity": self.get_agg(model_results, "acuity"), 172 | "attendance_category": self.get_agg(model_results, "attendance_category"), 173 | } 174 | 175 | def calculate_avoided_activity( 176 | self, data: pd.DataFrame, data_resampled: pd.DataFrame 177 | ) -> pd.DataFrame: 178 | """Calculate the rows that have been avoided. 179 | 180 | Args: 181 | data: The data before the binomial thinning step. 
182 | data_resampled: The data after the binomial thinning step. 183 | 184 | Returns: 185 | The data that was avoided in the binomial thinning step. 186 | """ 187 | avoided = data["arrivals"] - data_resampled["arrivals"] 188 | data["arrivals"] = avoided 189 | return data 190 | 191 | def save_results(self, model_iteration: ModelIteration, path_fn: Callable[[str], str]) -> None: 192 | """Save the results of running the model. 193 | 194 | This method is used for saving the results of the model run to disk as a parquet file. 195 | It saves just the `rn` (row number) column and the `arrivals`, with the intention that 196 | you rejoin to the original data. 197 | 198 | Args: 199 | model_iteration: An instance of the ModelIteration class. 200 | path_fn: A function which takes the activity type and returns a path. 201 | """ 202 | model_iteration.get_model_results().set_index(["rn"])[["arrivals"]].to_parquet( 203 | f"{path_fn('aae')}/0.parquet" 204 | ) 205 | model_iteration.avoided_activity.set_index(["rn"])[["arrivals"]].to_parquet( 206 | f"{path_fn('aae_avoided')}/0.parquet" 207 | ) 208 | -------------------------------------------------------------------------------- /tests/unit/nhp/model/test_aae.py: -------------------------------------------------------------------------------- 1 | """Test a&e model.""" 2 | 3 | from unittest.mock import Mock, call, patch 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import pytest 8 | 9 | from nhp.model.aae import AaEModel 10 | 11 | 12 | # fixtures 13 | @pytest.fixture 14 | def mock_model(): 15 | """Create a mock Model instance.""" 16 | with patch.object(AaEModel, "__init__", lambda s, p, d, h, r: None): 17 | mdl = AaEModel(None, None, None, None) # type: ignore 18 | mdl.model_type = "aae" 19 | mdl.params = { 20 | "dataset": "synthetic", 21 | "model_runs": 3, 22 | "seed": 1, 23 | "demographic_factors": { 24 | "file": "demographics_file.csv", 25 | "variant_probabilities": {"a": 0.6, "b": 0.4}, 26 | }, 27 | "start_year": 2018, 28 | "end_year": 2020, 29 | "health_status_adjustment": [0.8, 1.0], 30 | "waiting_list_adjustment": "waiting_list_adjustment", 31 | "expat": { 32 | "aae": {"ambulance": [0.7, 0.9]}, 33 | "repat_local": {"aae": {"ambulance": [1.0, 1.2]}}, 34 | "repat_nonlocal": {"aae": {"ambulance": [1.3, 1.5]}}, 35 | }, 36 | "non-demographic_adjustment": { 37 | "a": {"a_a": [1, 1.2], "a_b": [1, 1.2]}, 38 | "b": {"b_a": [1, 1.2], "b_b": [1, 1.2]}, 39 | }, 40 | "inpatient_factors": { 41 | "admission_avoidance": { 42 | "a_a": {"interval": [0.4, 0.6]}, 43 | "a_b": {"interval": [0.4, 0.6]}, 44 | }, 45 | "los_reduction": { 46 | "b_a": {"interval": [0.4, 0.6]}, 47 | "b_b": {"interval": [0.4, 0.6]}, 48 | }, 49 | }, 50 | "outpatient_factors": { 51 | "a": {"a_a": {"interval": [0.4, 0.6]}, "a_b": {"interval": [0.4, 0.6]}}, 52 | "b": {"b_a": {"interval": [0.4, 0.6]}, "b_b": {"interval": [0.4, 0.6]}}, 53 | }, 54 | "aae_factors": { 55 | "a": {"a_a": {"interval": [0.4, 0.6]}, "a_b": {"interval": [0.4, 0.6]}}, 56 | "b": {"b_a": {"interval": [0.4, 0.6]}, "b_b": {"interval": [0.4, 0.6]}}, 57 | }, 58 | } 59 | # create a minimal data object for testing 60 | mdl.data = pd.DataFrame( 61 | { 62 | "rn": list(range(1, 21)), 63 | "age": list(range(1, 6)) * 4, 64 | "sex": ([1] * 5 + [2] * 5) * 2, 65 | "hsagrp": [x for _ in range(1, 11) for x in ["aae_a_a", "aae_b_b"]], 66 | } 67 | ) 68 | return mdl 69 | 70 | 71 | # methods 72 | 73 | 74 | def test_init_calls_super_init(mocker): 75 | """Test that the model calls the super method.""" 76 | # arrange 77 | super_mock = 
mocker.patch("nhp.model.aae.super") 78 | # act 79 | AaEModel("params", "data_path", "hsa", "run_params") # type: ignore 80 | # assert 81 | super_mock.assert_called_once() 82 | 83 | 84 | def test_get_data(mock_model): 85 | # arrange 86 | mdl = mock_model 87 | data_loader = Mock() 88 | data_loader.get_aae.return_value = "aae data" 89 | 90 | # act 91 | actual = mdl._get_data(data_loader) 92 | 93 | # assert 94 | assert actual == "aae data" 95 | data_loader.get_aae.assert_called_once_with() 96 | 97 | 98 | def test_get_data_counts(mock_model): 99 | # arrange 100 | mdl = mock_model 101 | data = mdl.data 102 | data["arrivals"] = list(range(1, 21)) 103 | # act 104 | actual = mdl.get_data_counts(data) 105 | # assert 106 | assert actual.tolist() == [[float(i) for i in range(1, 21)]] 107 | 108 | 109 | def test_load_strategies(mock_model): 110 | # arrange 111 | mdl = mock_model 112 | mdl.data["is_frequent_attender"] = [False] * 0 + [True] * 4 + [False] * 16 113 | mdl.data["is_left_before_treatment"] = [False] * 4 + [True] * 4 + [False] * 12 114 | mdl.data["is_low_cost_referred_or_discharged"] = [False] * 12 + [True] * 4 + [False] * 4 115 | mdl.data["is_discharged_no_treatment"] = [False] * 16 + [True] * 4 116 | # act 117 | mdl._load_strategies(None) 118 | # assert 119 | assert mdl.strategies["activity_avoidance"]["strategy"].to_list() == [ 120 | "frequent_attenders_a_a", 121 | "frequent_attenders_b_b", 122 | "frequent_attenders_a_a", 123 | "frequent_attenders_b_b", 124 | "left_before_seen_a_a", 125 | "left_before_seen_b_b", 126 | "left_before_seen_a_a", 127 | "left_before_seen_b_b", 128 | "low_cost_discharged_a_a", 129 | "low_cost_discharged_b_b", 130 | "low_cost_discharged_a_a", 131 | "low_cost_discharged_b_b", 132 | "discharged_no_treatment_a_a", 133 | "discharged_no_treatment_b_b", 134 | "discharged_no_treatment_a_a", 135 | "discharged_no_treatment_b_b", 136 | ] 137 | assert mdl.strategies["activity_avoidance"]["sample_rate"].to_list() == [1] * 16 138 | 139 | 140 | def test_apply_resampling(mocker, mock_model): 141 | # arrange 142 | row_samples = np.array([[1, 2, 3, 4]]) 143 | # act 144 | data = mock_model.apply_resampling(row_samples, pd.DataFrame()) 145 | # assert 146 | assert data["arrivals"].to_list() == [1, 2, 3, 4] 147 | 148 | 149 | def test_efficiencies(mock_model): 150 | """Test the efficiencies method (pass).""" 151 | # arrange 152 | 153 | # act 154 | actual = mock_model.efficiencies("data", None) 155 | 156 | # assert 157 | assert actual == ("data", None) 158 | 159 | 160 | def test_specific_aggregations(mocker, mock_model): 161 | """Test that it aggregates the results correctly.""" 162 | # arrange 163 | m = mocker.patch("nhp.model.AaEModel.get_agg", return_value="agg_data") 164 | 165 | mdl = mock_model 166 | 167 | # act 168 | actual = mdl.specific_aggregations("results") # type: ignore 169 | 170 | # assert 171 | assert actual == { 172 | "acuity": "agg_data", 173 | "attendance_category": "agg_data", 174 | } 175 | 176 | assert m.call_args_list == [ 177 | call("results", "acuity"), 178 | call("results", "attendance_category"), 179 | ] 180 | 181 | 182 | def test_process_results(mock_model): 183 | # arrange 184 | data = pd.DataFrame( 185 | { 186 | "sitetret": ["trust"] * 4, 187 | "acuity": ["a", "a", "b", "b"], 188 | "attendance_category": [1, 1, 2, 2], 189 | "age": [1, 2, 3, 4], 190 | "age_group": [1] * 4, 191 | "sex": [1] * 4, 192 | "pod": ["aae_type-01", "aae_type-01", "aae_type-02", "aae_type-02"], 193 | "is_ambulance": [True, False, True, False], 194 | "value": [1, 2, 3, 4], 195 | } 196 | ) 
197 | 198 | expected = { 199 | "pod": ["aae_type-01", "aae_type-01", "aae_type-02", "aae_type-02"], 200 | "sitetret": ["trust"] * 4, 201 | "acuity": ["a", "a", "b", "b"], 202 | "measure": ["ambulance", "walk-in"] * 2, 203 | "sex": [1] * 4, 204 | "age": [1, 2, 3, 4], 205 | "age_group": [1] * 4, 206 | "attendance_category": [1, 1, 2, 2], 207 | "value": [1, 2, 3, 4], 208 | } 209 | # act 210 | actual = mock_model.process_results(data) 211 | 212 | # assert 213 | assert actual.to_dict("list") == expected 214 | 215 | 216 | def test_save_results(mocker, mock_model): 217 | """Test that it correctly saves the results.""" 218 | 219 | def path_fn(x): 220 | return x 221 | 222 | mr_mock = Mock() 223 | mr_mock.get_model_results.return_value = pd.DataFrame({"rn": [0], "arrivals": [1]}) 224 | mr_mock.avoided_activity = pd.DataFrame({"rn": [0], "arrivals": [1]}) 225 | 226 | to_parquet_mock = mocker.patch("pandas.DataFrame.to_parquet") 227 | mock_model.save_results(mr_mock, path_fn) 228 | assert to_parquet_mock.call_args_list[0] == call("aae/0.parquet") 229 | assert to_parquet_mock.call_args_list[1] == call("aae_avoided/0.parquet") 230 | 231 | 232 | def test_calculate_avoided_activity(mock_model): 233 | # arrange 234 | data = pd.DataFrame({"rn": [0, 1], "arrivals": [4, 3]}) 235 | data_resampled = pd.DataFrame({"rn": [0, 1], "arrivals": [2, 1]}) 236 | # act 237 | actual = mock_model.calculate_avoided_activity(data, data_resampled) 238 | # assert 239 | assert actual.to_dict(orient="list") == {"rn": [0, 1], "arrivals": [2, 2]} 240 | -------------------------------------------------------------------------------- /src/nhp/model/health_status_adjustment.py: -------------------------------------------------------------------------------- 1 | """Health Status Adjustment.""" 2 | 3 | from math import pi, sqrt 4 | from typing import List 5 | 6 | import numpy as np 7 | import pandas as pd 8 | 9 | from nhp.model.data import Data, reference 10 | 11 | 12 | class HealthStatusAdjustment: 13 | """Health Status Adjustment. 14 | 15 | Handles the logic for the health status adjustment in the model. 16 | """ 17 | 18 | # load the static reference data files 19 | 20 | def __init__(self, data_loader: Data, base_year: str): 21 | """Initialise HealthStatusAdjustment. 22 | 23 | Base class that should not be used directly, instead see HealthStatusAdjustmentGAM 24 | or HealthStatusAdjustmentInterpolated. 25 | 26 | Args: 27 | data_loader: The data loader. 28 | base_year: The baseline year for the model run. 
29 | """ 30 | self._all_ages = np.arange(0, 101) 31 | 32 | self._load_life_expectancy_series(base_year) 33 | self._load_activity_ages(data_loader) 34 | self._cache = {} 35 | 36 | def _load_life_expectancy_series(self, base_year: str): 37 | # the age range that health status adjustment runs for 38 | # hardcoded to max out at 90 as ages >90 are mapped to 90 39 | self._ages = np.arange(55, 91) 40 | # load the life expectancy file, only select the rows for the ages we are interested in 41 | lexc = reference.life_expectancy().set_index(["var", "sex", "age"]) 42 | lexc = lexc[lexc.index.isin(self._ages, level=2)] 43 | # calculate the life expectancy (change) between the model year and base year 44 | self._life_expectancy = lexc.apply(lambda x: x - lexc[str(base_year)]) 45 | 46 | def _load_activity_ages(self, data_loader: Data): 47 | self._activity_ages = ( 48 | data_loader.get_hsa_activity_table().set_index(["hsagrp", "sex", "age"]).sort_index() 49 | )["activity"] 50 | 51 | @staticmethod 52 | def generate_params( 53 | start_year: int, 54 | end_year: int, 55 | variants: List[str], 56 | rng: np.random.Generator, 57 | model_runs: int, 58 | ) -> np.ndarray: 59 | """Generate Health Status Adjustment Parameters. 60 | 61 | Args: 62 | start_year: The baseline year for the model. 63 | end_year: The year the model is running for. 64 | variants: List of population variants. 65 | rng: Random Number Generator. 66 | model_runs: Number of Model Runs. 67 | 68 | Returns: 69 | Parameters for the health status adjustment. 70 | """ 71 | hsa_snp = reference.split_normal_params().set_index(["var", "sex", "year"]) 72 | 73 | def gen(variant, sex): 74 | mode: float 75 | sd1: float 76 | sd2: float 77 | mode, sd1, sd2 = hsa_snp.loc[(variant, sex, end_year)] # type: ignore 78 | 79 | return np.concatenate( 80 | [ 81 | [mode], 82 | HealthStatusAdjustment.random_splitnorm(rng, model_runs, mode, sd1, sd2), 83 | hsa_snp.loc[(variant, sex, np.arange(start_year + 1, end_year)), "mode"], # type: ignore 84 | ] 85 | ) 86 | 87 | values = { 88 | v: np.transpose([gen(v, "m"), gen(v, "f")]) 89 | for v in hsa_snp.index.levels[0] # type: ignore 90 | } 91 | 92 | variant_lookup = reference.variant_lookup() 93 | return np.array( 94 | [ 95 | values[variant_lookup[v]][i] 96 | for i, v in enumerate(variants + variants[0:1] * (end_year - start_year - 1)) 97 | ] 98 | ) 99 | 100 | @staticmethod 101 | def random_splitnorm( 102 | rng: np.random.Generator, 103 | n: int, 104 | mode: float, 105 | sd1: float, 106 | sd2: float, 107 | ) -> np.ndarray: 108 | """Generate random splitnormal values. 109 | 110 | Args: 111 | rng: Random Number Generator. 112 | n: Number of random values to generate. 113 | mode: The mode of the distribution. 114 | sd1: The standard deviation of the left side of the distribution. 115 | sd2: The standard deviation of the right side of the distribution. 116 | 117 | Returns: 118 | n random number values sampled from the split normal distribution. 
119 | """ 120 | # lazy import for performance 121 | import scipy.stats as spt # noqa: PLC0415 122 | 123 | # get the probability of the mode 124 | A = sqrt(2 / pi) / (sd1 + sd2) 125 | a_sqrt_tau = A * sqrt(2 * pi) 126 | p = (a_sqrt_tau * sd1) / 2 127 | 128 | # generate n random uniform values 129 | u = rng.uniform(size=n) 130 | 131 | # whether u is less than the mode or not 132 | a1 = u <= p 133 | 134 | # make a single sd vector 135 | sd = np.array([sd1 if i else sd2 for i in a1]) 136 | x = np.array([0 if i else a_sqrt_tau * sd2 - 1 for i in a1]) 137 | 138 | return mode + sd * spt.norm.ppf((u + x) / (a_sqrt_tau * sd)) 139 | 140 | def run(self, run_params: dict) -> pd.Series: 141 | """Return factor for health status adjustment. 142 | 143 | Args: 144 | run_params: The run parameters. 145 | 146 | Returns: 147 | The health status adjustment factor. 148 | """ 149 | hsa_param = run_params["health_status_adjustment"] 150 | selected_variant = reference.variant_lookup()[run_params["variant"]] 151 | cache_key = (*hsa_param, selected_variant) 152 | if cache_key in self._cache: 153 | return self._cache[cache_key] 154 | 155 | lexc = self._life_expectancy.loc[(selected_variant, slice(None), slice(None))][ # type: ignore 156 | str(run_params["year"]) 157 | ] 158 | hsa_param = np.repeat(hsa_param, len(self._ages)) 159 | adjusted_ages = np.tile(self._ages, 2) - lexc * hsa_param 160 | 161 | factor = ( 162 | self._predict_activity(adjusted_ages).rename_axis(["hsagrp", "sex", "age"]) 163 | / self._activity_ages.loc[slice(None), slice(None), self._ages] # type: ignore 164 | ).rename("health_status_adjustment") 165 | 166 | # if any factor goes below 0, set it to 0 167 | factor[factor < 0] = 0 168 | 169 | self._cache[cache_key] = factor 170 | return factor 171 | 172 | def _predict_activity(self, adjusted_ages): 173 | raise NotImplementedError() 174 | 175 | 176 | class HealthStatusAdjustmentGAM(HealthStatusAdjustment): 177 | """Health Status Adjustment (GAMs).""" 178 | 179 | def __init__(self, data: Data, base_year: str): 180 | """Initialise HealthStatusAdjustmentGAM. 181 | 182 | Args: 183 | data: The data loader. 184 | base_year: The baseline year for the model run. 185 | """ 186 | self._gams = data.get_hsa_gams() 187 | 188 | super().__init__(data, base_year) 189 | 190 | def _predict_activity(self, adjusted_ages): 191 | return pd.concat( 192 | { 193 | (h, s): pd.Series( 194 | g.predict(adjusted_ages.loc[s]), 195 | index=self._ages, 196 | ).apply(lambda x: x if x > 0 else 0) 197 | for (h, s), g in self._gams.items() 198 | } 199 | ) 200 | 201 | 202 | class HealthStatusAdjustmentInterpolated(HealthStatusAdjustment): 203 | """Health Status Adjustment (Interpolated).""" 204 | 205 | def __init__(self, data: Data, base_year: str): 206 | """Initialise HealthStatusAdjustmentInterpolated. 207 | 208 | Args: 209 | data: The data loader. 210 | base_year: The baseline year for the model run. 
211 | """ 212 | super().__init__(data, base_year) 213 | self._load_activity_ages_lists() 214 | 215 | def _load_activity_ages_lists(self): 216 | self._activity_ages_lists = self._activity_ages.groupby(level=[0, 1]).agg(list) 217 | 218 | def _predict_activity(self, adjusted_ages): 219 | return pd.concat( 220 | { 221 | (h, s): pd.Series( 222 | np.interp(adjusted_ages.loc[s], self._all_ages, v), 223 | index=self._ages, 224 | ).apply(lambda x: x if x > 0 else 0) 225 | for (h, s), v in self._activity_ages_lists.items() # type: ignore 226 | } 227 | ) 228 | -------------------------------------------------------------------------------- /tests/unit/nhp/model/test_inpatient_efficiencies.py: -------------------------------------------------------------------------------- 1 | """Test inpatient efficiencies.""" 2 | 3 | from unittest.mock import Mock, patch 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import pytest 8 | 9 | from nhp.model.inpatients import InpatientEfficiencies 10 | 11 | 12 | @pytest.fixture 13 | def mock_ipe(): 14 | """Create a mock Model instance.""" 15 | with patch.object(InpatientEfficiencies, "__init__", lambda s, d, m: None): 16 | ipe = InpatientEfficiencies(None, None) # type: ignore 17 | ipe._model_iteration = Mock() 18 | ipe.losr = pd.DataFrame( 19 | { 20 | "type": [x for x in ["all", "sdec", "pre-op"] for _ in [0, 1]] 21 | + ["day_procedures_daycase", "day_procedures_outpatients"], 22 | "pre-op_days": [pd.NA] * 4 + [1, 2] + [pd.NA] * 2, 23 | "losr_f": [1 - 1 / (2**x) for x in range(8)], 24 | }, 25 | index=["a", "b", "c", "d", "e", "f", "g", "h"], 26 | ) 27 | return ipe 28 | 29 | 30 | def test_init(mocker): 31 | # arrange 32 | mocker.patch("nhp.model.inpatients.InpatientEfficiencies._select_single_strategy") 33 | mocker.patch("nhp.model.inpatients.InpatientEfficiencies._generate_losr_df") 34 | 35 | model_iteration = Mock() 36 | model_iteration.model_run = 0 37 | data = pd.DataFrame({"speldur": [1, 2, 3]}) 38 | model_iteration.step_counts = "step_counts" 39 | model_iteration.model.strategies = {"efficiencies": "efficiencies"} 40 | 41 | # act 42 | actual = InpatientEfficiencies(data, model_iteration) 43 | 44 | # assert 45 | assert actual._model_iteration == model_iteration 46 | assert actual.data.equals(data) 47 | assert actual.strategies == "efficiencies" 48 | assert actual.speldur_before.to_list() == [1, 2, 3] 49 | 50 | actual._select_single_strategy.assert_called_once() # type: ignore 51 | actual._generate_losr_df.assert_called_once() # type: ignore 52 | 53 | 54 | def test_select_single_strategy(mock_ipe): 55 | # arrange 56 | m = mock_ipe 57 | m._model_iteration.rng = np.random.default_rng(0) 58 | m.data = pd.DataFrame({"rn": list(range(5)), "admimeth": ["0"] * 4 + ["3"]}) 59 | m._model_iteration.model.strategies = { 60 | "efficiencies": pd.DataFrame({"strategy": ["a"] * 3 + ["b"] * 3}, index=[1, 2, 3] * 2) 61 | } 62 | m._model_iteration.params = {"efficiencies": {"ip": {"a": 2, "b": 3, "c": 4}}} 63 | 64 | # act 65 | m._select_single_strategy() 66 | 67 | # assert 68 | assert m.data.index.fillna("NULL").to_list() == [ 69 | "NULL", 70 | "b", 71 | "b", 72 | "a", 73 | "NULL", 74 | ] 75 | 76 | 77 | def test_generate_losr_df(mock_ipe): 78 | # arrange 79 | m = mock_ipe 80 | 81 | m._model_iteration.params = { 82 | "efficiencies": { 83 | "ip": { 84 | "a": {"type": "1", "interval": [1, 3]}, 85 | "b": {"type": "1", "interval": [2, 4]}, 86 | "c": {"type": "2", "other": 1, "interval": [3, 5]}, 87 | } 88 | } 89 | } 90 | m._model_iteration.run_params = {"efficiencies": {"ip": {"a": 
2, "b": 3, "c": 4}}} 91 | 92 | expected = { 93 | "type": ["1", "1", "2"], 94 | "interval": [[1, 3], [2, 4], [3, 5]], 95 | "other": [None, None, 1.0], 96 | "losr_f": [2, 3, 4], 97 | } 98 | 99 | # act 100 | m._generate_losr_df() 101 | actual = m.losr.to_dict(orient="list") 102 | actual["other"] = [None if np.isnan(i) else i for i in actual["other"]] 103 | 104 | # assert 105 | assert actual == expected 106 | 107 | 108 | @pytest.mark.parametrize("losr_type", ["all", "sdec", "pre-op"]) 109 | def test_losr_empty(mock_ipe, losr_type): 110 | """Test that if no preop strategy provided losr functions return self.""" 111 | # arrange 112 | m = mock_ipe 113 | m.losr = m.losr[m.losr.type != losr_type] 114 | m.data = pd.DataFrame({"speldur": list(range(9))}, index=["x", "a", "b"] * 3) 115 | 116 | # act / assert 117 | match losr_type: 118 | case "all": 119 | assert m.losr_all() == m 120 | case "sdec": 121 | assert m.losr_sdec() == m 122 | case "pre-op": 123 | assert m.losr_preop() == m 124 | 125 | 126 | def test_losr_all(mock_ipe): 127 | """Test that it reduces the speldur column for 'all' types.""" 128 | # arrange 129 | m = mock_ipe 130 | m.data = pd.DataFrame({"speldur": list(range(9))}, index=["x", "a", "b"] * 3) 131 | m._model_iteration.rng.binomial.return_value = np.arange(6) 132 | 133 | # act 134 | actual = m.losr_all() 135 | binomial_call_args = m._model_iteration.rng.binomial.call_args_list[0][0] 136 | 137 | # assert 138 | assert actual == m 139 | 140 | assert m.data["speldur"].to_list() == [0, 0, 3, 3, 1, 4, 6, 2, 5] 141 | 142 | assert binomial_call_args[0].to_list() == [1, 4, 7, 2, 5, 8] 143 | assert binomial_call_args[1].to_list() == [0, 0, 0, 0.5, 0.5, 0.5] 144 | 145 | 146 | def test_losr_sdec(mock_ipe): 147 | """Test that it reduces the speldur column for 'sdec' types.""" 148 | # arrange 149 | m = mock_ipe 150 | m.data = pd.DataFrame( 151 | { 152 | "speldur": list(range(9)), 153 | "classpat": ["1"] * 9, 154 | }, 155 | index=["x", "c", "d"] * 3, 156 | ) 157 | m._model_iteration.rng.binomial.return_value = [0, 0, 1, 0, 1, 1] 158 | 159 | # act 160 | actual = m.losr_sdec() 161 | binomial_call_args = m._model_iteration.rng.binomial.call_args_list[0][0] 162 | 163 | # assert 164 | assert actual == m 165 | 166 | assert m.data["speldur"].to_list() == [0, 0, 0, 3, 0, 5, 6, 7, 8] 167 | assert m.data["classpat"].to_list() == [ 168 | "1", 169 | "-3", 170 | "-3", 171 | "1", 172 | "-3", 173 | "1", 174 | "1", 175 | "1", 176 | "1", 177 | ] 178 | 179 | assert binomial_call_args[0] == 1 180 | assert binomial_call_args[1].equals(m.losr.loc[["c"] * 3 + ["d"] * 3, "losr_f"]) 181 | 182 | 183 | def test_losr_preop(mock_ipe): 184 | """Test that it reduces the speldur column for 'pre-op' types.""" 185 | # arrange 186 | m = mock_ipe 187 | m.data = pd.DataFrame({"speldur": list(range(9))}, index=["x", "e", "f"] * 3) 188 | m._model_iteration.rng.binomial.return_value = [0, 1, 0, 1, 0, 1] 189 | 190 | # act 191 | actual = m.losr_preop() 192 | binomial_call_args = m._model_iteration.rng.binomial.call_args_list[0][0] 193 | 194 | # assert 195 | assert actual == m 196 | 197 | assert m.data["speldur"].to_list() == [0, 1, 0, 3, 3, 5, 6, 7, 6] 198 | 199 | assert binomial_call_args[0] == 1 200 | assert binomial_call_args[1].equals(1 - m.losr.loc[["e"] * 3 + ["f"] * 3, "losr_f"]) 201 | 202 | 203 | @pytest.mark.parametrize( 204 | "day_procedures_type, expected_speldur, expected_classpat", 205 | [ 206 | ( 207 | "day_procedures_daycase", 208 | [0, 1, 2, 3, 0, 5, 6, 7, 8] * 2, 209 | (["1"] * 4 + ["-2"] + ["1"] * 4) * 2, 210 | ), 211 | (
212 | "day_procedures_outpatients", 213 | [0, 1, 2, 3, 4, 5, 6, 0, 8] * 2, 214 | (["1"] * 7 + ["-1"] + ["1"]) * 2, 215 | ), 216 | ], 217 | ) 218 | def test_losr_day_procedures(mock_ipe, day_procedures_type, expected_speldur, expected_classpat): 219 | """Test that it reduces the speldur column for 'day_procedures' types.""" 220 | # arrange 221 | m = mock_ipe 222 | strats = ["day_procedures_usually_dc", "day_procedures_usually_op"] 223 | # replace the index 224 | i = m.losr.index[~m.losr.type.str.startswith("day_procedures_")].to_list() + strats 225 | m.losr.index = i 226 | 227 | m.data = pd.DataFrame( 228 | { 229 | "speldur": list(range(9)) * 2, 230 | "classpat": ["1"] * 18, 231 | }, 232 | index=[x for x in ["x"] + strats for _ in range(3)] * 2, 233 | ) 234 | m._model_iteration.rng.binomial.return_value = np.tile([1, 0, 1], 2) 235 | m.step_counts = {} 236 | 237 | # act 238 | actual = m.losr_day_procedures(day_procedures_type) 239 | 240 | # assert 241 | assert actual == m 242 | 243 | assert m._model_iteration.rng.binomial.call_args[0][0] == 1 244 | assert ( 245 | m._model_iteration.rng.binomial.call_args[0][1] 246 | == m.losr[m.losr.type == day_procedures_type]["losr_f"].repeat(6) 247 | ).all() 248 | 249 | assert m.data["speldur"].to_list() == expected_speldur 250 | assert m.data["classpat"].to_list() == expected_classpat 251 | 252 | 253 | def test_get_step_counts(mock_ipe): 254 | # arrange 255 | mock_ipe.data = pd.DataFrame( 256 | { 257 | "rn": ["1", "2", "3", "1"], 258 | "pod": ["a", "a", "a", "a"], 259 | "sitetret": ["a", "a", "a", "a"], 260 | "classpat": ["-1", "1", "1", "1"], 261 | "speldur": [1, 2, 3, 4], 262 | }, 263 | index=["a", "b", "a", "a"], 264 | ) 265 | mock_ipe.speldur_before = [3, 4, 5, 6] 266 | 267 | # act 268 | actual = mock_ipe.get_step_counts() 269 | 270 | # assert 271 | assert actual.to_dict("list") == { 272 | "pod": ["a", "a"], 273 | "sitetret": ["a", "a"], 274 | "strategy": ["a", "b"], 275 | "admissions": [-1, 0], 276 | "beddays": [-7, -2], 277 | "change_factor": ["efficiencies", "efficiencies"], 278 | } 279 | -------------------------------------------------------------------------------- /src/nhp/model/data/reference/hsa_split_normal_params.csv: -------------------------------------------------------------------------------- 1 | var,sex,year,mode,sd1,sd2 2 | ppp,f,2020,-0.834139124472048,0.471364313830598,0.178658505716017 3 | ppp,f,2021,0.0917709146251422,0.036131621111269,0.0951795525527208 4 | ppp,f,2022,0.0536166368383793,0.0550077634560789,0.143073039832003 5 | ppp,f,2023,0.614412537448788,0.723130220541921,1.91266306661884 6 | ppp,f,2024,0.231095142009604,0.91139217468491,2.38418012311657 7 | ppp,f,2025,0.152008729180074,0.545496334363621,1.44256490718915 8 | ppp,f,2026,0.12446351318926,0.430806435794337,1.12531842730168 9 | ppp,f,2027,-0.00449203033812275,0.492163171476909,1.28655751090596 10 | ppp,f,2028,0.0126432843697,0.411838390705167,1.08255366531985 11 | ppp,f,2029,-0.0845170276686872,0.461454613119302,1.20842100540571 12 | ppp,f,2030,-0.0558685469792159,0.407905034677731,1.07422614501357 13 | ppp,f,2031,-0.0373610990433803,0.367034697951773,0.97828431168848 14 | ppp,f,2032,-0.102726179592795,0.400785988015702,1.0553364449507 15 | ppp,f,2033,-0.0809292556622157,0.375403586349822,0.980387648203298 16 | ppp,f,2034,-0.0650252171071374,0.350686144473125,0.924197174709526 17 | ppp,f,2035,-0.0530497688024056,0.336711955819495,0.880614039708151 18 | ppp,f,2036,-0.0972180283591862,0.356461296665913,0.934988090118291 19 | 
ppp,f,2037,-0.0837532290311653,0.340432413576408,0.898821814332704 20 | ppp,f,2038,-0.0730592327757793,0.326177849161222,0.864751590563446 21 | ppp,f,2039,-0.0644433388248815,0.322087793318307,0.836158863691571 22 | ppp,f,2040,-0.0980131878853271,0.332134169938724,0.877591369074311 23 | ppp,f,2041,-0.0885500471310261,0.321630773072594,0.851242804211436 24 | ppp,f,2042,-0.080692300423259,0.316536678604328,0.831289179237144 25 | ppp,f,2043,-0.0741188689527235,0.309193313445383,0.816114566681116 26 | ppp,m,2020,0.193742417540525,0.0181524421322472,0.0478105131873017 27 | ppp,m,2021,2.0779135826601,0.36322804004715,0.960165182522645 28 | ppp,m,2022,1.10719713339036,0.273608383470943,0.723845738383006 29 | ppp,m,2023,0.782288742589041,0.247085822613072,0.645577389082937 30 | ppp,m,2024,0.652673361248066,0.303970106573748,0.80826480754188 31 | ppp,m,2025,0.521119740007301,0.28024500654209,0.732039449987178 32 | ppp,m,2026,0.44138582263599,0.259884486195972,0.688130191255533 33 | ppp,m,2027,0.387561757199391,0.247719351110333,0.65544050161377 34 | ppp,m,2028,0.348543321439797,0.241337637151892,0.635854996676599 35 | ppp,m,2029,0.291848522826749,0.269025805872835,0.705927274958473 36 | ppp,m,2030,0.268919910246,0.258646617375023,0.686797157327441 37 | ppp,m,2031,0.250641131223844,0.252709155563897,0.666075513038602 38 | ppp,m,2032,0.235617235692693,0.247280334629383,0.653025928018798 39 | ppp,m,2033,0.222960528154995,0.242752142242392,0.6420618796478 40 | ppp,m,2034,0.186153161195007,0.26021861910284,0.689358292361689 41 | ppp,m,2035,0.178172095101815,0.257651587814224,0.675171714286035 42 | ppp,m,2036,0.171110522012626,0.250494118585006,0.664281420430122 43 | ppp,m,2037,0.164771407712308,0.250676426138864,0.659308336317986 44 | ppp,m,2038,0.159010260443084,0.249641390878626,0.653517705880693 45 | ppp,m,2039,0.131483694921187,0.260198196231541,0.688938510138538 46 | ppp,m,2040,0.127787283611896,0.257713947685093,0.677476263584794 47 | ppp,m,2041,0.12428993697815,0.257004994535618,0.674165387253765 48 | ppp,m,2042,0.120958477574032,0.255553469371667,0.664477823737272 49 | ppp,m,2043,0.117766712679071,0.250685526734408,0.662934913295114 50 | lle,f,2020,0.0480432374110331,0.234661348987938,0.0896777862662031 51 | lle,f,2021,0.00896721789974483,0.941815993291145,0.360824837951047 52 | lle,f,2022,0.00746028038796708,0.0535911827234823,0.141783851073904 53 | lle,f,2023,-0.0305165356416488,0.0720675873623765,0.190113355060559 54 | lle,f,2024,-0.226919187351552,0.905667212989931,2.36710037606041 55 | lle,f,2025,-0.608461965219194,1.08021621490582,2.85538264000775 56 | lle,f,2026,-0.990004743086818,1.25806514715961,3.32568856696691 57 | lle,f,2027,-0.459428522153902,0.726610618922386,1.91981850105593 58 | lle,f,2028,-0.651087219873458,0.82239998950192,2.14966376178759 59 | lle,f,2029,-0.412116864560758,0.614241436660381,1.59505292282129 60 | lle,f,2030,-0.540480868897651,0.668716601786072,1.7557864208922 61 | lle,f,2031,-0.668844873234513,0.729199196586484,1.92483430405873 62 | lle,f,2032,-0.486952310981194,0.595835609935167,1.56447647321396 63 | lle,f,2033,-0.583668968626731,0.643206426270219,1.68852158660256 64 | lle,f,2034,-0.680385626272249,0.690158495256528,1.80705619241684 65 | lle,f,2035,-0.777102283917764,0.731046769279183,1.93179702057134 66 | lle,f,2036,-0.8738189415633,0.780539434156057,2.04203622755017 67 | lle,f,2037,-0.970535599208814,0.821164963931148,2.16561236593075 68 | lle,f,2038,-0.767167868811377,0.696486293386151,1.83260087050192 69 | 
lle,f,2039,-0.844896118442104,0.735079010169354,1.94638991183251 70 | lle,f,2040,-0.92262436807281,0.775218820751697,2.03184276209203 71 | lle,f,2041,-1.00035261770354,0.820447215152552,2.11445345034923 72 | lle,f,2042,-1.07808086733425,0.845122639099083,2.2239889219515 73 | lle,f,2043,-1.15580911696495,0.88836946496922,2.32688717608471 74 | lle,m,2020,-1.41209734726175,0.475273925902037,0.180858247598276 75 | lle,m,2021,0.155459111608373,0.0365606275410303,0.0954628766460847 76 | lle,m,2022,0.117175805676228,0.0542507287267984,0.142572263602899 77 | lle,m,2023,1.30823874088294,0.730900285544872,1.91715849032506 78 | lle,m,2024,0.72035535093461,0.460237134562326,1.20915227497898 79 | lle,m,2025,0.526934459706753,0.549150388951955,1.44828744759318 80 | lle,m,2026,0.39344259856617,0.426217392467631,1.13067326659635 81 | lle,m,2027,0.263827217225195,0.492770319397573,1.29073587651717 82 | lle,m,2028,0.227981860814804,0.419089880580834,1.09385319034518 83 | lle,m,2029,0.130269234417311,0.468930036576562,1.22494799064566 84 | lle,m,2030,0.127101928910282,0.408852100919786,1.07503516692737 85 | lle,m,2031,0.0485309554788635,0.447870340303633,1.17082099551401 86 | lle,m,2032,-0.0300400179525582,0.481869531186059,1.27103127642089 87 | lle,m,2033,-0.00729747152485714,0.435219055480196,1.14833067995909 88 | lle,m,2034,-0.0731073429788956,0.46742874428569,1.22736881539028 89 | lle,m,2035,-0.138917214432931,0.49450455138525,1.31836834165726 90 | lle,m,2036,-0.204727085886984,0.528971997801088,1.39783691936413 91 | lle,m,2037,-0.161709866077686,0.483060734452584,1.27495206979726 92 | lle,m,2038,-0.218404664690736,0.5103744125288,1.33903944712956 93 | lle,m,2039,-0.275099463303799,0.533499730419066,1.41477100857002 94 | lle,m,2040,-0.331794261916849,0.564666881174168,1.48878913003762 95 | lle,m,2041,-0.388489060529912,0.592961887926123,1.55848832457676 96 | lle,m,2042,-0.445183859142959,0.619598496330786,1.62227113759142 97 | lle,m,2043,-0.501878657756009,0.646728234703606,1.69256573019323 98 | hle,f,2020,-1.29925192411912,0.476444847342563,0.179473515280399 99 | hle,f,2021,0.138104732832701,0.0366228762459261,0.0954556644444989 100 | hle,f,2022,1.45929349739205,0.547770245635813,1.43881348484704 101 | hle,f,2023,0.766995215657034,0.365267814635669,0.961763260063316 102 | hle,f,2024,0.57356190036596,0.458927225230286,1.2033408794936 103 | hle,f,2025,0.405498960646502,0.371620502878449,0.973332238273481 104 | hle,f,2026,0.320580182001043,0.323057426479148,0.854610217793067 105 | hle,f,2027,0.268919067785159,0.295828599484129,0.777142382432349 106 | hle,f,2028,0.23388678578409,0.27758962356472,0.73528486940539 107 | hle,f,2029,0.168225935639427,0.313988462121628,0.820183990102987 108 | hle,f,2030,0.151822448128685,0.297101943701489,0.77319148247016 109 | hle,f,2031,0.139076178102769,0.279530739632907,0.745410140607296 110 | hle,f,2032,0.128768053066729,0.27602639633731,0.722219885136169 111 | hle,f,2033,0.120166629523592,0.267481843453053,0.701163076592194 112 | hle,f,2034,0.112806443429856,0.257982155097726,0.68613724696742 113 | hle,f,2035,0.106377185423165,0.255608664909114,0.673813558834866 114 | hle,f,2036,0.124667946841992,0.233890310382506,0.619155756149472 115 | hle,f,2037,0.118142234352003,0.23152111132887,0.612056392136136 116 | hle,f,2038,0.112210408726827,0.231076327977737,0.6086621788749 117 | hle,f,2039,0.106767666402082,0.231708009886788,0.605497030559557 118 | hle,f,2040,0.101732493494371,0.229998093179998,0.602265828463221 119 | 
hle,f,2041,0.113580801623248,0.218363524611616,0.574437903684957 120 | hle,f,2042,0.108517976127184,0.219428048782622,0.573501970492013 121 | hle,f,2043,0.103754078624265,0.218528344363034,0.574892506500152 122 | hle,m,2020,2.98807783169218,0.183231996816577,0.481714705960317 123 | hle,m,2021,1.56227925790639,0.182987553470709,0.485171020904193 124 | hle,m,2022,1.08567682559972,0.185806266919959,0.489160953022212 125 | hle,m,2023,0.846373428662823,0.188136052209445,0.489813056389876 126 | hle,m,2024,0.748159711873529,0.231114226052169,0.606843317130409 127 | hle,m,2025,0.623017800128974,0.223471672703284,0.592047800991743 128 | hle,m,2026,0.53892173844356,0.218905906624878,0.576041675562774 129 | hle,m,2027,0.478280448220502,0.217254294086241,0.566527433065901 130 | hle,m,2028,0.432298390161436,0.214054064826854,0.560848417761751 131 | hle,m,2029,0.396089153545024,0.212474989868474,0.560786813640649 132 | hle,m,2030,0.379317466532436,0.192012710160569,0.506812163411555 133 | hle,m,2031,0.355740465386239,0.194349244744074,0.507902010956174 134 | hle,m,2032,0.335482331867593,0.194920131698057,0.511884320855141 135 | hle,m,2033,0.317831880056301,0.19427906454031,0.516011010367043 136 | hle,m,2034,0.302267573610902,0.197051780976947,0.52005162235752 137 | hle,m,2035,0.288398260275286,0.197334952876849,0.520559162711348 138 | hle,m,2036,0.275924823618311,0.199759589258858,0.522613898252706 139 | hle,m,2037,0.276542936683482,0.190437638836115,0.49915716898656 140 | hle,m,2038,0.266032191312865,0.191878846272318,0.503835008753617 141 | hle,m,2039,0.256331577733053,0.191511106900772,0.504439140771584 142 | hle,m,2040,0.257674491834682,0.184747366680036,0.490777191243843 143 | hle,m,2041,0.24911711710412,0.186866290071902,0.49425518055093 144 | hle,m,2042,0.241083983426635,0.190117492097557,0.501631742179263 145 | hle,m,2043,0.242615042585117,0.183994654945258,0.485809556480228 146 | -------------------------------------------------------------------------------- /src/nhp/docker/run.py: -------------------------------------------------------------------------------- 1 | """Run the model inside of the docker container.""" 2 | 3 | import gzip 4 | import json 5 | import logging 6 | import os 7 | import re 8 | from pathlib import Path 9 | from typing import Any, Callable 10 | 11 | from azure.identity import DefaultAzureCredential 12 | from azure.storage.blob import BlobServiceClient 13 | from azure.storage.filedatalake import DataLakeServiceClient 14 | 15 | from nhp.docker.config import Config 16 | from nhp.model.params import load_params 17 | from nhp.model.run import noop_progress_callback 18 | 19 | 20 | class RunWithLocalStorage: 21 | """Methods for running with local storage.""" 22 | 23 | def __init__(self, filename: str): 24 | """Initialize the RunWithLocalStorage instance. 25 | 26 | Args: 27 | filename: Name of the parameter file to load. 28 | """ 29 | self.params = load_params(f"queue/{filename}") 30 | 31 | def finish( 32 | self, 33 | results_file: str, 34 | saved_files: list, 35 | save_full_model_results: bool, 36 | additional_metadata: dict, 37 | ) -> None: 38 | """Post model run steps. 39 | 40 | Args: 41 | results_file: The path to the results file. 42 | saved_files: Filepaths of results, saved in parquet format and params in json format. 43 | save_full_model_results: Whether to save the full model results or not. 44 | additional_metadata: Additional metadata to log. 45 | """ 46 | 47 | def progress_callback(self) -> Callable[[Any], Callable[[Any], None]]: 48 | """Progress callback method. 
49 | 50 | For local storage do nothing. 51 | 52 | Returns: 53 | A no-op progress callback function. 54 | """ 55 | return noop_progress_callback 56 | 57 | 58 | class RunWithAzureStorage: 59 | """Methods for running with azure storage.""" 60 | 61 | def __init__(self, filename: str, config: Config = Config()): 62 | """Initialise RunWithAzureStorage. 63 | 64 | Args: 65 | filename: Name of the parameter file to load. 66 | config: The configuration for the run. Defaults to Config(). 67 | """ 68 | logging.getLogger("azure.storage.common.storageclient").setLevel(logging.WARNING) 69 | logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel( 70 | logging.WARNING 71 | ) 72 | self._config = config 73 | 74 | self._app_version = re.sub("(\\d+\\.\\d+)\\..*", "\\1", config.APP_VERSION) 75 | 76 | self._blob_storage_account_url = ( 77 | f"https://{self._config.STORAGE_ACCOUNT}.blob.core.windows.net" 78 | ) 79 | self._adls_storage_account_url = ( 80 | f"https://{self._config.STORAGE_ACCOUNT}.dfs.core.windows.net" 81 | ) 82 | 83 | self.params = self._get_params(filename) 84 | self._get_data(self.params["start_year"], self.params["dataset"]) 85 | 86 | def _get_container(self, container_name: str): 87 | return BlobServiceClient( 88 | account_url=self._blob_storage_account_url, 89 | credential=DefaultAzureCredential(), 90 | ).get_container_client(container_name) 91 | 92 | def _get_params(self, filename: str) -> dict: 93 | """Get the parameters for the model. 94 | 95 | Args: 96 | filename: The name of the params file. 97 | 98 | Returns: 99 | The parameters for the model. 100 | """ 101 | logging.info("downloading params: %s", filename) 102 | 103 | self._queue_blob = self._get_container("queue").get_blob_client(filename) 104 | 105 | params_content = self._queue_blob.download_blob().readall() 106 | 107 | return json.loads(params_content) 108 | 109 | def _get_data(self, year: str, dataset: str) -> None: 110 | """Get data to run the model. 111 | 112 | Downloads data from Azure storage for the specified year and dataset. 113 | 114 | Args: 115 | year: The year of data to load. 116 | dataset: The dataset to load. 117 | """ 118 | logging.info("downloading data (%s / %s)", year, dataset) 119 | fs_client = DataLakeServiceClient( 120 | account_url=self._adls_storage_account_url, 121 | credential=DefaultAzureCredential(), 122 | ).get_file_system_client("data") 123 | 124 | version = self._config.DATA_VERSION 125 | 126 | paths = [p.name for p in fs_client.get_paths(version, recursive=False)] 127 | 128 | for p in paths: 129 | subpath = f"{p}/fyear={year}/dataset={dataset}" 130 | os.makedirs(f"data{subpath.removeprefix(version)}", exist_ok=True) 131 | 132 | for i in fs_client.get_paths(subpath): 133 | filename = i.name 134 | if not filename.endswith("parquet"): 135 | continue 136 | 137 | logging.info(" * %s", filename) 138 | local_name = "data" + filename.removeprefix(version) 139 | with open(local_name, "wb") as local_file: 140 | file_client = fs_client.get_file_client(filename) 141 | local_file.write(file_client.download_file().readall()) 142 | 143 | def _upload_results_json(self, results_file: str, metadata: dict) -> None: 144 | """Upload the results. 145 | 146 | Once the model has run, upload the results to blob storage. 147 | 148 | Args: 149 | results_file: The saved results file. 150 | metadata: The metadata to attach to the blob. 
151 | """ 152 | container = self._get_container("results") 153 | 154 | with open(f"results/{results_file}.json", "rb") as file: 155 | container.upload_blob( 156 | f"prod/{self._app_version}/{results_file}.json.gz", 157 | gzip.compress(file.read()), 158 | metadata=metadata, 159 | overwrite=True, 160 | ) 161 | 162 | def _upload_results_files(self, files: list, metadata: dict) -> None: 163 | """Upload the results. 164 | 165 | Once the model has run, upload the files (parquet for model results and json for 166 | model params) to blob storage. 167 | 168 | Args: 169 | files: List of files to be uploaded. 170 | metadata: The metadata to attach to the blob. 171 | """ 172 | container = self._get_container("results") 173 | for file in files: 174 | filename = file[8:] 175 | if file.endswith(".json"): 176 | metadata_to_use = metadata 177 | else: 178 | metadata_to_use = None 179 | with open(file, "rb") as f: 180 | container.upload_blob( 181 | f"aggregated-model-results/{self._app_version}/{filename}", 182 | f.read(), 183 | overwrite=True, 184 | metadata=metadata_to_use, 185 | ) 186 | 187 | def _upload_full_model_results(self) -> None: 188 | container = self._get_container("results") 189 | 190 | dataset = self.params["dataset"] 191 | scenario = self.params["scenario"] 192 | create_datetime = self.params["create_datetime"] 193 | 194 | path = Path(f"results/{dataset}/{scenario}/{create_datetime}") 195 | 196 | for file in path.glob("**/*.parquet"): 197 | filename = file.as_posix()[8:] 198 | with open(file, "rb") as f: 199 | container.upload_blob( 200 | f"full-model-results/{self._app_version}/{filename}", 201 | f.read(), 202 | overwrite=True, 203 | ) 204 | 205 | def _cleanup(self) -> None: 206 | """Cleanup. 207 | 208 | Once the model has run, remove the file from the queue. 209 | """ 210 | logging.info("cleaning up queue") 211 | 212 | self._queue_blob.delete_blob() 213 | 214 | def finish( 215 | self, 216 | results_file: str, 217 | saved_files: list, 218 | save_full_model_results: bool, 219 | additional_metadata: dict, 220 | ) -> None: 221 | """Post model run steps. 222 | 223 | Args: 224 | results_file: The path to the results file. 225 | saved_files: Filepaths of results, saved in parquet format and params in json format. 226 | save_full_model_results: Whether to save the full model results or not. 227 | additional_metadata: Additional metadata to log. 228 | """ 229 | metadata = { 230 | k: str(v) 231 | for k, v in self.params.items() 232 | if not isinstance(v, dict) and not isinstance(v, list) 233 | } 234 | metadata.update({k: str(v) for k, v in additional_metadata.items()}) 235 | 236 | self._upload_results_json(results_file, metadata) 237 | self._upload_results_files(saved_files, metadata) 238 | if save_full_model_results: 239 | self._upload_full_model_results() 240 | self._cleanup() 241 | 242 | def progress_callback(self) -> Callable[[Any], Callable[[Any], None]]: 243 | """Progress callback method. 244 | 245 | Updates the metadata for the blob in the queue to give progress. 246 | 247 | Returns: 248 | A callback function that updates progress for each model type. 
249 | """ 250 | blob = self._queue_blob 251 | 252 | current_progress = { 253 | **blob.get_blob_properties()["metadata"], 254 | "Inpatients": 0, 255 | "Outpatients": 0, 256 | "AaE": 0, 257 | } 258 | 259 | blob.set_blob_metadata({k: str(v) for k, v in current_progress.items()}) 260 | 261 | def callback(model_type: Any) -> Callable[[Any], None]: 262 | def update(n_completed: Any) -> None: 263 | current_progress[model_type] = n_completed 264 | blob.set_blob_metadata({k: str(v) for k, v in current_progress.items()}) 265 | 266 | return update 267 | 268 | return callback 269 | -------------------------------------------------------------------------------- /src/nhp/model/outpatients.py: -------------------------------------------------------------------------------- 1 | """Outpatients Module. 2 | 3 | Implements the Outpatients model. 4 | """ 5 | 6 | from typing import Any, Callable, Tuple 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from nhp.model.data import Data 12 | from nhp.model.model import Model 13 | from nhp.model.model_iteration import ModelIteration 14 | 15 | 16 | class OutpatientsModel(Model): 17 | """Outpatients Model. 18 | 19 | Implementation of the Model for Outpatient attendances. 20 | 21 | Args: 22 | params: The parameters to run the model with, or the path to a params file to load. 23 | data: A callable that creates a Data instance. 24 | hsa: An instance of the HealthStatusAdjustment class. If left as None an instance is 25 | created. Defaults to None. 26 | run_params: The parameters to use for each model run. Generated automatically if left as 27 | None. Defaults to None. 28 | save_full_model_results: Whether to save the full model results or not. Defaults to False. 29 | """ 30 | 31 | def __init__( 32 | self, 33 | params: dict | str, 34 | data: Callable[[int, str], Data], 35 | hsa: Any = None, 36 | run_params: dict | None = None, 37 | save_full_model_results: bool = False, 38 | ) -> None: 39 | """Initialise the Outpatients Model. 40 | 41 | Args: 42 | params: The parameters to use. 43 | data: A method to create a Data instance. 44 | hsa: Health Status Adjustment object. Defaults to None. 45 | run_params: The run parameters to use. Defaults to None. 46 | save_full_model_results: Whether to save full model results. Defaults to False. 47 | """ 48 | # call the parent init function 49 | super().__init__( 50 | "op", 51 | ["attendances", "tele_attendances"], 52 | params, 53 | data, 54 | hsa, 55 | run_params, 56 | save_full_model_results, 57 | ) 58 | 59 | def _get_data(self, data_loader: Data) -> pd.DataFrame: 60 | return data_loader.get_op() 61 | 62 | def get_data_counts(self, data: pd.DataFrame) -> np.ndarray: 63 | """Get row counts of data. 64 | 65 | Args: 66 | data: The data to get the counts of. 67 | 68 | Returns: 69 | The counts of the data, required for activity avoidance steps. 
70 | """ 71 | return data[["attendances", "tele_attendances"]].to_numpy().astype(float).transpose() 72 | 73 | def _load_strategies(self, data_loader: Data) -> None: 74 | data = self.data.set_index("rn") 75 | 76 | activity_avoidance = pd.concat( 77 | [ 78 | "followup_reduction_" + data[~data["is_first"] & ~data["has_procedures"]]["type"], 79 | "consultant_to_consultant_reduction_" + data[data["is_cons_cons_ref"]]["type"], 80 | "gp_referred_first_attendance_reduction_" 81 | + data[data["is_gp_ref"] & data["is_first"]]["type"], 82 | ] 83 | ) 84 | efficiencies: pd.Series = pd.concat( # type: ignore 85 | ["convert_to_tele_" + data[~data["has_procedures"]]["type"]] 86 | ) 87 | 88 | self.strategies: dict[str, pd.DataFrame] = { 89 | k: v.rename("strategy").to_frame().assign(sample_rate=1) 90 | for k, v in { 91 | "activity_avoidance": activity_avoidance, 92 | "efficiencies": efficiencies, 93 | }.items() 94 | } 95 | 96 | @staticmethod 97 | def _convert_to_tele( 98 | data: pd.DataFrame, 99 | model_iteration: ModelIteration, 100 | ) -> tuple[pd.DataFrame, pd.DataFrame]: 101 | """Convert attendances to tele-attendances. 102 | 103 | Args: 104 | data: The DataFrame that we are updating. 105 | model_iteration: The model iteration containing the RNG and run parameters. 106 | 107 | Returns: 108 | A tuple containing the updated data and the updated step counts. 109 | """ 110 | # TODO: we need to make sure efficiencies contains convert to tele keys 111 | rng = model_iteration.rng 112 | params = model_iteration.run_params["efficiencies"]["op"] 113 | strategies = model_iteration.model.strategies["efficiencies"] 114 | # make sure to take the complement of the parameter 115 | factor = 1 - data["rn"].map(strategies["strategy"].map(params)).fillna(1) 116 | # create a value for converting attendances into tele attendances for each row 117 | # the value will be a random binomial value, i.e. we will convert between 0 and attendances 118 | # into tele attendances 119 | tele_conversion = rng.binomial(data["attendances"].to_list(), factor.to_list()) 120 | # update the columns, subtracting tc from one, adding tc to the other (we maintain the 121 | # number of overall attendances) 122 | data["attendances"] -= tele_conversion 123 | data["tele_attendances"] += tele_conversion 124 | 125 | step_counts = ( 126 | pd.DataFrame( 127 | { 128 | "pod": data["pod"], 129 | "sitetret": data["sitetret"], 130 | "change_factor": "efficiencies", 131 | "strategy": "convert_to_tele", 132 | "attendances": tele_conversion * -1, 133 | "tele_attendances": tele_conversion, 134 | } 135 | ) 136 | .groupby(["pod", "sitetret", "change_factor", "strategy"], as_index=False) # ty: ignore[no-matching-overload] 137 | .sum() 138 | .query("attendances<0") 139 | ) 140 | return data, step_counts 141 | 142 | def apply_resampling(self, row_samples: np.ndarray, data: pd.DataFrame) -> pd.DataFrame: 143 | """Apply row resampling. 144 | 145 | Called from within `model.activity_resampling.ActivityResampling.apply_resampling`. 146 | 147 | Args: 148 | row_samples: [2xn] array, where n is the number of rows in `data`, containing the new 149 | values for `data["attendances"]` and `data["tele_attendances"]`. 150 | data: The data that we want to update. 151 | 152 | Returns: 153 | The updated data.
154 | """ 155 | data["attendances"] = row_samples[0] 156 | data["tele_attendances"] = row_samples[1] 157 | # return the altered data 158 | return data 159 | 160 | def efficiencies( 161 | self, data: pd.DataFrame, model_iteration: ModelIteration 162 | ) -> tuple[pd.DataFrame, pd.DataFrame | None]: 163 | """Run the efficiencies steps of the model. 164 | 165 | Args: 166 | data: The data to apply efficiencies to. 167 | model_iteration: An instance of the ModelIteration class. 168 | 169 | Returns: 170 | Tuple containing the updated data and step counts. 171 | """ 172 | data, step_counts = self._convert_to_tele(data, model_iteration) 173 | return data, step_counts 174 | 175 | def calculate_avoided_activity( 176 | self, data: pd.DataFrame, data_resampled: pd.DataFrame 177 | ) -> pd.DataFrame: 178 | """Calculate the rows that have been avoided. 179 | 180 | Args: 181 | data: The data before the binomial thinning step. 182 | data_resampled: The data after the binomial thinning step. 183 | 184 | Returns: 185 | The data that was avoided in the binomial thinning step. 186 | """ 187 | avoided = ( 188 | data[["attendances", "tele_attendances"]] 189 | - data_resampled[["attendances", "tele_attendances"]] 190 | ) 191 | data[["attendances", "tele_attendances"]] = avoided 192 | return data 193 | 194 | @staticmethod 195 | def process_results(data: pd.DataFrame) -> pd.DataFrame: 196 | """Process the data into a format suitable for aggregation in results files. 197 | 198 | Args: 199 | data: Data to be processed. Format should be similar to Model.data. 200 | 201 | Returns: 202 | Processed results. 203 | """ 204 | measures = data.melt(["rn"], ["attendances", "tele_attendances"], "measure") 205 | 206 | # note: any columns used in the calls to _create_agg, including pod and measure 207 | # must be included below 208 | agg_cols = [ 209 | "pod", 210 | "sitetret", 211 | "measure", 212 | "sex", 213 | "age_group", 214 | "age", 215 | "tretspef", 216 | "tretspef_grouped", 217 | ] 218 | data = ( 219 | data.drop(["attendances", "tele_attendances"], axis="columns") 220 | .merge(measures, on="rn") 221 | # summarise the results to make the create_agg steps quicker 222 | .groupby( 223 | agg_cols, 224 | dropna=False, 225 | as_index=False, 226 | ) # ty: ignore[no-matching-overload] 227 | .agg({"value": "sum"}) 228 | .fillna("unknown") 229 | ) 230 | return data 231 | 232 | def specific_aggregations(self, model_results: pd.DataFrame) -> dict[str, pd.Series]: 233 | """Create other aggregations specific to the model type. 234 | 235 | Args: 236 | model_results: The results of a model run. 237 | 238 | Returns: 239 | Dictionary containing the specific aggregations. 240 | """ 241 | return { 242 | "sex+tretspef_grouped": self.get_agg(model_results, "sex", "tretspef_grouped"), 243 | "tretspef": self.get_agg(model_results, "tretspef"), 244 | } 245 | 246 | def save_results(self, model_iteration: ModelIteration, path_fn: Callable[[str], str]) -> None: 247 | """Save the results of running the model. 248 | 249 | This method is used for saving the results of the model run to disk as a parquet file. 250 | It saves just the `rn` (row number) column and the `attendances` and `tele_attendances` 251 | columns, with the intention that you rejoin to the original data. 252 | 253 | Args: 254 | model_iteration: An instance of the ModelIteration class. 255 | path_fn: A function which takes the activity type and returns a path. 
256 | """ 257 | model_iteration.get_model_results().set_index(["rn"])[ 258 | ["attendances", "tele_attendances"] 259 | ].to_parquet(f"{path_fn('op')}/0.parquet") 260 | 261 | model_iteration.avoided_activity.set_index(["rn"])[ 262 | ["attendances", "tele_attendances"] 263 | ].to_parquet(f"{path_fn('op_avoided')}/0.parquet") 264 | -------------------------------------------------------------------------------- /src/nhp/model/model_iteration.py: -------------------------------------------------------------------------------- 1 | """Model Iteration. 2 | 3 | Provides a simple class which holds all of the data required for a model iteration 4 | """ 5 | 6 | from typing import TYPE_CHECKING 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from nhp.model.activity_resampling import ActivityResampling 12 | 13 | ModelRunResult = tuple[dict[str, pd.Series], pd.Series | None] 14 | 15 | if TYPE_CHECKING: 16 | from nhp.model.model import Model 17 | 18 | 19 | class ModelIteration: 20 | """Model Iteration. 21 | 22 | Holds all of the information for a model iteration. 23 | """ 24 | 25 | def __init__(self, model: "Model", model_run: int): 26 | """Perform an iteration of the model. 27 | 28 | Args: 29 | model: An instance of a Model object. 30 | model_run: Which model iteration to run. 31 | """ 32 | self.model: Model = model 33 | 34 | self.model_run = model_run 35 | # if model_run == -1, then use model_run = 0 for run params 36 | self.run_params = model._get_run_params(max(0, model_run)) 37 | self.rng = np.random.default_rng(self.run_params["seed"]) 38 | 39 | self._patch_run_params() 40 | 41 | # data is mutated, so is not a property 42 | self.data = model.data.copy() 43 | self.step_counts = None 44 | self.avoided_activity = pd.DataFrame() 45 | 46 | # run the model 47 | self._run() 48 | 49 | @property 50 | def params(self): 51 | """Get the models parameters.""" 52 | return self.model.params 53 | 54 | def _patch_run_params(self): 55 | """Patch Run Parameters. 
56 | 57 | The run parameters for some items need to be 'patched' so that they include all of the 58 | fields that are used in that step of the model. 59 | """ 60 | run_params = self.run_params 61 | for i in ["expat", "repat_local", "repat_nonlocal"]: 62 | run_params[i]["op"] = { 63 | g: run_params[i]["op"] for g in ["first", "followup", "procedure"] 64 | } 65 | run_params[i]["aae"] = {k: {"Other": v} for k, v in run_params[i]["aae"].items()} 66 | 67 | run_params["baseline_adjustment"]["aae"] = { 68 | k: {"Other": v} for k, v in run_params["baseline_adjustment"]["aae"].items() 69 | } 70 | 71 | def _run(self): 72 | if self.model_run == 0: 73 | return 74 | 75 | data_ar, step_counts_ar = ( 76 | ActivityResampling(self) 77 | .demographic_adjustment() 78 | .birth_adjustment() 79 | .health_status_adjustment() 80 | .expat_adjustment() 81 | .repat_adjustment() 82 | .waiting_list_adjustment() 83 | .baseline_adjustment() 84 | .non_demographic_adjustment() 85 | .inequalities_adjustment() 86 | # call apply_resampling last, as this is what actually alters the data 87 | .apply_resampling() 88 | ) 89 | 90 | data_aa, step_counts_aa = self.model.activity_avoidance(data_ar.copy(), self) 91 | data_ef, step_counts_ef = self.model.efficiencies(data_aa.copy(), self) 92 | 93 | self.avoided_activity = self.model.calculate_avoided_activity(data_ar, data_aa) 94 | 95 | self.data = data_ef 96 | 97 | step_counts_dfs_to_concat: list[pd.DataFrame] = [ 98 | self.model.baseline_step_counts, 99 | step_counts_ar, 100 | step_counts_aa if step_counts_aa is not None else pd.DataFrame(), 101 | step_counts_ef if step_counts_ef is not None else pd.DataFrame(), 102 | ] 103 | 104 | self.step_counts = pd.concat(step_counts_dfs_to_concat) 105 | 106 | def fix_step_counts( 107 | self, 108 | data: pd.DataFrame, 109 | future: np.ndarray, 110 | factors: pd.DataFrame, 111 | term_name: str, 112 | ) -> pd.DataFrame: 113 | """Calculate the step counts. 114 | 115 | Calculates the step counts for the current model run, saving back to 116 | self._model_run.step_counts. 117 | 118 | Args: 119 | data: The data for the current model run. 120 | future: The future row counts after running the poisson resampling. 121 | factors: The factors for this current model run. 122 | term_name: The name of the interaction term for this step. 123 | 124 | Returns: 125 | The step counts for this step. 126 | """ 127 | before = self.model.get_data_counts(data) 128 | # convert the parameter values from a dict of 2d numpy arrays to a 3d numpy array 129 | param_values = np.array(list(factors.to_numpy().transpose())) 130 | # later on we want to be able to multiply by baseline, we need to have compatible 131 | # numpy shapes 132 | # our param values has shape of (x, y). baseline has shape of (z, y).
133 | # (x, 1, y) will allow (x, y) * (z, y) 134 | shape = (param_values.shape[0], 1, param_values.shape[1]) 135 | # calculate the simple effect of each parameter, if it was performed in isolation to all 136 | # other parameters 137 | param_simple_effects = (param_values - 1).reshape(shape) * before 138 | # what is the difference left over from the expected changes (model interaction term) 139 | diff = future - (before + param_simple_effects.sum(axis=0)) 140 | # convert the 3d numpy array back to a pandas dataframe aggregated by the columns we are 141 | # interested in 142 | idx = pd.MultiIndex.from_frame(data[["pod", "sitetret"]]) # ty: ignore[invalid-argument-type] 143 | return pd.concat( 144 | [ 145 | pd.DataFrame(v.transpose(), columns=self.model.measures, index=idx) 146 | .groupby(level=idx.names) 147 | .sum() 148 | .assign(change_factor=k) 149 | .reset_index() 150 | for k, v in { 151 | **dict(zip(factors.columns, param_simple_effects)), 152 | term_name: diff, 153 | }.items() 154 | ] 155 | ) 156 | 157 | def get_aggregate_results(self) -> ModelRunResult: 158 | """Aggregate the model results. 159 | 160 | Can also be used to aggregate the baseline data by passing in the raw data. 161 | 162 | Returns: 163 | A tuple containing a dictionary of results, and the step counts. 164 | """ 165 | aggregations = self.model.aggregate(self) 166 | 167 | if not self.avoided_activity.empty: 168 | avoided_activity_agg = self.model.process_results(self.avoided_activity) 169 | aggregations["avoided_activity"] = self.model.get_agg( 170 | avoided_activity_agg, "sex", "age_group" 171 | ) 172 | 173 | return aggregations, self.get_step_counts() 174 | 175 | def get_step_counts(self) -> pd.Series | None: 176 | """Get the step counts of a model run.""" 177 | if self.step_counts is None: 178 | return None 179 | 180 | step_counts = ( 181 | self.step_counts.melt( 182 | [i for i in self.step_counts.columns if i not in self.model.measures], 183 | var_name="measure", 184 | ) 185 | .assign(activity_type=self.model.model_type) 186 | .set_index( 187 | [ 188 | "activity_type", 189 | "sitetret", 190 | "pod", 191 | "change_factor", 192 | "strategy", 193 | "measure", 194 | ] 195 | ) 196 | .sort_index()["value"] 197 | ) 198 | 199 | step_counts = self._step_counts_get_type_changes(step_counts) 200 | 201 | return step_counts 202 | 203 | def _step_counts_get_type_changes(self, step_counts) -> pd.Series: 204 | return pd.concat( 205 | [ 206 | step_counts, 207 | self._step_counts_get_type_change_daycase(step_counts), 208 | self._step_counts_get_type_change_outpatients(step_counts), 209 | self._step_counts_get_type_change_sdec(step_counts), 210 | ] 211 | ) # type: ignore 212 | 213 | def _step_counts_get_type_change_daycase(self, step_counts): 214 | # get the daycase conversion values 215 | sc_tc_df = ( 216 | step_counts[ 217 | step_counts.index.isin( 218 | ["day_procedures_usually_dc", "day_procedures_occasionally_dc"], 219 | level="strategy", 220 | ) 221 | ] 222 | .to_frame() 223 | .reset_index() 224 | ) 225 | sc_tc_df["pod"] = "ip_elective_daycase" 226 | sc_tc_df.loc[sc_tc_df["measure"] == "beddays", "value"] = sc_tc_df.loc[ 227 | sc_tc_df["measure"] == "admissions", "value" 228 | ].tolist() 229 | return sc_tc_df.groupby(step_counts.index.names)["value"].sum() * -1 230 | 231 | def _step_counts_get_type_change_outpatients(self, step_counts): 232 | # get the outpatient conversion values 233 | sc_tc_df = ( 234 | step_counts[ 235 | step_counts.index.isin( 236 | ["day_procedures_usually_op", "day_procedures_occasionally_op"], 237 | 
level="strategy", 238 | ) 239 | & (step_counts.index.get_level_values("measure") == "admissions") 240 | ] 241 | .to_frame() 242 | .reset_index() 243 | ) 244 | 245 | sc_tc_df["activity_type"] = "op" 246 | sc_tc_df["pod"] = "op_procedure" 247 | sc_tc_df["measure"] = "attendances" 248 | 249 | return sc_tc_df.groupby(step_counts.index.names)["value"].sum() * -1 250 | 251 | def _step_counts_get_type_change_sdec(self, step_counts): 252 | # get the sdec conversion values 253 | sc_tc_df = ( 254 | step_counts[ 255 | step_counts.index.isin( 256 | [ 257 | f"same_day_emergency_care_{i}" 258 | for i in ["very_high", "high", "moderate", "low"] 259 | ], 260 | level="strategy", 261 | ) 262 | & (step_counts.index.get_level_values("measure") == "admissions") 263 | ] 264 | .to_frame() 265 | .reset_index() 266 | ) 267 | 268 | sc_tc_df["activity_type"] = "aae" 269 | sc_tc_df["pod"] = "aae_type-05" 270 | sc_tc_df["measure"] = "arrivals" 271 | 272 | return sc_tc_df.groupby(step_counts.index.names)["value"].sum() * -1 273 | 274 | def get_model_results(self): 275 | """Get the model results of a model run.""" 276 | return self.data.reset_index(drop=True).drop(columns=["hsagrp"]) 277 | -------------------------------------------------------------------------------- /src/nhp/model/activity_resampling.py: -------------------------------------------------------------------------------- 1 | """Inpatient Row Resampling. 2 | 3 | Methods for handling row resampling 4 | """ 5 | 6 | from typing import TYPE_CHECKING 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | if TYPE_CHECKING: 12 | from nhp.model.model_iteration import ModelIteration 13 | 14 | 15 | class ActivityResampling: 16 | """Activity Resampling. 17 | 18 | Class for handling the activity resampling methods in the model. The class keeps track 19 | of the current row counts, which represent the value for the lambda parameter to a 20 | random poisson when we come to resample the rows, and the step counts (the estimated 21 | effect of each step on the total number of rows). 22 | 23 | The public methods of this class are intended to each be called either once, or not 24 | at all. 25 | These methods update the row counts by multiplying the current value by the factor 26 | generated from that method. 27 | 28 | Once all of the methods have been run, finally we need to call the `apply_resampling` 29 | method. 30 | This updates the `model_iteration` which is passed in at initialisation. 31 | 32 | Args: 33 | model_iteration: The model iteration object, which contains all of the required 34 | values to run the model. 35 | """ 36 | 37 | def __init__(self, model_iteration: "ModelIteration") -> None: 38 | """Initialise ActivityResampling. 39 | 40 | Args: 41 | model_iteration: The current model iteration we are performing. 
42 | """ 43 | self._model_iteration = model_iteration 44 | 45 | # initialise step counts 46 | self.factors = [] 47 | 48 | @property 49 | def _baseline_counts(self): 50 | return self._model_iteration.model.baseline_counts 51 | 52 | @property 53 | def _activity_type(self): 54 | return self._model_iteration.model.model_type 55 | 56 | @property 57 | def params(self): 58 | """Get the model's params.""" 59 | return self._model_iteration.params 60 | 61 | @property 62 | def run_params(self): 63 | """Get the current params for the model run.""" 64 | return self._model_iteration.run_params 65 | 66 | @property 67 | def demog_factors(self): 68 | """Get the demographic factors for the model.""" 69 | return self._model_iteration.model.demog_factors 70 | 71 | @property 72 | def birth_factors(self): 73 | """Get the birth factors for the model.""" 74 | return self._model_iteration.model.birth_factors 75 | 76 | @property 77 | def hsa(self): 78 | """Get the health status adjustment GAMs for the model.""" 79 | return self._model_iteration.model.hsa 80 | 81 | @property 82 | def inequalities_factors(self): 83 | """Get the inequalities factors for the model.""" 84 | return self._model_iteration.model.inequalities_factors 85 | 86 | @property 87 | def data(self): 88 | """Get the current model run's data.""" 89 | return self._model_iteration.data 90 | 91 | def _update(self, factor: pd.Series) -> "ActivityResampling": 92 | step = factor.name 93 | 94 | factor = ( 95 | self.data.merge(factor, how="left", left_on=factor.index.names, right_index=True)[step] 96 | .astype(float) 97 | .fillna(1.0) 98 | ) 99 | 100 | self.factors.append(factor) 101 | 102 | return self 103 | 104 | def demographic_adjustment(self) -> "ActivityResampling": 105 | """Perform the demographic adjustment.""" 106 | year = str(self.run_params["year"]) 107 | variant = self.run_params["variant"] 108 | 109 | factor = self.demog_factors.loc[(variant, slice(None), slice(None))][year].rename( 110 | "demographic_adjustment" 111 | ) 112 | 113 | groups = set(self.data["group"]) - {"maternity"} 114 | factor: pd.Series = pd.concat({i: factor for i in groups}) # type: ignore 115 | factor.index.names = ["group", *factor.index.names[1:]] 116 | 117 | return self._update(factor) 118 | 119 | def birth_adjustment(self) -> "ActivityResampling": 120 | """Perform the birth adjustment.""" 121 | year = str(self.run_params["year"]) 122 | variant = self.run_params["variant"] 123 | 124 | factor = self.birth_factors.loc[([variant], slice(None), slice(None))][year] 125 | 126 | factor = pd.Series( 127 | factor.values, 128 | name="birth_adjustment", 129 | index=pd.MultiIndex.from_tuples( 130 | [("maternity", a, s) for _, a, s in factor.index.to_numpy()], 131 | names=["group", "age", "sex"], 132 | ), 133 | ) 134 | 135 | return self._update(factor) 136 | 137 | def health_status_adjustment(self) -> "ActivityResampling": 138 | """Perform the health status adjustment.""" 139 | if not self.params["health_status_adjustment"]: 140 | return self 141 | 142 | return self._update(self.hsa.run(self.run_params)) 143 | 144 | def inequalities_adjustment(self) -> "ActivityResampling": 145 | """Perform the inequalities adjustment.""" 146 | activity_type = self._activity_type 147 | 148 | match activity_type: 149 | case "op": 150 | factor_key = "procedure" 151 | case "ip": 152 | factor_key = "elective" 153 | case _: 154 | return self 155 | 156 | if not self.params["inequalities"]: 157 | return self 158 | 159 | # TODO: currently only works for provider level model (we overwrite provider in PBM) 160 | # 
We need to match on ICB *and* provider for PBM 161 | 162 | factor = self.inequalities_factors.set_index(["icb", "sushrg_trimmed", "imd_quintile"])[ 163 | "factor" 164 | ] 165 | 166 | factor: pd.Series = pd.concat({factor_key: factor}, names=["group"]) # type: ignore 167 | factor.name = "inequalities" 168 | return self._update(factor) 169 | 170 | def expat_adjustment(self) -> "ActivityResampling": 171 | """Perform the expatriation adjustment.""" 172 | params = { 173 | k: v 174 | for k, v in self.run_params["expat"][self._activity_type].items() 175 | if v # remove empty values from the dictionary 176 | } 177 | if not params: 178 | return self 179 | 180 | factor: pd.Series = pd.concat( # type: ignore 181 | {k: pd.Series(v, name="expat") for k, v in params.items()} 182 | ) 183 | factor.index.names = ["group", "tretspef_grouped"] 184 | return self._update(factor) 185 | 186 | def repat_adjustment(self) -> "ActivityResampling": 187 | """Perform the repatriation adjustment.""" 188 | params = { 189 | (is_main_icb, k): pd.Series(v, name="repat") 190 | for (is_main_icb, repat_type) in [ 191 | (1, "repat_local"), 192 | (0, "repat_nonlocal"), 193 | ] 194 | for k, v in self.run_params[repat_type][self._activity_type].items() 195 | if v # remove empty values from the dictionary 196 | } 197 | if not params: 198 | return self 199 | 200 | factor: pd.Series = pd.concat(params) # type: ignore 201 | factor.index.names = ["is_main_icb", "group", "tretspef_grouped"] 202 | return self._update(factor) 203 | 204 | def baseline_adjustment(self) -> "ActivityResampling": 205 | """Perform the baseline adjustment. 206 | 207 | A value of 1 will indicate that we want to sample this row at the baseline rate. A value 208 | less than 1 will indicate we want to sample that row less often than in the baseline, and 209 | a value greater than 1 will indicate that we want to sample that row more often than in the 210 | baseline. 211 | """ 212 | if not (params := self.run_params["baseline_adjustment"][self._activity_type]): 213 | return self 214 | 215 | factor: pd.Series = pd.concat( # type: ignore 216 | { 217 | k: pd.Series(v, name="baseline_adjustment", dtype="float64") 218 | for k, v in params.items() 219 | } 220 | ) 221 | factor.index.names = ["group", "tretspef_grouped"] 222 | return self._update(factor) 223 | 224 | def waiting_list_adjustment(self) -> "ActivityResampling": 225 | """Perform the waiting list adjustment. 226 | 227 | A value of 1 will indicate that we want to sample this row at the baseline rate. A value 228 | less than 1 will indicate we want to sample that row less often than in the baseline, and 229 | a value greater than 1 will indicate that we want to sample that row more often than in the 230 | baseline.
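For example (illustrative values only), a waiting list adjustment of {"Other": 1.1} would sample waiting-list rows in the "Other" treatment specialty group roughly 10% more often than in the baseline.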
231 | """ 232 | activity_type = self._activity_type 233 | if activity_type == "aae": 234 | return self 235 | 236 | if not (params := self.run_params["waiting_list_adjustment"][activity_type]): 237 | return self 238 | 239 | factor = pd.Series(params) 240 | 241 | # update the index to include "True" for the is_wla field 242 | factor.index = pd.MultiIndex.from_tuples( 243 | [(True, i) for i in factor.index], names=["is_wla", "tretspef_grouped"] 244 | ) 245 | factor.name = "waiting_list_adjustment" 246 | 247 | return self._update(factor) 248 | 249 | def non_demographic_adjustment(self) -> "ActivityResampling": 250 | """Perform the non-demographic adjustment.""" 251 | if not (params := self.run_params["non-demographic_adjustment"][self._activity_type]): 252 | return self 253 | 254 | match self.params["non-demographic_adjustment"]["value-type"]: 255 | case "year-on-year-growth": 256 | year_exponent = self.run_params["year"] - self.params["start_year"] 257 | case x: 258 | raise ValueError(f"invalid value-type: {x}") 259 | 260 | factor = pd.Series(params).rename("non-demographic_adjustment") ** year_exponent 261 | factor.index.names = ["ndggrp"] 262 | return self._update(factor) 263 | 264 | def apply_resampling(self) -> tuple[pd.DataFrame, pd.DataFrame]: 265 | """Apply the row resampling to the data.""" 266 | # get the random sampling for each row 267 | rng = self._model_iteration.rng 268 | factors = pd.concat(self.factors, axis=1) 269 | 270 | # reshape this to be the same as baseline counts 271 | overall_factor = ( 272 | self._model_iteration.model.baseline_counts * factors.prod(axis=1).to_numpy() 273 | ) 274 | 275 | row_samples: np.ndarray = rng.poisson(overall_factor) # ty: ignore[invalid-assignment] 276 | 277 | step_counts = self._model_iteration.fix_step_counts( 278 | self.data, row_samples, factors, "model_interaction_term" 279 | ).assign(strategy="-") 280 | 281 | # apply the random sampling, update the data and get the counts 282 | data = self._model_iteration.model.apply_resampling(row_samples, self.data) 283 | 284 | return data, step_counts 285 | -------------------------------------------------------------------------------- /tests/unit/nhp/model/test_outpatients.py: -------------------------------------------------------------------------------- 1 | """Test outpatients model.""" 2 | 3 | from unittest.mock import Mock, call, patch 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import pytest 8 | 9 | from nhp.model.outpatients import OutpatientsModel 10 | 11 | 12 | # fixtures 13 | @pytest.fixture 14 | def mock_model(): 15 | """Create a mock Model instance.""" 16 | with patch.object(OutpatientsModel, "__init__", lambda s, p, d, h, r: None): 17 | mdl = OutpatientsModel(None, None, None, None) # type: ignore 18 | mdl.model_type = "op" 19 | mdl.params = { 20 | "dataset": "synthetic", 21 | "model_runs": 3, 22 | "seed": 1, 23 | "demographic_factors": { 24 | "file": "demographics_file.csv", 25 | "variant_probabilities": {"a": 0.6, "b": 0.4}, 26 | }, 27 | "start_year": 2018, 28 | "end_year": 2020, 29 | "health_status_adjustment": [0.8, 1.0], 30 | "waiting_list_adjustment": "waiting_list_adjustment", 31 | "expat": {"op": {"Other": [0.7, 0.9]}}, 32 | "repat_local": {"op": {"Other": [1.0, 1.2]}}, 33 | "repat_nonlocal": {"op": {"Other": [1.3, 1.5]}}, 34 | "non-demographic_adjustment": { 35 | "a": {"a_a": [1, 1.2], "a_b": [1, 1.2]}, 36 | "b": {"b_a": [1, 1.2], "b_b": [1, 1.2]}, 37 | }, 38 | "inpatient_factors": { 39 | "admission_avoidance": { 40 | "a_a": {"interval": [0.4, 0.6]}, 41 | "a_b": 
{"interval": [0.4, 0.6]}, 42 | }, 43 | "los_reduction": { 44 | "b_a": {"interval": [0.4, 0.6]}, 45 | "b_b": {"interval": [0.4, 0.6]}, 46 | }, 47 | }, 48 | "outpatient_factors": { 49 | "a": {"a_a": {"interval": [0.4, 0.6]}, "a_b": {"interval": [0.4, 0.6]}}, 50 | "b": {"b_a": {"interval": [0.4, 0.6]}, "b_b": {"interval": [0.4, 0.6]}}, 51 | }, 52 | "op_factors": { 53 | "a": {"a_a": {"interval": [0.4, 0.6]}, "a_b": {"interval": [0.4, 0.6]}}, 54 | "b": {"b_a": {"interval": [0.4, 0.6]}, "b_b": {"interval": [0.4, 0.6]}}, 55 | }, 56 | } 57 | # create a minimal data object for testing 58 | mdl.data = pd.DataFrame( 59 | { 60 | "rn": list(range(1, 21)), 61 | "age": list(range(1, 6)) * 4, 62 | "sex": ([1] * 5 + [2] * 5) * 2, 63 | "hsagrp": [x for _ in range(1, 11) for x in ["op_a_a", "op_b_b"]], 64 | } 65 | ) 66 | return mdl 67 | 68 | 69 | # methods 70 | 71 | 72 | def test_init_calls_super_init(mocker): 73 | """Test that the model calls the super method.""" 74 | # arrange 75 | super_mock = mocker.patch("nhp.model.outpatients.super") 76 | # act 77 | OutpatientsModel("params", "data_path", "hsa", "run_params") # type: ignore 78 | # assert 79 | super_mock.assert_called_once() 80 | 81 | 82 | def test_get_data(mock_model): 83 | # arrange 84 | mdl = mock_model 85 | data_loader = Mock() 86 | data_loader.get_op.return_value = "op data" 87 | 88 | # act 89 | actual = mdl._get_data(data_loader) 90 | 91 | # assert 92 | assert actual == "op data" 93 | data_loader.get_op.assert_called_once_with() 94 | 95 | 96 | def test_get_data_counts(mock_model): 97 | # arrange 98 | mdl = mock_model 99 | data = mdl.data 100 | data["attendances"] = list(range(1, 21)) 101 | data["tele_attendances"] = list(range(21, 41)) 102 | # act 103 | actual = mdl.get_data_counts(data) 104 | # assert 105 | assert actual.tolist() == [ 106 | [float(i) for i in range(1, 21)], 107 | [float(i) for i in range(21, 41)], 108 | ] 109 | 110 | 111 | def test_load_strategies(mock_model): 112 | # arrange 113 | mdl = mock_model 114 | mdl.data["has_procedures"] = [True] * 10 + [False] * 10 115 | mdl.data["is_first"] = ([True] * 5 + [False] * 5) * 2 116 | mdl.data["is_cons_cons_ref"] = [True] * 10 + [False] * 10 117 | mdl.data["type"] = ["a", "b", "c", "d", "e"] * 4 118 | mdl.data["is_gp_ref"] = [False] * 10 + [True] * 10 119 | # act 120 | mdl._load_strategies(None) 121 | # assert 122 | assert mdl.strategies["activity_avoidance"]["strategy"].to_list() == [ 123 | f"{i}_{j}" 124 | for i in ["followup_reduction"] 125 | + ["consultant_to_consultant_reduction"] * 2 126 | + ["gp_referred_first_attendance_reduction"] 127 | for j in ["a", "b", "c", "d", "e"] 128 | ] 129 | assert mdl.strategies["activity_avoidance"]["sample_rate"].to_list() == [1] * 20 130 | 131 | 132 | def test_convert_to_tele(mock_model): 133 | """Test that it mutates the data.""" 134 | # arrange 135 | mdl = mock_model 136 | 137 | mr_mock = Mock() 138 | mr_mock.rng.binomial.return_value = np.array([10, 15, 0, 20, 25, 0, 30, 35, 0, 40, 45, 0]) 139 | data = pd.DataFrame( 140 | { 141 | "rn": range(12), 142 | "pod": (["a"] * 3 + ["b"] * 3) * 2, 143 | "sitetret": ["c"] * 6 + ["d"] * 6, 144 | "has_procedures": [False, False, True] * 4, 145 | "type": ["a", "b", "a"] * 4, 146 | "attendances": [20, 25, 30] * 4, 147 | "tele_attendances": [5, 10, 0] * 4, 148 | } 149 | ) 150 | mr_mock.run_params = { 151 | "efficiencies": {"op": {"convert_to_tele_a": 0.25, "convert_to_tele_b": 0.5}} 152 | } 153 | mr_mock.model.strategies = { 154 | "efficiencies": pd.DataFrame( 155 | [ 156 | {"rn": k, "strategy": 
"convert_to_tele_a" if k % 2 else "convert_to_tele_b"} 157 | for k in data["rn"] 158 | ] 159 | ).set_index("rn") 160 | } 161 | 162 | # act 163 | actual_data, actual_step_counts = mdl._convert_to_tele(data.copy(), mr_mock) 164 | 165 | # assert 166 | assert actual_data["attendances"].to_list() == [ 167 | 10, 168 | 10, 169 | 30, 170 | 0, 171 | 0, 172 | 30, 173 | -10, 174 | -10, 175 | 30, 176 | -20, 177 | -20, 178 | 30, 179 | ] 180 | assert actual_data["tele_attendances"].to_list() == [ 181 | 15, 182 | 25, 183 | 0, 184 | 25, 185 | 35, 186 | 0, 187 | 35, 188 | 45, 189 | 0, 190 | 45, 191 | 55, 192 | 0, 193 | ] 194 | 195 | assert mr_mock.rng.binomial.call_args == call( 196 | data["attendances"].to_list(), [i for _ in range(6) for i in [0.5, 0.75]] 197 | ) 198 | 199 | assert actual_step_counts.to_dict("list") == { 200 | "pod": ["a", "a", "b", "b"], 201 | "sitetret": ["c", "d", "c", "d"], 202 | "change_factor": [ 203 | "efficiencies", 204 | "efficiencies", 205 | "efficiencies", 206 | "efficiencies", 207 | ], 208 | "strategy": [ 209 | "convert_to_tele", 210 | "convert_to_tele", 211 | "convert_to_tele", 212 | "convert_to_tele", 213 | ], 214 | "attendances": [-25, -65, -45, -85], 215 | "tele_attendances": [25, 65, 45, 85], 216 | } 217 | 218 | 219 | def test_apply_resampling(mocker, mock_model): 220 | # arrange 221 | row_samples = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) 222 | # act 223 | data = mock_model.apply_resampling(row_samples, pd.DataFrame()) 224 | # assert 225 | assert data["attendances"].to_list() == [1, 2, 3, 4] 226 | assert data["tele_attendances"].to_list() == [5, 6, 7, 8] 227 | 228 | 229 | def test_efficiencies(mock_model): 230 | """Test that it runs the model steps.""" 231 | # arrange 232 | mdl = mock_model 233 | data = pd.DataFrame({"x": [1]}) 234 | 235 | mdl._convert_to_tele = Mock(return_value=("data", "step_counts")) 236 | 237 | # act 238 | actual = mdl.efficiencies(data, "model_run") 239 | 240 | # assert 241 | assert actual == ("data", "step_counts") 242 | 243 | mdl._convert_to_tele.assert_called_once() 244 | assert mdl._convert_to_tele.call_args[0][0].to_dict("list") == {"x": [1]} 245 | assert mdl._convert_to_tele.call_args[0][1] == "model_run" 246 | 247 | 248 | def test_process_results(mock_model): 249 | # arrange 250 | df = pd.DataFrame( 251 | { 252 | "sitetret": ["trust"] * 4, 253 | "is_first": [True, True, False, False], 254 | "has_procedures": [False, True, False, True], 255 | "tretspef": [1, 1, 1, 1], 256 | "tretspef_grouped": [1, 1, 1, 1], 257 | "rn": [1, 2, 3, 4], 258 | "attendances": [5, 6, 7, 8], 259 | "tele_attendances": [9, 10, 11, 12], 260 | "age": [1, 1, 1, 1], 261 | "age_group": [1, 1, 1, 1], 262 | "sex": [1, 1, 1, 1], 263 | "pod": ["op_first", "op_procedure", "op_follow-up", "op_procedure"], 264 | } 265 | ) 266 | expected = { 267 | "pod": [k for k in ["op_first", "op_follow-up", "op_procedure"] for _ in [0, 1]], 268 | "sitetret": ["trust"] * 6, 269 | "measure": ["attendances", "tele_attendances"] * 3, 270 | "sex": [1] * 6, 271 | "age": [1] * 6, 272 | "age_group": [1] * 6, 273 | "tretspef": [1] * 6, 274 | "tretspef_grouped": [1] * 6, 275 | "value": [5, 9, 7, 11, 14, 22], 276 | } 277 | # act 278 | actual = mock_model.process_results(df) 279 | # assert 280 | assert actual.to_dict("list") == expected 281 | 282 | 283 | def test_specific_aggregations(mocker, mock_model): 284 | """Test that it aggregates the results correctly.""" 285 | # arrange 286 | m = mocker.patch("nhp.model.OutpatientsModel.get_agg", return_value="agg_data") 287 | 288 | mdl = mock_model 289 | 290 | # 
act 291 | actual = mdl.specific_aggregations("results") # type: ignore 292 | 293 | # assert 294 | assert actual == { 295 | "sex+tretspef_grouped": "agg_data", 296 | "tretspef": "agg_data", 297 | } 298 | 299 | assert m.call_args_list == [ 300 | call("results", "sex", "tretspef_grouped"), 301 | call("results", "tretspef"), 302 | ] 303 | 304 | 305 | def test_save_results(mocker, mock_model): 306 | """Test that it correctly saves the results.""" 307 | 308 | def path_fn(x): 309 | return x 310 | 311 | mr_mock = Mock() 312 | mr_mock.get_model_results.return_value = pd.DataFrame( 313 | {"rn": [0], "attendances": [1], "tele_attendances": [2]} 314 | ) 315 | mr_mock.avoided_activity = pd.DataFrame( 316 | {"rn": [0], "attendances": [1], "tele_attendances": [0]} 317 | ) 318 | 319 | to_parquet_mock = mocker.patch("pandas.DataFrame.to_parquet") 320 | mock_model.save_results(mr_mock, path_fn) 321 | assert to_parquet_mock.call_args_list == [ 322 | call("op/0.parquet"), 323 | call("op_avoided/0.parquet"), 324 | ] 325 | 326 | 327 | def test_calculate_avoided_activity(mock_model): 328 | # arrange 329 | data = pd.DataFrame({"rn": [0], "attendances": [4], "tele_attendances": [3]}) 330 | data_resampled = pd.DataFrame({"rn": [0], "attendances": [2], "tele_attendances": [1]}) 331 | # act 332 | actual = mock_model.calculate_avoided_activity(data, data_resampled) 333 | # assert 334 | assert actual.to_dict(orient="list") == { 335 | "rn": [0], 336 | "attendances": [2], 337 | "tele_attendances": [2], 338 | } 339 | -------------------------------------------------------------------------------- /src/nhp/model/results.py: -------------------------------------------------------------------------------- 1 | """Methods to work with results of the model. 2 | 3 | This module allows you to work with the results of the model. Namely, combining the monte carlo runs 4 | into a single pandas DataFrame, and helping with saving the results files. 5 | """ 6 | 7 | import json 8 | import logging 9 | import os 10 | from typing import Dict, List 11 | 12 | import janitor 13 | import pandas as pd 14 | 15 | from nhp.model.model_iteration import ModelRunResult 16 | 17 | 18 | def _complete_model_runs( 19 | res: List[pd.DataFrame], model_runs: int, include_baseline: bool = True 20 | ) -> pd.DataFrame: 21 | """Complete the data frame for all model runs. 22 | 23 | If any aggregation returns rows for only some of the model runs, we need to add a "0" row for 24 | that run. 25 | 26 | Args: 27 | res: List of model results. 28 | model_runs: The number of model runs. 29 | include_baseline: Whether to include model run 0 (the baseline) or not. Defaults to True. 30 | 31 | Returns: 32 | Combined and completed data frame. 33 | """ 34 | results = pd.concat(res) 35 | results: pd.DataFrame = results.groupby( # type: ignore 36 | [i for i in results.columns if i != "value"], as_index=False 37 | )["value"].sum() 38 | 39 | return janitor.complete( 40 | results, 41 | [i for i in results.columns if i != "model_run" if i != "value"], 42 | {"model_run": range(0 if include_baseline else 1, model_runs + 1)}, 43 | fill_value={"value": 0}, 44 | ) 45 | 46 | 47 | def _combine_model_results( 48 | results: list[list[ModelRunResult]], 49 | ) -> dict[str, pd.DataFrame]: 50 | """Combine the results of the monte carlo runs. 51 | 52 | Takes as input a list of lists, where the outer list contains an item for inpatients, 53 | outpatients and a&e runs, and the inner list contains the results of the monte carlo runs.
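For example (illustrative of the expected shape only): results[0] would hold the inpatients runs, with results[0][0] being the baseline run and each later item one monte carlo run; as used below, each item unpacks to a pair of (aggregated results, step counts).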
54 | 55 | Args: 56 | results: A list containing the model results. 57 | 58 | Returns: 59 | Dictionary containing the combined model results. 60 | """ 61 | aggregations = sorted(list({k for r in results for v, _ in r for k in v.keys()})) 62 | 63 | model_runs = len(results[0]) - 1 64 | 65 | return { 66 | k: _complete_model_runs( 67 | [ 68 | v[k].reset_index().assign(model_run=i) 69 | for r in results 70 | for (i, (v, _)) in enumerate(r) 71 | if k in v 72 | ], 73 | model_runs, 74 | ) 75 | for k in aggregations 76 | } 77 | 78 | 79 | def _combine_step_counts(results: list) -> pd.DataFrame: 80 | """Combine the step counts of the monte carlo runs. 81 | 82 | Takes as input a list of lists, where the outer list contains an item for inpatients, 83 | outpatients and a&e runs, and the inner list contains the results of the monte carlo runs. 84 | 85 | Args: 86 | results: A list containing the model results. 87 | 88 | Returns: 89 | DataFrame containing the model step counts. 90 | """ 91 | model_runs = len(results[0]) - 1 92 | return _complete_model_runs( 93 | [ 94 | v 95 | # TODO: handle the case of daycase conversion, it's duplicating values 96 | # need to figure out exactly why, but this masks the issue for now 97 | .groupby(v.index.names) 98 | .sum() 99 | .reset_index() 100 | .assign(model_run=i) 101 | for r in results 102 | for i, (_, v) in enumerate(r) 103 | if i > 0 104 | ], 105 | model_runs, 106 | include_baseline=False, 107 | ) 108 | 109 | 110 | def generate_results_json( 111 | combined_results: dict[str, pd.DataFrame], 112 | combined_step_counts: pd.DataFrame, 113 | params: dict, 114 | run_params: dict, 115 | ) -> str: 116 | """Generate the results in the json format and save.""" 117 | 118 | def agg_to_dict(res): 119 | results_df = res.set_index("model_run") 120 | return ( 121 | pd.concat( 122 | [ 123 | results_df.loc[0] 124 | .set_index([i for i in results_df.columns if i != "value"]) 125 | .rename(columns={"value": "baseline"}), 126 | results_df.loc[results_df.index != 0] 127 | .groupby([i for i in results_df.columns if i != "value"]) 128 | .agg(list) 129 | .rename(columns={"value": "model_runs"}), 130 | ], 131 | axis=1, 132 | ) 133 | .reset_index() 134 | .to_dict(orient="records") 135 | ) 136 | 137 | dict_results = {k: agg_to_dict(v) for k, v in combined_results.items()} 138 | 139 | dict_results["step_counts"] = ( 140 | combined_step_counts.groupby( # ty: ignore[no-matching-overload] 141 | [ 142 | "pod", 143 | "change_factor", 144 | "strategy", 145 | "sitetret", 146 | "activity_type", 147 | "measure", 148 | ] 149 | )[["value"]] 150 | .agg(list) 151 | .reset_index() 152 | .to_dict("records") 153 | ) 154 | 155 | for i in dict_results["step_counts"]: 156 | i["model_runs"] = i.pop("value") 157 | if i["change_factor"] == "baseline": 158 | i["model_runs"] = i["model_runs"][0:1] 159 | if i["strategy"] == "-": 160 | i.pop("strategy") 161 | 162 | filename = f"{params['dataset']}/{params['scenario']}-{params['create_datetime']}" 163 | os.makedirs(f"results/{params['dataset']}", exist_ok=True) 164 | with open(f"results/{filename}.json", "w", encoding="utf-8") as file: 165 | json.dump( 166 | { 167 | "params": params, 168 | "population_variants": run_params["variant"], 169 | "results": dict_results, 170 | }, 171 | file, 172 | ) 173 | return filename 174 | 175 | 176 | def save_results_files(results: dict, params: dict) -> list: 177 | """Save aggregated and combined results as parquet, and params as JSON. 178 | 179 | Args: 180 | results: The results of running the models, processed into one dictionary. 
181 | params: The parameters used for the model run. 182 | 183 | Returns: 184 | Filepaths to saved files. 185 | """ 186 | path = f"results/{params['dataset']}/{params['scenario']}/{params['create_datetime']}" 187 | os.makedirs(path, exist_ok=True) 188 | 189 | return [ 190 | *[_save_parquet_file(path, k, v, params) for k, v in results.items()], 191 | _save_params_file(path, params), 192 | ] 193 | 194 | 195 | def _add_metadata_to_dataframe(df: pd.DataFrame, params: dict) -> pd.DataFrame: 196 | """Add metadata as columns to the dataframe. 197 | 198 | Add metadata as columns to the dataframe, so that the saved parquet files have useful 199 | information regarding their provenance. 200 | 201 | Args: 202 | df: The dataframe that we want to add the metadata to. 203 | params: The parameters for the model run, which include metadata. 204 | 205 | Returns: 206 | The dataframe, with additional columns "dataset", "scenario" and "create_datetime". 207 | """ 208 | metadata_to_save = ["dataset", "scenario", "app_version", "create_datetime"] 209 | for m in metadata_to_save: 210 | df[m] = params[m] 211 | return df 212 | 213 | 214 | def _save_parquet_file(path: str, results_name: str, results_df: pd.DataFrame, params: dict) -> str: 215 | """Save a results dataframe as parquet. 216 | 217 | Args: 218 | path: The folder where we want to save the results to. 219 | results_name: The name of this aggregation. 220 | results_df: The results dataframe. 221 | params: The parameters for the model run. 222 | 223 | Returns: 224 | The filename of the saved file. 225 | """ 226 | results_df = _add_metadata_to_dataframe(results_df, params) 227 | results_df.to_parquet(filename := f"{path}/{results_name}.parquet") 228 | return filename 229 | 230 | 231 | def _save_params_file(path: str, params: dict) -> str: 232 | """Save the model runs parameters as json. 233 | 234 | Args: 235 | path: The folder where we want to save the results to. 236 | params: The parameters the model was run with. 237 | 238 | Returns: 239 | The filename of the saved file. 
240 | """ 241 | with open(filename := f"{path}/params.json", "w", encoding="utf-8") as file: 242 | json.dump(params, file) 243 | return filename 244 | 245 | 246 | def _patch_converted_sdec_activity( 247 | results: Dict[str, pd.DataFrame], column: str, col_value: str 248 | ) -> None: 249 | """Patch the converted SDEC activity in the dataframe.""" 250 | results_df = results[column] 251 | agg_cols = ["pod", "sitetret", "measure", "model_run"] 252 | 253 | default_sdec = ( 254 | results["default"].query("pod == 'aae_type-05'").set_index(agg_cols)["value"].rename("b") 255 | ) 256 | 257 | missing_sdec_activity = ( 258 | pd.concat( 259 | [ 260 | default_sdec, 261 | ( 262 | results_df.query("pod == 'aae_type-05'") 263 | .groupby(agg_cols)["value"] # ty: ignore[no-matching-overload] 264 | .sum() 265 | .rename("a") 266 | ), 267 | ], 268 | axis=1, 269 | ) 270 | .fillna(0) 271 | .reset_index() 272 | .assign(value=lambda x: x["b"] - x["a"]) 273 | .drop(columns=["b", "a"]) 274 | ) 275 | missing_sdec_activity[column] = col_value 276 | 277 | df_fixed = ( 278 | pd.concat([results_df, missing_sdec_activity], axis=0) 279 | .groupby( 280 | ["pod", "sitetret", "measure", column, "model_run"], 281 | as_index=False, 282 | ) # ty: ignore[no-matching-overload] 283 | .sum() 284 | ) 285 | 286 | df_fixed["value"] = df_fixed["value"].astype("int64") 287 | 288 | results[column] = df_fixed 289 | 290 | 291 | def combine_results( 292 | results: list[list[ModelRunResult]], 293 | ) -> tuple[dict[str, pd.DataFrame], pd.DataFrame]: 294 | """Combine the results into a single dictionary. 295 | 296 | When we run the models we have an array containing 3 items [inpatients, outpatient, a&e]. 297 | Each of which contains one item for each model run, which is a dictionary. 298 | 299 | Args: 300 | results: The results of running the models. 301 | 302 | Returns: 303 | Tuple containing combined model results dictionary and combined step counts DataFrame. 304 | """ 305 | logging.info(" * starting to combine results") 306 | 307 | combined_results = _combine_model_results(results) 308 | combined_step_counts = _combine_step_counts(results) 309 | 310 | # TODO: this is a bit of a hack, but we need to patch the converted SDEC activity 311 | # because inpatients activity is aggregated differently to a&e, the a&e aggregations will be 312 | # missing the converted SDEC activity, so we need to add it back in 313 | _patch_converted_sdec_activity(combined_results, "acuity", "standard") 314 | _patch_converted_sdec_activity(combined_results, "attendance_category", "1") 315 | 316 | logging.info(" * finished combining results") 317 | return combined_results, combined_step_counts 318 | -------------------------------------------------------------------------------- /.github/copilot-instructions.md: -------------------------------------------------------------------------------- 1 | # NHP Model - Copilot Coding Agent Instructions 2 | 3 | ## Repository Overview 4 | 5 | This is the **New Hospital Programme (NHP) Demand Model**, a Python package for healthcare activity prediction. The model provides modeling capabilities for inpatients, outpatients, and A&E (Accident & Emergency) services. It is built as a Python library using modern packaging tools and is deployed as both a Python package and a Docker container to Azure. 
6 | 7 | **Key Facts:** 8 | - **Project Type:** Python package/library with Docker containerization 9 | - **Python Version:** Requires Python 3.11 or higher (specified in pyproject.toml) 10 | - **Package Manager:** `uv` (modern Python package manager from Astral) 11 | - **Build System:** setuptools with setuptools-scm for versioning 12 | - **Primary Language:** Python 13 | - **Project Size:** Medium-sized Python project 14 | - **Main Modules:** nhp.model (core model code), nhp.docker (Docker runtime) 15 | 16 | ## Environment Setup and Build Instructions 17 | 18 | ### Initial Setup 19 | 20 | **ALWAYS start by installing uv and project dependencies:** 21 | 22 | ```bash 23 | # Install uv using the recommended approach from Astral 24 | curl -LsSf https://astral.sh/uv/install.sh | sh 25 | 26 | # Install project dependencies (production only) 27 | uv sync 28 | 29 | # Install with dev dependencies for development/testing (RECOMMENDED for development) 30 | uv sync --extra dev 31 | 32 | # Install with docs dependencies for documentation 33 | uv sync --extra docs 34 | 35 | # Install multiple extras at once 36 | uv sync --extra dev --extra docs 37 | ``` 38 | 39 | **Important:** The `uv sync` command only installs production dependencies. For development work (linting, testing), use `uv sync --extra dev` to install the dev dependencies. 40 | 41 | **Python Version:** The project requires Python 3.11+. The CI uses Python 3.11 specifically via `uv python install` in workflows. 42 | 43 | ### Build Commands 44 | 45 | **To build the package:** 46 | 47 | ```bash 48 | # Standard build - creates wheel and source distribution 49 | uv build 50 | 51 | # Build for development (sets version to 0.dev0) 52 | SETUPTOOLS_SCM_PRETEND_VERSION=0.dev0 uv build 53 | ``` 54 | 55 | The build creates: 56 | - `dist/nhp_model--py3-none-any.whl` 57 | - `dist/nhp_model-.tar.gz` 58 | 59 | **Note:** The Dockerfile includes a TODO comment about forcing version numbers during Docker builds. Currently it uses `ENV SETUPTOOLS_SCM_PRETEND_VERSION=v0.0.0` as a workaround. 60 | 61 | ### Testing 62 | 63 | **Unit Tests (ALWAYS run these before committing):** 64 | 65 | ```bash 66 | # Run all unit tests 67 | uv run pytest tests/unit --verbose 68 | 69 | # Run unit tests with coverage report 70 | uv run pytest --cov=. tests/unit --ignore=tests --cov-branch --cov-report xml:coverage.xml 71 | ``` 72 | 73 | **Integration Tests:** 74 | 75 | ```bash 76 | # Integration tests require test data in a specific format 77 | # These are located in tests/integration/ but may require data setup 78 | uv run pytest tests/integration --verbose 79 | ``` 80 | 81 | **All unit tests must pass. Test failures are NOT acceptable.** 82 | 83 | ### Linting and Formatting 84 | 85 | **ALWAYS run linting before committing. All linting checks MUST pass:** 86 | 87 | ```bash 88 | # Run ruff linting check 89 | uvx ruff check . 90 | 91 | # Run ruff format check (no auto-formatting) 92 | uvx ruff format --check . 93 | 94 | # Auto-format code (if needed) 95 | uvx ruff format . 96 | 97 | # Run type checking with ty 98 | uvx ty check . 
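# Optionally apply automatic fixes for many lint rules locally (ruff's standard
# --fix flag; not one of the CI checks listed above):
uvx ruff check --fix .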
99 | ``` 100 | 101 | **Linting Configuration:** 102 | - Ruff config is in `pyproject.toml` under `[tool.ruff]` 103 | - Line length: 100 characters 104 | - Target Python version: 3.11 105 | - Excludes: `notebooks/` directory 106 | - Key rules: pydocstyle (D), pycodestyle (E/W), isort (I), pylint (PL), pandas-vet (PD), numpy (NPY), ruff-specific (RUF) 107 | - Docstring convention: Google style 108 | 109 | **The notebooks directory is excluded from linting and should not be linted.** 110 | 111 | ### Documentation 112 | 113 | ```bash 114 | # Build documentation (requires docs dependencies) 115 | uv run mkdocs build --clean 116 | 117 | # Serve documentation locally 118 | uv run mkdocs serve 119 | ``` 120 | 121 | Documentation is deployed automatically to Connect via CI on main branch pushes. 122 | 123 | ### Running the Model 124 | 125 | **Local execution:** 126 | 127 | ```bash 128 | # Run with sample parameters (requires data in specified path) 129 | uv run python -m nhp.model queue/params-sample.json -d data/synth --type all 130 | 131 | # Run single model type 132 | uv run python -m nhp.model queue/params-sample.json -d data --type ip # inpatients 133 | uv run python -m nhp.model queue/params-sample.json -d data --type op # outpatients 134 | uv run python -m nhp.model queue/params-sample.json -d data --type aae # A&E 135 | 136 | # Run specific model iteration for debugging 137 | uv run python -m nhp.model queue/params-sample.json -d data --model-run 1 --type ip 138 | ``` 139 | 140 | **Command-line arguments:** 141 | - `params_file`: Path to JSON parameters file (default: `queue/params-sample.json`) 142 | - `-d, --data-path`: Path to data directory (default: `data`) 143 | - `-r, --model-run`: Which model iteration to run (default: 1) 144 | - `-t, --type`: Model type - `all`, `ip`, `op`, or `aae` (default: `all`) 145 | - `--save-full-model-results`: Save complete model results 146 | 147 | **Data Requirements:** 148 | The model expects data in parquet format organized by fiscal year and dataset: 149 | - Format: `{data_path}/{file}/fyear={year}/dataset={dataset}/` 150 | - Required files: `ip`, `op`, `aae`, `demographic_factors`, `birth_factors`, `hsa_activity_tables`, `hsa_gams` (pickle) 151 | - Sample data location: `data/synth/` (synthetic dataset for testing - see GitHub issue #347) 152 | 153 | ## Project Structure 154 | 155 | ### Directory Layout 156 | 157 | **Core Directories:** 158 | - `.github/workflows/` - CI/CD pipelines (linting, codecov, build, deploy) 159 | - `src/nhp/model/` - Core model: `__main__.py`, `model.py`, `inpatients.py`, `outpatients.py`, `aae.py`, `run.py`, `results.py`, `data/` 160 | - `src/nhp/docker/` - Docker runtime with Azure Storage integration 161 | - `tests/unit/` - Unit tests 162 | - `tests/integration/` - Integration tests (require data) 163 | - `docs/` - MkDocs documentation 164 | - `notebooks/` - Databricks notebooks (excluded from linting) 165 | - `queue/` - Parameter files (params-sample.json) 166 | 167 | **Key Configuration Files:** 168 | - `pyproject.toml` - Project metadata, dependencies, ruff/pytest/setuptools config 169 | - `uv.lock` - Locked dependency versions (DO NOT modify manually) 170 | - `params-schema.json` - JSON schema for model parameters (deployed to GitHub Pages) 171 | 172 | ### Architecture Overview 173 | 174 | **Model Hierarchy:** 175 | - `Model` (base class in model.py) - Common model functionality 176 | - `InpatientsModel` - Inpatient demand modeling 177 | - `OutpatientsModel` - Outpatient demand modeling 178 | - `AaEModel` - A&E demand 
modeling 179 | 180 | **Execution Flow:** 181 | 1. `__main__.py` parses CLI arguments and loads parameters 182 | 2. `run.py` orchestrates model execution (single or parallel runs) 183 | 3. `ModelIteration` runs a single model iteration 184 | 4. Results are aggregated and saved by `results.py` 185 | 186 | **Data Loading:** 187 | - Abstract `Data` interface allows multiple data sources 188 | - `Local` loads from local parquet files 189 | - `DatabricksNational` loads from Databricks (used in notebooks) 190 | 191 | ## CI/CD Validation Pipeline 192 | 193 | ### Pull Request Checks 194 | 195 | **Every pull request triggers these workflows (ALL MUST PASS):** 196 | 197 | 1. **Linting** (`.github/workflows/linting.yaml`): 198 | - `ruff check` - Code quality checks 199 | - `ruff format --check` - Code formatting verification 200 | - `ty check .` - Type checking 201 | 202 | 2. **Code Coverage** (`.github/workflows/codecov.yaml`): 203 | - Runs unit tests with coverage 204 | - Uploads to Codecov 205 | - Requires passing tests 206 | 207 | **IMPORTANT:** All linting and test checks must pass before merge. DO NOT skip or disable these checks. 208 | 209 | ### Main Branch / Release Workflows 210 | 211 | On push to main or tags: 212 | 213 | 1. **build_app.yaml**: Builds Python wheel, uploads to Azure Storage and GitHub releases 214 | 2. **build_schema.yaml**: Deploys params-schema.json to GitHub Pages 215 | 3. **build_container.yaml**: Builds and pushes Docker image to GitHub Container Registry 216 | 4. **deploy_docs.yaml**: Builds and deploys MkDocs documentation to RStudio Connect 217 | 218 | ### Docker Deployment 219 | 220 | The model is containerized using: 221 | - Base image: `ghcr.io/astral-sh/uv:python3.11-alpine` 222 | - Build args: `app_version`, `data_version`, `storage_account` 223 | - Entry point: `python -m nhp.docker` 224 | - Tags: `dev` (PRs), `v*.*.*` (releases), `latest` (latest release) 225 | 226 | ## Common Issues and Workarounds 227 | 228 | **Known Issues:** 229 | 1. **Dockerfile Version**: Uses `ENV SETUPTOOLS_SCM_PRETEND_VERSION=v0.0.0` because setuptools-scm needs git metadata (TODO: build wheel and copy instead) 230 | 2. **Data Structure**: Model expects parquet files at `{data_path}/{file}/fyear={year}/dataset={dataset}/`. Missing files cause runtime errors. 231 | 3. **Notebooks**: `notebooks/` directory excluded from linting - don't lint these Databricks notebooks. 232 | 233 | **Environment Variables (Docker):** 234 | - `APP_VERSION`, `DATA_VERSION` (default: "dev") 235 | - `STORAGE_ACCOUNT` (required for Azure), `BATCH_SIZE` (default: 16) 236 | - `.env` file supported via python-dotenv for local development 237 | 238 | ## Testing Strategy 239 | 240 | - **Unit Tests**: `tests/unit/` - Mock-based, parameterized. **ALWAYS run before committing.** 241 | - **Integration Tests**: `tests/integration/` - Require properly formatted test data, test end-to-end runs 242 | - **Test Organization**: pytest-mock for mocking, fixtures in `tests/conftest.py` 243 | - **Coverage**: High coverage maintained via Codecov integration 244 | 245 | ## Best Practices for Coding Agents 246 | 247 | 1. **ALWAYS install dependencies first**: Run `uv sync --extra dev` before any development work. 248 | 249 | 2. **ALWAYS run linting before committing**: Run `uvx ruff check .` and `uvx ruff format --check .` - these MUST pass. 250 | 251 | 3. **ALWAYS run unit tests**: Run `uv run pytest tests/unit` before committing - all tests MUST pass. 252 | 253 | 4. 
**Follow Google docstring convention**: All public functions/classes must have Google-style docstrings (enforced by ruff). 254 | 255 | 5. **Respect line length**: Maximum 100 characters per line (ruff will enforce this). 256 | 257 | 6. **Don't modify notebooks**: The `notebooks/` directory is excluded from linting for a reason. These are Databricks notebooks with special formatting. 258 | 259 | 7. **Use uv for all Python commands**: Prefix commands with `uv run` to ensure correct virtual environment usage. 260 | 261 | 8. **Don't modify uv.lock manually**: Use `uv sync` to update dependencies. 262 | 263 | 9. **Test locally before pushing**: The CI checks are strict and will fail if linting/tests don't pass. 264 | 265 | 10. **Understand the data structure**: The model requires specific data formats. If testing model execution, ensure proper test data is available or use existing test fixtures. 266 | 267 | ## Quick Reference 268 | 269 | ```bash 270 | # Setup (production + dev dependencies) 271 | curl -LsSf https://astral.sh/uv/install.sh | sh 272 | uv sync --extra dev 273 | 274 | # Lint (MUST pass) 275 | uvx ruff check . 276 | uvx ruff format --check . 277 | 278 | # Test (MUST pass) 279 | uv run pytest tests/unit --verbose 280 | 281 | # Build 282 | uv build 283 | 284 | # Run model (requires data) 285 | uv run python -m nhp.model queue/params-sample.json -d data --type all 286 | 287 | # Build docs (requires docs extras) 288 | uv sync --extra docs 289 | uv run mkdocs build --clean 290 | ``` 291 | 292 | **When in doubt, check the CI workflows in `.github/workflows/` - they define the exact validation steps used in the pipeline.** 293 | --------------------------------------------------------------------------------