├── tests
│   ├── unit
│   │   └── nhp
│   │       ├── model
│   │       │   ├── __init__.py
│   │       │   ├── test___init__.py
│   │       │   ├── test_helpers.py
│   │       │   ├── data
│   │       │   │   ├── test_data.py
│   │       │   │   ├── test_reference.py
│   │       │   │   └── test_local.py
│   │       │   ├── test_params.py
│   │       │   ├── test__main__.py
│   │       │   ├── test_run.py
│   │       │   ├── test_aae.py
│   │       │   ├── test_inpatient_efficiencies.py
│   │       │   └── test_outpatients.py
│   │       └── docker
│   │           ├── test_config.py
│   │           └── test___main__.py
│   ├── conftest.py
│   └── integration
│       └── nhp
│           └── model
│               ├── test_params_validation.py
│               └── test_run_model.py
├── .coveragerc
├── src
│   └── nhp
│       ├── docker
│       │   ├── __init__.py
│       │   ├── config.py
│       │   ├── __main__.py
│       │   └── run.py
│       └── model
│           ├── data
│           │   ├── __init__.py
│           │   ├── reference
│           │   │   ├── variant_lookup.json
│           │   │   ├── __init__.py
│           │   │   └── hsa_split_normal_params.csv
│           │   ├── data.py
│           │   └── local.py
│           ├── __init__.py
│           ├── helpers.py
│           ├── params
│           │   ├── __main__.py
│           │   └── __init__.py
│           ├── __main__.py
│           ├── run.py
│           ├── aae.py
│           ├── health_status_adjustment.py
│           ├── outpatients.py
│           ├── model_iteration.py
│           ├── activity_resampling.py
│           └── results.py
├── .vscode
│   ├── extensions.json
│   ├── settings.json
│   ├── tasks.json
│   └── launch.json
├── codecov.yml
├── CODEOWNERS
├── .github
│   ├── workflows
│   │   ├── deploy_dev.yaml
│   │   ├── codecov.yaml
│   │   ├── linting.yaml
│   │   ├── deploy_release.yaml
│   │   ├── remove_untagged_container_images.yaml
│   │   ├── removed_closed_prs.yaml
│   │   ├── build_container.yaml
│   │   ├── deploy_docs.yaml
│   │   ├── deploy_pr.yaml
│   │   ├── build_schema.yaml
│   │   └── build_app.yaml
│   ├── dependabot.yml
│   └── copilot-instructions.md
├── docs
│   ├── gen_ref_pages.py
│   └── index.md
├── LICENSE
├── mkdocs.yml
├── Dockerfile
├── pyproject.toml
├── readme.md
└── .gitignore
/tests/unit/nhp/model/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit =
3 | tests/*
--------------------------------------------------------------------------------
/src/nhp/docker/__init__.py:
--------------------------------------------------------------------------------
1 | """NHP Demand Model - Docker runtime."""
2 |
--------------------------------------------------------------------------------
/tests/unit/nhp/model/test___init__.py:
--------------------------------------------------------------------------------
1 | """Test __init__.py."""
2 |
3 | import nhp.model as mdl
4 |
5 | # no tests other than ability to import
6 |
--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 | "recommendations": [
3 | "ryanluker.vscode-coverage-gutters",
4 | "ms-python.python",
5 | "ms-toolsai.jupyter",
6 | "ms-python.pylint"
7 | ]
8 | }
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | coverage:
2 | status:
3 | project:
4 | default:
5 | target: 100%
6 | threshold: 0%
7 | patch:
8 | default:
9 | target: 100%
10 | threshold: 0%
11 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 |
4 | def pytest_addoption(parser):
5 | parser.addoption("--data-dir", help="Directory containing data", default="data/synth")
6 |
7 |
8 | @pytest.fixture
9 | def data_dir(request):
10 | return request.config.getoption("--data-dir")
11 |
--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # These owners will be the default owners for everything in
2 | # the repo. Unless a later match takes precedence,
3 | # @primary-owner and @secondary-owner will be requested for
4 | # review when someone opens a pull request.
5 | * @tomjemmett @The-Strategy-Unit/nhp_model_devs
6 |
--------------------------------------------------------------------------------
/src/nhp/model/data/__init__.py:
--------------------------------------------------------------------------------
1 | """NHP Data Loaders.
2 |
3 | Classes for loading data for the NHP model. Each class supports loading data from different sources,
4 | such as from local storage or directly from Databricks.
5 | """
6 |
7 | from nhp.model.data.data import Data
8 | from nhp.model.data.local import Local
9 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "pylint.args": [
3 | "--max-args",
4 | "6"
5 | ],
6 | "python.testing.pytestArgs": [
7 | "tests",
8 | "--data-dir=data/synth"
9 | ],
10 | "python.testing.unittestEnabled": false,
11 | "python.testing.pytestEnabled": true,
12 | "[python]": {
13 | "editor.defaultFormatter": "charliermarsh.ruff"
14 | },
15 | "azurite.location": "../.azurite",
16 | "nhp.data_path": "data/synth"
17 | }
--------------------------------------------------------------------------------
/.github/workflows/deploy_dev.yaml:
--------------------------------------------------------------------------------
1 | on:
2 | push:
3 | branches:
4 | - main
5 | workflow_dispatch:
6 |
7 | name: Deploy Dev
8 |
9 | jobs:
10 | deploy-ghcr-dev:
11 | uses: ./.github/workflows/build_container.yaml
12 | with:
13 | docker-tag: ghcr.io/the-strategy-unit/nhp_model:dev
14 | app-version: dev
15 | data-version: dev
16 | secrets: inherit
17 |
18 | deploy-dev-schema:
19 | uses: ./.github/workflows/build_schema.yaml
20 | with:
21 | schema-tag: dev
22 | secrets: inherit
--------------------------------------------------------------------------------
/tests/integration/nhp/model/test_params_validation.py:
--------------------------------------------------------------------------------
1 | """Test params-sample."""
2 |
3 | import pytest
4 |
5 | from nhp.model.params import load_sample_params
6 |
7 |
8 | def test_sample_params_are_valid():
9 | load_sample_params(dataset="dev", scenario="unit-test")
10 | # assert: no exception raised
11 |
12 |
13 | def test_load_sample_params_validation_fails():
14 | from jsonschema.exceptions import ValidationError
15 |
16 | with pytest.raises(ValidationError):
17 | load_sample_params(demographic_factors="invalid-factor")
18 |
--------------------------------------------------------------------------------
/src/nhp/model/__init__.py:
--------------------------------------------------------------------------------
1 | """New Hospitals Programme Model."""
2 |
3 | # re-export anything useful
4 | from nhp.model.aae import AaEModel
5 | from nhp.model.activity_resampling import ActivityResampling
6 | from nhp.model.health_status_adjustment import HealthStatusAdjustmentInterpolated
7 | from nhp.model.inpatients import InpatientEfficiencies, InpatientsModel
8 | from nhp.model.model import Model
9 | from nhp.model.model_iteration import ModelIteration
10 | from nhp.model.outpatients import OutpatientsModel
11 | from nhp.model.params import load_params, load_sample_params
12 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
5 |
6 | version: 2
7 | updates:
8 | - package-ecosystem: "uv" # See documentation for possible values
9 | directory: "/" # Location of package manifests
10 | schedule:
11 | interval: "weekly"
12 |
--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "2.0.0",
3 | "tasks": [
4 | {
5 | "type": "shell",
6 | "label": "Run Code Coverage",
7 | "command": "${command:python.interpreterPath} -m pytest --cov=. tests/unit --cov-branch --cov-report xml:coverage.xml --cov-report term",
8 | "problemMatcher": []
9 | },
10 | {
11 | "type": "shell",
12 | "label": "Download synth data",
13 | "command": "az storage blob download-batch -d data -s data --pattern 'synth/**.parquet' --account-name nhpsa --auth-mode login --overwrite",
14 | "problemMatcher": []
15 | }
16 | ]
17 | }
--------------------------------------------------------------------------------
/src/nhp/model/data/reference/variant_lookup.json:
--------------------------------------------------------------------------------
1 | {
2 | "migration_category": "ppp",
3 | "var_proj_5_year_migration": "ppp",
4 | "var_proj_10_year_migration": "ppp",
5 | "var_proj_high_intl_migration": "ppp",
6 | "var_proj_low_intl_migration": "ppp",
7 | "var_proj_zero_net_migration": "ppp",
8 | "high_population": "hle",
9 | "young_age_structure": "lle",
10 | "high_fertility": "ppp",
11 | "old_age_structure": "hle",
12 | "low_population": "lle",
13 | "low_fertility": "ppp",
14 | "high_life_expectancy": "hle",
15 | "low_life_expectancy": "lle",
16 | "no_mortality_improvement": "ppp",
17 | "zero_net_migration": "ppp",
18 | "replacement_fertility": "ppp",
19 | "custom_projection_R0A66": "ppp",
20 | "custom_projection_RD8": "ppp"
21 | }
--------------------------------------------------------------------------------
/.github/workflows/codecov.yaml:
--------------------------------------------------------------------------------
1 | name: CodeCov
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 |
9 | jobs:
10 | run:
11 | runs-on: ubuntu-latest
12 |
13 | defaults:
14 | run:
15 | shell: bash -l {0}
16 |
17 | steps:
18 | - uses: actions/checkout@v5
19 |
20 | - name: Install the latest version of uv
21 | uses: astral-sh/setup-uv@v6
22 | with:
23 | version: "latest"
24 | activate-environment: true
25 |
26 | - name: Install dependencies
27 | run: uv pip install -e ".[dev]"
28 |
29 | - name: Generate Report
30 | run: uv run pytest --cov=. tests/unit --ignore=tests --cov-branch --cov-report xml:coverage.xml
31 |
32 | - name: Upload Coverage to Codecov
33 | uses: codecov/codecov-action@v5
34 | with:
35 | token: ${{ secrets.CODECOV_TOKEN }}
--------------------------------------------------------------------------------
/.github/workflows/linting.yaml:
--------------------------------------------------------------------------------
1 | name: Linting and Type checking
2 |
3 | on: pull_request
4 |
5 | jobs:
6 | ruff-check:
7 | runs-on: ubuntu-latest
8 | steps:
9 | - uses: actions/checkout@v5
10 |
11 | - uses: astral-sh/ruff-action@v3
12 |
13 | ruff-format-check:
14 | runs-on: ubuntu-latest
15 | steps:
16 | - uses: actions/checkout@v5
17 |
18 | - uses: astral-sh/ruff-action@v3
19 | with:
20 | args: format --check --diff
21 |
22 | ty-check:
23 | runs-on: ubuntu-latest
24 | steps:
25 | - uses: actions/checkout@v5
26 |
27 | - name: Install the latest version of uv
28 | uses: astral-sh/setup-uv@v6
29 | with:
30 | version: "latest"
31 | activate-environment: true
32 |
33 | - name: Install dependencies
34 | run: uv pip install -e ".[dev,databricks]"
35 |
36 | - name: Generate Report
37 | run: uvx ty check .
--------------------------------------------------------------------------------
/tests/unit/nhp/model/test_helpers.py:
--------------------------------------------------------------------------------
1 | """Test helper methods."""
2 |
3 | from unittest.mock import Mock
4 |
5 | import pytest
6 |
7 | from nhp.model.helpers import inrange, rnorm
8 |
9 |
10 | @pytest.mark.parametrize("value, expected", [(-1.1, 0), (1.1, 1), (0, 0), (1, 1), (0.5, 0.5)])
11 | def test_inrange(value, expected):
12 | """Test that the inrange function returns expected values."""
13 | assert inrange(value) == expected
14 |
15 |
16 | @pytest.mark.parametrize(
17 | "value, low, high, expected", [(0, 0.25, 0.75, 0.25), (1, 0.25, 0.75, 0.75)]
18 | )
19 | def test_inrange_lo_hi(value, low, high, expected):
20 | """Test that the inrange function returns expected values."""
21 | assert inrange(value, low, high) == expected
22 |
23 |
24 | def test_rnorm():
25 | """Test that the rnorm function returns random values."""
26 | rng = Mock()
27 | rng.normal.return_value = 1.5
28 | assert rnorm(rng, 1, 2) == 1.5
29 | rng.normal.assert_called_once_with(1.5, 0.3901520929904105)
30 |
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | // Use IntelliSense to learn about possible attributes.
3 | // Hover to view descriptions of existing attributes.
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5 | "version": "0.2.0",
6 | "configurations": [
7 | {
8 | "name": "Run Model",
9 | "type": "debugpy",
10 | "request": "launch",
11 | "module": "nhp.model",
12 | "args": [
13 | "${input:params_file}",
14 | "-d=${config:nhp.data_path}",
15 | "--type=${input:type}"
16 | ],
17 | "console": "integratedTerminal"
18 | }
19 | ],
20 | "inputs": [
21 | {
22 | "id": "params_file",
23 | "type": "promptString",
24 | "description": "Path to parameters file (leave empty to use sample parameters)",
25 | "default": ""
26 | },
27 | {
28 | "id": "type",
29 | "type": "pickString",
30 | "description": "Model Run Type",
31 | "options": [
32 | "ip",
33 | "op",
34 | "aae",
35 | "all"
36 | ]
37 | }
38 | ]
39 | }
--------------------------------------------------------------------------------
/.github/workflows/deploy_release.yaml:
--------------------------------------------------------------------------------
1 | on:
2 | push:
3 | tags:
4 | - 'v*.*.*'
5 |
6 | name: Deploy Production
7 |
8 | jobs:
9 |
10 | set-tag:
11 | runs-on: ubuntu-latest
12 | outputs:
13 | tag: ${{ steps.create-tag.outputs.TAG }}
14 |
15 | steps:
16 | - name: Create tag
17 | id: create-tag
18 | run: |
19 | TAG=`echo ${{ github.ref_name }} | awk 'BEGIN { FS="."; } { print ""$1"."$2; }'`
20 | echo "TAG=$TAG" >> $GITHUB_OUTPUT
21 |
22 | deploy-ghcr-production:
23 | needs: [set-tag]
24 | uses: ./.github/workflows/build_container.yaml
25 | with:
26 | docker-tag: ghcr.io/the-strategy-unit/nhp_model:${{ needs.set-tag.outputs.tag }},ghcr.io/the-strategy-unit/nhp_model:latest
27 | app-version: ${{ github.ref_name }}
28 | data-version: ${{vars.data_version}}
29 | secrets: inherit
30 |
31 | deploy-schema:
32 | needs: [set-tag]
33 |
34 | uses: ./.github/workflows/build_schema.yaml
35 | with:
36 | schema-tag: ${{ needs.set-tag.outputs.tag }}
37 |
--------------------------------------------------------------------------------
/docs/gen_ref_pages.py:
--------------------------------------------------------------------------------
1 | """Generate the code reference pages and navigation."""
2 |
3 | from pathlib import Path
4 |
5 | import mkdocs_gen_files
6 |
7 | nav = mkdocs_gen_files.Nav()
8 |
9 | src = Path(__file__).parent.parent / "src"
10 | for path in sorted(src.rglob("*.py")):
11 | module_path = path.relative_to(src).with_suffix("")
12 | doc_path = path.relative_to(src).with_suffix(".md")
13 | full_doc_path = Path("reference", doc_path)
14 |
15 | parts = tuple(module_path.parts)
16 |
17 | if parts[-1] == "__init__":
18 | parts = parts[:-1]
19 | doc_path = doc_path.with_name("index.md")
20 | full_doc_path = full_doc_path.with_name("index.md")
21 | elif parts[-1] == "__main__":
22 | continue
23 |
24 | nav[parts] = doc_path.as_posix()
25 |
26 | with mkdocs_gen_files.open(full_doc_path, "w") as fd:
27 | ident = ".".join(parts)
28 | fd.write(f"::: {ident}")
29 |
30 | mkdocs_gen_files.set_edit_path(full_doc_path, path)
31 |
32 | with mkdocs_gen_files.open("reference/SUMMARY.md", "w") as nav_file:
33 | nav_file.writelines(nav.build_literate_nav())
34 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 NHS England
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/src/nhp/docker/config.py:
--------------------------------------------------------------------------------
1 | """config values for docker container."""
2 |
3 | import os
4 |
5 | import dotenv
6 |
7 |
8 | class Config:
9 | """Configuration class for Docker container."""
10 |
11 | def __init__(self):
12 | """Configuration settings for the Docker container."""
13 | dotenv.load_dotenv()
14 |
15 | self._app_version = os.environ.get("APP_VERSION", "dev")
16 | self._data_version = os.environ.get("DATA_VERSION", "dev")
17 | self._storage_account = os.environ.get("STORAGE_ACCOUNT")
18 |
19 | @property
20 | def APP_VERSION(self) -> str:
21 | """What is the version of the app?"""
22 | return self._app_version
23 |
24 | @property
25 | def DATA_VERSION(self) -> str:
26 | """What version of the data are we using?"""
27 | return self._data_version
28 |
29 | @property
30 | def STORAGE_ACCOUNT(self) -> str:
31 | """What is the name of the storage account?"""
32 | if self._storage_account is None:
33 | raise ValueError("STORAGE_ACCOUNT environment variable must be set")
34 | return self._storage_account
35 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: NHP Model Documentation
2 | site_description: Documentation for the NHP Model project
3 | site_url: https://connect.strategyunitwm.nhs.uk/nhp/model_documentation/
4 |
5 | repo_url: https://github.com/the-strategy-unit/nhp_model
6 | repo_name: the-strategy-unit/nhp_model
7 |
8 | theme:
9 | name: material
10 | features:
11 | - navigation.tabs
12 | - navigation.sections
13 | - navigation.expand
14 | - navigation.top
15 | - search.highlight
16 | - content.code.copy
17 |
18 | plugins:
19 | - search
20 | - gen-files:
21 | scripts:
22 | - docs/gen_ref_pages.py
23 | - literate-nav:
24 | nav_file: SUMMARY.md
25 | - section-index
26 | - mkdocstrings:
27 | handlers:
28 | python:
29 | options:
30 | docstring_style: google
31 | show_source: true
32 | show_root_heading: true
33 | show_root_toc_entry: false
34 | merge_init_into_class: true
35 | filters:
36 | - "!_version"
37 | - "!__main__"
38 |
39 | nav:
40 | - Home: index.md
41 | - API Reference: reference/
42 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # NHP Model Documentation
2 |
3 | Welcome to the NHP Model documentation. This project provides modeling capabilities for healthcare activity prediction.
4 |
5 | ## Features
6 |
7 | - Multiple model types (inpatients, outpatients, A&E)
8 | - Support for loading data from different sources
9 | - Docker containerization
10 |
11 | ## Quick Start
12 |
13 | Download and install [`uv`](https://docs.astral.sh/uv/getting-started/installation/), then run `uv sync`. Download data locally, e.g. a synthetic dataset to `data/synth`. Then run the model with:
14 |
15 | ``` bash
16 | uv run python -m nhp.model -d data/synth --type all
17 | ```
18 |
19 | This runs all model types with the sample parameters; pass a path to a params file as the first argument to use your own parameters.
20 |
21 | ### Generating Sample Parameters
22 |
23 | You can generate sample parameters using the CLI:
24 |
25 | ``` bash
26 | uv run python -m nhp.model.params --dataset [dataset] --scenario [scenario] --app-version dev > params.json
27 | ```
28 |
29 | Replace the values as needed. This writes the sample parameters to `params.json`.
30 |
31 | ## API Reference
32 |
33 | See the [Model Reference](reference/nhp/model/index.md) for detailed documentation of all classes and functions.
34 |
--------------------------------------------------------------------------------
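The Quick Start in docs/index.md can also be driven from Python rather than the CLI. A minimal sketch, mirroring the integration tests (it assumes synthetic data has been downloaded to `data/synth`):

```python
"""Minimal sketch: run every model type from Python using the sample parameters."""

from nhp.model import load_sample_params
from nhp.model.data import Local
from nhp.model.run import run_all

# sample parameters, with a small number of model runs (as in the integration tests)
params = load_sample_params(model_runs=4)

# local data loader pointing at the downloaded dataset
nhp_data = Local.create("data/synth")

# run all model types; returns the list of saved result files and the results identifier
saved_files, results_file = run_all(params, nhp_data)
print(results_file)
```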
/.github/workflows/remove_untagged_container_images.yaml:
--------------------------------------------------------------------------------
1 | name: Clean up untagged container images
2 |
3 | on:
4 | workflow_dispatch: # allows manual triggering via GitHub UI
5 | schedule:
6 | - cron: '0 1 * * *' # runs at 01:00 UTC every day
7 |
8 | jobs:
9 | remove-untagged-images:
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | # you must create a classic PAT with `delete:packages` scope and add it as a secret named `PAT_DELETE_PACKAGES`
14 | - name: Authenticate with PAT
15 | run: echo "${{ secrets.PAT_DELETE_PACKAGES }}" | gh auth login --with-token
16 | - name: "Remove untagged images"
17 | run: |
18 | VERSION_IDS=$(gh api /orgs/the-strategy-unit/packages/container/nhp_model/versions \
19 | -H "Accept: application/vnd.github+json" \
20 | --paginate | \
21 | jq -r '.[] | select(.metadata.container.tags | length == 0) | .id')
22 |
23 | for VERSION_ID in $VERSION_IDS; do
24 | echo "Deleting version ID: $VERSION_ID"
25 | gh api "/orgs/the-strategy-unit/packages/container/nhp_model/versions/${VERSION_ID}" \
26 | -X DELETE \
27 | -H "Accept: application/vnd.github+json"
28 | done
--------------------------------------------------------------------------------
/src/nhp/model/helpers.py:
--------------------------------------------------------------------------------
1 | """Helper methods for the model package."""
2 |
3 | import json
4 |
5 | import numpy as np
6 | import pandas as pd
7 |
8 |
9 | def inrange(value: float, low: float = 0, high: float = 1) -> float:
10 | """Force a value to be in the interval [low, high].
11 |
12 | Args:
13 | value: The value we want to constrain to the interval.
14 | low: The minimum that `value` can be. Defaults to 0.
15 | high: The maximum that `value` can be. Defaults to 1.
16 |
17 | Returns:
18 | `value` constrained to the interval.
19 | """
20 | return max(low, min(high, value))
21 |
22 |
23 | def rnorm(rng: np.random.Generator, low: float, high: float) -> float:
24 | """Create a single random normal value from a 80% confidence interval.
25 |
26 | Args:
27 | rng: A random number generator.
28 | low: The low estimate of our 80% confidence interval.
29 | high: The high estimate of our 80% confidence interval.
30 |
31 | Returns:
32 | A random normal value.
33 | """
34 | q = 2.563103 # generated by: 2 * norm.ppf(1 - (1 - 0.8) / 2)
35 | mean = (high + low) / 2
36 | stdev = (high - low) / q
37 | return rng.normal(mean, stdev)
38 |
--------------------------------------------------------------------------------
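The 80% confidence interval in `rnorm` maps to normal parameters by taking the midpoint of the interval as the mean and dividing the interval width by `2 * norm.ppf(0.9)`. A short sketch checking this; the expected numbers mirror `test_rnorm`:

```python
import numpy as np
from scipy.stats import norm

from nhp.model.helpers import rnorm

# mean is the midpoint; stdev is the interval width divided by 2 * norm.ppf(0.9)
low, high = 1, 2
q = 2 * norm.ppf(1 - (1 - 0.8) / 2)  # ~2.563103
mean, stdev = (high + low) / 2, (high - low) / q
print(mean, stdev)  # 1.5 0.3901520929904105

# drawing many samples, roughly 80% should fall inside [low, high]
rng = np.random.default_rng(42)
samples = [rnorm(rng, low, high) for _ in range(10_000)]
print(sum(low <= s <= high for s in samples) / len(samples))  # ~0.8
```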
/src/nhp/model/data/reference/__init__.py:
--------------------------------------------------------------------------------
1 | """Reference Data.
2 |
3 | Any reference data needed for the model should be stored in this folder.
4 |
5 | Helper methods for loading the reference data should be created here.
6 | """
7 |
8 | import json
9 | import pathlib
10 |
11 | import pandas as pd
12 |
13 |
14 | def _ref_path(filename):
15 | path = pathlib.Path(__file__).parent.resolve()
16 | return path.joinpath(filename)
17 |
18 |
19 | def variant_lookup() -> dict:
20 | """Variant Lookup (Health Status Adjustment).
21 |
22 | Returns:
23 | A dictionary of the variant lookups.
24 | """
25 | with _ref_path("variant_lookup.json").open("r", encoding="UTF-8") as vlup_file:
26 | return json.load(vlup_file)
27 |
28 |
29 | def life_expectancy() -> pd.DataFrame:
30 | """Life Expectancy (Health Status Adjustment).
31 |
32 | Returns:
33 | A pandas DataFrame containing life expectancy data.
34 | """
35 | return pd.read_csv(_ref_path("life_expectancy.csv"))
36 |
37 |
38 | def split_normal_params() -> pd.DataFrame:
39 | """Split Normal Parameters (Health Status Adjustment).
40 |
41 | Returns:
42 | A pandas DataFrame containing split normal parameters.
43 | """
44 | return pd.read_csv(_ref_path("hsa_split_normal_params.csv"))
45 |
--------------------------------------------------------------------------------
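A brief usage sketch for the reference helpers above; the expected values mirror `test_reference.py`:

```python
from nhp.model.data import reference

# variant lookup: maps population projection variants to an HSA variant (ppp/hle/lle)
vl = reference.variant_lookup()
print(vl["high_life_expectancy"])  # "hle"

# split normal parameters used by the health status adjustment
snp = reference.split_normal_params()
print(snp.columns.tolist())  # ["var", "sex", "year", "mode", "sd1", "sd2"]
```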
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ghcr.io/astral-sh/uv:python3.13-alpine
2 |
3 | # Create user
4 | RUN addgroup -g 1000 nhp && adduser -u 1000 -G nhp -s /bin/sh -h /app -D nhp
5 | WORKDIR /app
6 | USER nhp
7 |
8 | # Create working directories for data, queue and results
9 | RUN for DIR in data queue results; do mkdir -p $DIR; done
10 |
11 | # Copy dependency files first (optimal caching)
12 | COPY --chown=nhp:nhp pyproject.toml uv.lock ./
13 |
14 | # Install dependencies only (skip local package)
15 | RUN uv sync --frozen --no-dev --no-install-project
16 |
17 | # Ensure Python can find installed packages and local model
18 | ENV PATH="/app/.venv/bin:$PATH"
19 |
20 | # Copy application code (changes most frequently)
21 | COPY --chown=nhp:nhp src/nhp/ /app/src/nhp/
22 | RUN uv pip install .
23 |
24 | # define build arguments, these will set the environment variables in the container
25 | ARG app_version
26 | ARG data_version
27 | ARG storage_account
28 |
29 | ENV APP_VERSION=$app_version
30 | ENV DATA_VERSION=$data_version
31 | ENV STORAGE_ACCOUNT=$storage_account
32 |
33 | # Define static environment variables
34 | ENV BATCH_SIZE=16
35 |
36 | # temporary patch until we update the api
37 | USER root
38 | RUN printf '#!/bin/sh\n/app/.venv/bin/python -m nhp.docker "$@"\n' > /opt/docker_run.py && \
39 | chmod +x /opt/docker_run.py
40 | USER nhp
41 |
42 | ENTRYPOINT ["python", "-m", "nhp.docker"]
43 |
--------------------------------------------------------------------------------
/tests/unit/nhp/docker/test_config.py:
--------------------------------------------------------------------------------
1 | import os
2 | from unittest.mock import patch
3 |
4 | import pytest
5 |
6 | from nhp.docker.config import Config
7 |
8 |
9 | def test_config_sets_values_from_envvars(mocker):
10 | # arrange
11 | mocker.patch("dotenv.load_dotenv")
12 |
13 | # act
14 | with patch.dict(
15 | os.environ,
16 | {
17 | "APP_VERSION": "app version",
18 | "DATA_VERSION": "data version",
19 | "STORAGE_ACCOUNT": "storage account",
20 | },
21 | ):
22 | config = Config()
23 |
24 | # assert
25 | assert config.APP_VERSION == "app version"
26 | assert config.DATA_VERSION == "data version"
27 | assert config.STORAGE_ACCOUNT == "storage account"
28 |
29 |
30 | def test_config_uses_default_values(mocker):
31 | # arrange
32 | mocker.patch("dotenv.load_dotenv")
33 |
34 | # act
35 | config = Config()
36 |
37 | # assert
38 | assert config.APP_VERSION == "dev"
39 | assert config.DATA_VERSION == "dev"
40 |
41 | with pytest.raises(ValueError, match="STORAGE_ACCOUNT environment variable must be set"):
42 | config.STORAGE_ACCOUNT
43 |
44 |
45 | def test_config_calls_dotenv_load(mocker):
46 | # arrange
47 | m = mocker.patch("dotenv.load_dotenv")
48 |
49 | # act
50 | config = Config()
51 |
52 | # assert
53 | m.assert_called_once()
54 |
--------------------------------------------------------------------------------
/.github/workflows/removed_closed_prs.yaml:
--------------------------------------------------------------------------------
1 | name: Clean up closed pull requests
2 |
3 | on:
4 | pull_request:
5 | types:
6 | - closed
7 | jobs:
8 | remove-pr-image:
9 | runs-on: ubuntu-latest
10 | steps:
11 | # you must create a classic PAT with `delete:packages` scope and add it as a secret named `PAT_DELETE_PACKAGES`
12 | - name: Authenticate with PAT
13 | run: echo "${{ secrets.PAT_DELETE_PACKAGES }}" | gh auth login --with-token
14 | - name: "Remove PR image"
15 | env:
16 | TAG_TO_DELETE: "pr-${{ github.event.pull_request.number }}"
17 | run: |
18 | ALL_VERSIONS=$(gh api /orgs/the-strategy-unit/packages/container/nhp_model/versions \
19 | -H "Accept: application/vnd.github+json" \
20 | --paginate)
21 |
22 | VERSION_ID=$(jq -r --arg tag $TAG_TO_DELETE \
23 | '.[] | select(.metadata.container.tags[] == $tag) | .id' \
24 | <<< "$ALL_VERSIONS")
25 |
26 | if [ -n "$VERSION_ID" ]; then
27 | echo "Deleting version ID: $VERSION_ID"
28 | gh api \
29 | -X DELETE \
30 | /orgs/the-strategy-unit/packages/container/nhp_model/versions/${VERSION_ID} \
31 | -H "Accept: application/vnd.github+json"
32 | else
33 | echo "Tag '$TAG_TO_DELETE' not found — skipping delete"
34 | fi
--------------------------------------------------------------------------------
/tests/unit/nhp/model/data/test_data.py:
--------------------------------------------------------------------------------
1 | """test nhp data (local)."""
2 |
3 | import pytest
4 |
5 | from nhp.model.data import Data
6 |
7 |
8 | def test_get_ip():
9 | d = Data()
10 | with pytest.raises(NotImplementedError):
11 | d.get_ip()
12 |
13 |
14 | def test_get_ip_strategies():
15 | d = Data()
16 | with pytest.raises(NotImplementedError):
17 | d.get_ip_strategies()
18 |
19 |
20 | def test_get_op():
21 | d = Data()
22 | with pytest.raises(NotImplementedError):
23 | d.get_op()
24 |
25 |
26 | def test_get_aae():
27 | d = Data()
28 | with pytest.raises(NotImplementedError):
29 | d.get_aae()
30 |
31 |
32 | def test_get_birth_factors():
33 | d = Data()
34 | with pytest.raises(NotImplementedError):
35 | d.get_birth_factors()
36 |
37 |
38 | def test_get_demographic_factors():
39 | d = Data()
40 | with pytest.raises(NotImplementedError):
41 | d.get_demographic_factors()
42 |
43 |
44 | def test_get_hsa_activity_table():
45 | d = Data()
46 | with pytest.raises(NotImplementedError):
47 | d.get_hsa_activity_table()
48 |
49 |
50 | def test_get_hsa_gams():
51 | d = Data()
52 | with pytest.raises(NotImplementedError):
53 | d.get_hsa_gams()
54 |
55 |
56 | def test_get_inequalities():
57 | d = Data()
58 | with pytest.raises(NotImplementedError):
59 | d.get_inequalities()
60 |
--------------------------------------------------------------------------------
/.github/workflows/build_container.yaml:
--------------------------------------------------------------------------------
1 | name: Build and push container image
2 |
3 | on:
4 | workflow_call:
5 | inputs:
6 | docker-tag:
7 | required: true
8 | default: dev
9 | type: string
10 | app-version:
11 | required: true
12 | default: dev
13 | type: string
14 | data-version:
15 | required: true
16 | default: dev
17 | type: string
18 |
19 | jobs:
20 |
21 | build-container:
22 | runs-on: ubuntu-latest
23 | steps:
24 | - name: 'Checkout GitHub Action'
25 | uses: actions/checkout@v5
26 |
27 | - name: 'Login to GitHub Container Registry'
28 | uses: docker/login-action@v3
29 | with:
30 | registry: ghcr.io
31 | username: ${{github.actor}}
32 | password: ${{secrets.GITHUB_TOKEN}}
33 | - name: Set up Docker Buildx
34 | uses: docker/setup-buildx-action@v3
35 |
36 | - name: "Build image"
37 | uses: docker/build-push-action@v6
38 | with:
39 | context: .
40 | tags: ${{ inputs.docker-tag }}
41 | push: true
42 | cache-from: type=gha
43 | cache-to: type=gha,mode=max
44 | platforms: linux/amd64
45 | provenance: false
46 | sbom: false
47 | build-args: |
48 | app_version=${{ inputs.app-version }}
49 | data_version=${{ inputs.data-version }}
50 |
--------------------------------------------------------------------------------
/.github/workflows/deploy_docs.yaml:
--------------------------------------------------------------------------------
1 | name: Deploy Documentation
2 |
3 | on:
4 | push:
5 | branches: [main]
6 |
7 | permissions:
8 | contents: read
9 | pages: write
10 | id-token: write
11 |
12 | concurrency:
13 | group: "pages"
14 | cancel-in-progress: false
15 |
16 | jobs:
17 | build:
18 | runs-on: ubuntu-latest
19 | steps:
20 | - uses: actions/checkout@v4
21 |
22 | - name: Install uv
23 | uses: astral-sh/setup-uv@v6
24 |
25 | - name: Set up Python
26 | run: uv python install
27 |
28 | - name: Install dependencies
29 | run: uv sync --extra docs
30 |
31 | - name: Build documentation
32 | run: uv run mkdocs build --clean
33 |
34 | - name: Upload artifact
35 | uses: actions/upload-artifact@v4
36 | with:
37 | name: site
38 | path: ./site
39 |
40 | deploy:
41 | runs-on: ubuntu-latest
42 | needs: build
43 | steps:
44 | - name: Download artifact
45 | uses: actions/download-artifact@v4
46 | with:
47 | name: site
48 | path: ./site
49 |
50 | - name: Install uv (for rsconnect)
51 | uses: astral-sh/setup-uv@v6
52 |
53 | - name: Configure Connect
54 | run: uvx rsconnect add -s ${{ secrets.RSCONNECT_URL }} -n connect -k ${{ secrets.RSCONNECT_API_KEY }}
55 |
56 | - name: Deploy to Connect
57 | run: uvx rsconnect deploy html site -a ${{ vars.CONNECT_DOCS_APP_ID }}
58 |
--------------------------------------------------------------------------------
/.github/workflows/deploy_pr.yaml:
--------------------------------------------------------------------------------
1 | on:
2 | pull_request:
3 | types: [opened, synchronize, reopened]
4 |
5 | name: Deploy PR
6 |
7 | jobs:
8 |
9 | deploy-ghcr-pr:
10 | uses: ./.github/workflows/build_container.yaml
11 | with:
12 | docker-tag: ghcr.io/the-strategy-unit/nhp_model:pr-${{ github.event.number }}
13 | app-version: dev
14 | data-version: dev
15 | secrets: inherit
16 |
17 | add-comment-to-pr:
18 | runs-on: ubuntu-latest
19 | needs: ["deploy-ghcr-pr"]
20 | steps:
21 | - name: Find Comment
22 | uses: peter-evans/find-comment@v3
23 | id: fc
24 | with:
25 | issue-number: ${{ github.event.pull_request.number }}
26 | comment-author: 'github-actions[bot]'
27 | body-includes: "## ✅ A new build is available"
28 |
29 | - name: Comment with container image link
30 | if: github.event_name == 'pull_request'
31 | uses: peter-evans/create-or-update-comment@v4
32 | with:
33 | token: ${{ secrets.GITHUB_TOKEN }}
34 | comment-id: ${{ steps.fc.outputs.comment-id }}
35 | issue-number: ${{ github.event.pull_request.number }}
36 | body: |
37 | ## ✅ A new build is available.
38 |
39 | You can use the following to pull the image into your local environment:
40 |
41 | ``` bash
42 | docker pull ghcr.io/the-strategy-unit/nhp_model:pr-${{ github.event.number }}
43 | ```
44 | edit-mode: replace
45 |
--------------------------------------------------------------------------------
/tests/unit/nhp/model/test_params.py:
--------------------------------------------------------------------------------
1 | """Test params-sample."""
2 |
3 | from unittest.mock import mock_open, patch
4 |
5 | from nhp.model.params import load_params, load_sample_params, validate_params
6 |
7 |
8 | def test_validate_params(mocker):
9 | # arrange
10 | m_validate = mocker.patch("jsonschema.validate")
11 | m_json_load = mocker.patch("json.load", return_value="schema")
12 |
13 | # act
14 | validate_params("params") # ty: ignore[invalid-argument-type]
15 |
16 | # assert
17 | m_validate.assert_called_once_with(instance="params", schema="schema")
18 | assert m_json_load.call_args[0][0].name.endswith("params-schema.json")
19 |
20 |
21 | def test_load_params(mocker):
22 | """Test that load_params opens the params file."""
23 | # arrange
24 | m_vp = mocker.patch("nhp.model.params.validate_params")
25 |
26 | # act
27 | with patch("builtins.open", mock_open(read_data='{"params": 0}')) as mock_file:
28 | assert load_params("filename.json") == {"params": 0}
29 |
30 | # assert
31 | mock_file.assert_called_with("filename.json", "r", encoding="UTF-8")
32 | m_vp.assert_called_once_with({"params": 0})
33 |
34 |
35 | def test_load_sample_params(mocker):
36 | # arrange
37 | m_validate = mocker.patch("nhp.model.params.validate_params")
38 |
39 | # act
40 | actual = load_sample_params(dataset="dev", scenario="unit-test")
41 |
42 | # assert
43 | assert actual["dataset"] == "dev"
44 | assert actual["scenario"] == "unit-test"
45 | m_validate.assert_called_once_with(actual)
46 |
--------------------------------------------------------------------------------
/tests/unit/nhp/model/data/test_reference.py:
--------------------------------------------------------------------------------
1 | """test nhp data (reference)."""
2 |
3 | from nhp.model.data import reference
4 |
5 | expected_hsa_variants = {"lle", "hle", "ppp"}
6 |
7 |
8 | def test_variants():
9 | # arrange
10 |
11 | # act
12 | vl = reference.variant_lookup()
13 |
14 | # assert
15 | assert len(vl) == 19
16 | assert set(vl.values()) == expected_hsa_variants
17 |
18 |
19 | def test_life_expectancy():
20 | # arrange
21 |
22 | # act
23 | le = reference.life_expectancy()
24 |
25 | # assert
26 | assert len(le) == 276
27 | assert list(le.columns) == ["var", "sex", "age"] + [str(i) for i in range(2018, 2044)]
28 | assert set(le["var"]) == expected_hsa_variants
29 | assert set(le["sex"]) == {1, 2}
30 | assert list(le["age"]) == list(range(55, 101)) * 6
31 | assert le[[str(i) for i in range(2018, 2043)]].sum().sum() == 89323.6
32 |
33 |
34 | def test_split_normal_params():
35 | # arrange
36 |
37 | # act
38 | snp = reference.split_normal_params()
39 |
40 | # assert
41 | assert len(snp) == 144
42 | assert list(snp.columns) == [
43 | "var",
44 | "sex",
45 | "year",
46 | "mode",
47 | "sd1",
48 | "sd2",
49 | ]
50 | assert set(snp["var"]) == expected_hsa_variants
51 | assert set(snp["sex"]) == {"f", "m"}
52 | assert snp["year"].to_list() == list(range(2020, 2044)) * 6
53 | assert snp[["mode", "sd1", "sd2"]].sum().to_list() == [
54 | 12.159496878354162,
55 | 55.57842646603717,
56 | 140.31508181965998,
57 | ]
58 |
--------------------------------------------------------------------------------
/src/nhp/model/params/__main__.py:
--------------------------------------------------------------------------------
1 | """Generate sample parameters."""
2 |
3 | import argparse
4 | import json
5 | import random
6 | from datetime import datetime
7 |
8 | from . import load_sample_params
9 |
10 |
11 | def _parse_args():
12 | parser = argparse.ArgumentParser(description="CLI for loading sample parameters.")
13 | parser.add_argument("--dataset", required=True, help="Dataset name")
14 | parser.add_argument("--scenario", required=True, help="Scenario name")
15 | parser.add_argument("--app-version", default="dev", help="App version (default: dev)")
16 | parser.add_argument("--model-runs", type=int, default=256, help="Model Runs (default: 256)")
17 | parser.add_argument("--start-year", type=int, default=2023, help="Start year (default: 2023)")
18 | parser.add_argument("--end-year", type=int, default=2041, help="End year (default: 2041)")
19 | parser.add_argument(
20 | "--seed",
21 | type=int,
22 | default=None,
23 | help="Random seed (default: a random integer between 0 and 10000)",
24 | )
25 |
26 | return parser.parse_args()
27 |
28 |
29 | def main():
30 | """Generate sample parameters and print them to the console."""
31 | args = _parse_args()
32 |
33 | if args.seed is None:
34 | args.seed = random.randint(0, 10000)
35 |
36 | params = load_sample_params(
37 | dataset=args.dataset,
38 | scenario=args.scenario,
39 | app_version=args.app_version,
40 | start_year=args.start_year,
41 | end_year=args.end_year,
42 | seed=args.seed,
43 | )
44 |
45 | params["create_datetime"] = datetime.now().strftime("%Y%m%d_%H%M%S")
46 |
47 | print(json.dumps(params, indent=2))
48 |
49 |
50 | def _init():
51 | main()
52 |
53 |
54 | if __name__ == "__main__":
55 | _init()
56 |
--------------------------------------------------------------------------------
/.github/workflows/build_schema.yaml:
--------------------------------------------------------------------------------
1 | name: Deploy schema.json to GitHub Pages
2 |
3 | on:
4 | workflow_call:
5 | inputs:
6 | schema-tag:
7 | required: true
8 | default: dev
9 | type: string
10 |
11 | permissions:
12 | pages: write
13 | id-token: write
14 | contents: write
15 |
16 | jobs:
17 | build:
18 | runs-on: ubuntu-latest
19 | steps:
20 | - name: Checkout repository
21 | uses: actions/checkout@v5
22 |
23 | - name: Clone existing schemas branch content
24 | run: |
25 | git fetch --depth=1 origin schemas
26 | git worktree add schemas schemas
27 |
28 | - name: Copy schema to app version path
29 | run: |
30 | mkdir -p schemas/${{ inputs.schema-tag }}
31 | sed '/$id/ s/dev/${{ inputs.schema-tag }}/' src/nhp/model/params/params-schema.json > schemas/${{ inputs.schema-tag }}/params-schema.json
32 |
33 | - name: Commit the schema
34 | run: |
35 | git config user.name "github-actions[bot]"
36 | git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
37 | pushd schemas
38 | git add ${{ inputs.schema-tag }}/params-schema.json
39 | git commit -m "adding schema for ${{ inputs.schema-tag }}" || echo "No changes to commit"
40 | git push origin schemas
41 | popd
42 |
43 | - name: Upload to GitHub Pages
44 | uses: actions/upload-pages-artifact@v4
45 | with:
46 | path: schemas
47 |
48 | deploy:
49 | needs: build
50 | runs-on: ubuntu-latest
51 | environment:
52 | name: github-pages
53 | url: ${{ steps.deployment.outputs.page_url }}
54 | steps:
55 | - name: Deploy to GitHub Pages
56 | id: deployment
57 | uses: actions/deploy-pages@v4
58 |
--------------------------------------------------------------------------------
/src/nhp/model/params/__init__.py:
--------------------------------------------------------------------------------
1 | """Module for working with model parameter files."""
2 |
3 | import json
4 |
5 | from importlib_resources import files
6 |
7 | from nhp.model import params as params_module
8 |
9 |
10 | def validate_params(params: dict) -> None:
11 | """Validate model parameters.
12 |
13 | Args:
14 | params: The model parameters to validate.
15 |
16 | Raises:
17 | jsonschema.ValidationError: If the parameters are not valid.
18 | """
19 | # lazy load for test collection performance
20 | import jsonschema # noqa: PLC0415
21 |
22 | with (
23 | files(params_module)
24 | .joinpath("params-schema.json")
25 | .open("r", encoding="UTF-8") as schema_file
26 | ):
27 | schema = json.load(schema_file)
28 |
29 | jsonschema.validate(instance=params, schema=schema)
30 |
31 |
32 | def load_params(filename: str) -> dict:
33 | """Load a params file.
34 |
35 | Args:
36 | filename: The full name of the file that we wish to load.
37 |
38 | Raises:
39 | jsonschema.ValidationError: If the parameters are not valid.
40 |
41 | Returns:
42 | The model parameters.
43 | """
44 | with open(filename, "r", encoding="UTF-8") as prf:
45 | params = json.load(prf)
46 |
47 | validate_params(params)
48 |
49 | return params
50 |
51 |
52 | def load_sample_params(**kwargs) -> dict:
53 | """Load a sample params file.
54 |
55 | Args:
56 | **kwargs: Any parameters to override in the sample params.
57 |
58 | Raises:
59 | jsonschema.ValidationError: If the parameters are not valid.
60 |
61 | Returns:
62 | The model parameters.
63 | """
64 | with files(params_module).joinpath("params-sample.json").open("r", encoding="UTF-8") as prf:
65 | params = json.load(prf)
66 |
67 | params.update(kwargs)
68 |
69 | validate_params(params)
70 |
71 | return params
72 |
--------------------------------------------------------------------------------
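A short usage sketch for these helpers; the calls mirror the unit and integration tests. Keyword overrides are merged into the sample parameters and then validated against `params-schema.json`, so invalid values raise:

```python
from jsonschema.exceptions import ValidationError

from nhp.model.params import load_sample_params

# override any top-level values in the sample parameters
params = load_sample_params(dataset="dev", scenario="unit-test")
print(params["dataset"], params["scenario"])  # dev unit-test

# invalid overrides fail schema validation
try:
    load_sample_params(demographic_factors="invalid-factor")
except ValidationError as err:
    print("invalid parameters:", err.message)
```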
/.github/workflows/build_app.yaml:
--------------------------------------------------------------------------------
1 | on:
2 | push:
3 | branches:
4 | - main
5 | tags:
6 | - 'v*'
7 |
8 | jobs:
9 |
10 | build-app:
11 | runs-on: ubuntu-latest
12 | steps:
13 | - name: "Checkout GitHub Action"
14 | uses: actions/checkout@v5
15 |
16 | - name: Install the latest version of uv
17 | uses: astral-sh/setup-uv@v6
18 | with:
19 | version: "latest"
20 | enable-cache: true
21 | cache-dependency-glob: "uv.lock"
22 |
23 | - name: Build (release)
24 | if: github.ref != 'refs/heads/main'
25 | run: uv build
26 |
27 | - name: Build (dev)
28 | if: github.ref == 'refs/heads/main'
29 | env:
30 | SETUPTOOLS_SCM_PRETEND_VERSION: 0.dev0
31 | run: uv build
32 |
33 | - name: Generate artifact
34 | uses: actions/upload-artifact@v4
35 | with:
36 | name: dist-whl
37 | path: dist/*.whl
38 |
39 | upload-build-to-storage-account:
40 | runs-on: ubuntu-latest
41 | needs: ["build-app"]
42 |
43 | steps:
44 | - name: Download build artifact
45 | uses: actions/download-artifact@v4
46 | with:
47 | name: dist-whl
48 | path: .
49 |
50 | - name: Install Azure CLI
51 | uses: Azure/setup-azd@v2
52 |
53 | - name: Upload to blob storage
54 | run: |
55 | az storage blob upload \
56 | --account-name ${{ secrets.NHP_STORAGE_ACCOUNT }} \
57 | --container-name app \
58 | --file $(ls *.whl) \
59 | --sas-token "${{ secrets.APP_CONTAINER_SAS }}" \
60 | --overwrite
61 |
62 | add-build-to-release:
63 | runs-on: ubuntu-latest
64 | needs: ["build-app"]
65 | permissions:
66 | contents: write
67 |
68 | steps:
69 | - name: Download build artifact
70 | uses: actions/download-artifact@v4
71 | with:
72 | name: dist-whl
73 | path: .
74 | - name: Upload artifact to the GitHub Release
75 | uses: softprops/action-gh-release@v2
76 | if: github.ref_type == 'tag'
77 | with:
78 | files: "*.whl"
79 | env:
80 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--------------------------------------------------------------------------------
/src/nhp/model/data/data.py:
--------------------------------------------------------------------------------
1 | """NHP Data Loaders.
2 |
3 | Classes for loading data for the NHP model. Each class supports loading data from different sources,
4 | such as from local storage or directly from Databricks.
5 | """
6 |
7 | from typing import Any
8 |
9 | import pandas as pd
10 |
11 |
12 | class Data:
13 | """Load NHP data.
14 |
15 | Interface for loading data for the NHP model. This interface should have no concrete
16 | implementations, instead other classes should derive from this interface.
17 | """
18 |
19 | def __init__(self):
20 | """Initialise Data data loader class."""
21 | pass
22 |
23 | def get_ip(self) -> pd.DataFrame:
24 | """Get the inpatients dataframe.
25 |
26 | Returns:
27 | The inpatients dataframe.
28 | """
29 | raise NotImplementedError()
30 |
31 | def get_ip_strategies(self) -> dict[str, pd.DataFrame]:
32 | """Get the inpatients strategies dataframe.
33 |
34 | Returns:
35 | The inpatients strategies dataframe.
36 | """
37 | raise NotImplementedError()
38 |
39 | def get_op(self) -> pd.DataFrame:
40 | """Get the outpatients dataframe.
41 |
42 | Returns:
43 | The outpatients dataframe.
44 | """
45 | raise NotImplementedError()
46 |
47 | def get_aae(self) -> pd.DataFrame:
48 | """Get the A&E dataframe.
49 |
50 | Returns:
51 | The A&E dataframe.
52 | """
53 | raise NotImplementedError()
54 |
55 | def get_birth_factors(self) -> pd.DataFrame:
56 | """Get the birth factors dataframe.
57 |
58 | Returns:
59 | The birth factors dataframe.
60 | """
61 | raise NotImplementedError()
62 |
63 | def get_demographic_factors(self) -> pd.DataFrame:
64 | """Get the demographic factors dataframe.
65 |
66 | Returns:
67 | The demographic factors dataframe.
68 | """
69 | raise NotImplementedError()
70 |
71 | def get_hsa_activity_table(self) -> pd.DataFrame:
72 | """Get the demographic factors dataframe.
73 |
74 | Returns:
75 | The demographic factors dataframe.
76 | """
77 | raise NotImplementedError()
78 |
79 | def get_hsa_gams(self) -> Any:
80 | """Get the health status adjustment gams.
81 |
82 | Returns:
83 | The health status adjustment gams.
84 | """
85 | raise NotImplementedError()
86 |
87 | def get_inequalities(self) -> pd.DataFrame:
88 | """Get the inequalities dataframe.
89 |
90 | Returns:
91 | The inequalities dataframe.
92 | """
93 | raise NotImplementedError()
94 |
--------------------------------------------------------------------------------
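As the docstring notes, `Data` is an interface and concrete loaders such as `Local` derive from it. A hypothetical in-memory loader, sketched here purely to illustrate the pattern (`InMemory` and its `frames` argument are not part of the package):

```python
import pandas as pd

from nhp.model.data import Data


class InMemory(Data):
    """Hypothetical loader that serves pre-built dataframes (illustration only)."""

    def __init__(self, frames: dict[str, pd.DataFrame]):
        """Store the dataframes keyed by name, e.g. {"ip": ..., "op": ..., "aae": ...}."""
        super().__init__()
        self._frames = frames

    def get_ip(self) -> pd.DataFrame:
        """Return the inpatients dataframe."""
        return self._frames["ip"]

    def get_op(self) -> pd.DataFrame:
        """Return the outpatients dataframe."""
        return self._frames["op"]

    def get_aae(self) -> pd.DataFrame:
        """Return the A&E dataframe."""
        return self._frames["aae"]

    # the remaining get_* methods would be overridden in the same way
```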
/tests/integration/nhp/model/test_run_model.py:
--------------------------------------------------------------------------------
1 | """Test single model runs for the NHP model."""
2 |
3 | import pandas as pd
4 | import pytest
5 |
6 | from nhp.model import (
7 | AaEModel,
8 | InpatientsModel,
9 | ModelIteration,
10 | OutpatientsModel,
11 | load_sample_params,
12 | )
13 | from nhp.model.data import Local
14 | from nhp.model.run import run_all
15 |
16 |
17 | @pytest.mark.parametrize(
18 | "model_class, expected_aggregations",
19 | [
20 | (
21 | InpatientsModel,
22 | {"sex+tretspef_grouped", "tretspef", "tretspef+los_group", "delivery_episode_in_spell"},
23 | ),
24 | (
25 | OutpatientsModel,
26 | {
27 | "sex+tretspef_grouped",
28 | "tretspef",
29 | },
30 | ),
31 | (
32 | AaEModel,
33 | {
34 | "acuity",
35 | "attendance_category",
36 | },
37 | ),
38 | ],
39 | )
40 | def test_single_model_run(model_class, expected_aggregations, data_dir):
41 | # arrange
42 | params = load_sample_params()
43 | data = Local.create(data_dir)
44 | model = model_class(params, data)
45 | expected_aggregations |= {
46 | "default",
47 | "sex+age_group",
48 | "age",
49 | "avoided_activity",
50 | }
51 |
52 | # act
53 | # rather than using the run_single_model_run function, we directly instantiate ModelIteration
54 | # this is so we can work with the results. run_single_model_run is used to print some output to
55 | # the console.
56 | m_run = ModelIteration(model, 1)
57 | model_results, step_counts = m_run.get_aggregate_results()
58 |
59 | # assert
60 | assert {isinstance(v, pd.Series) for v in model_results.values()} == {True}
61 | assert set(model_results.keys()) == expected_aggregations
62 | assert isinstance(step_counts, pd.Series)
63 |
64 |
65 | def test_all_model_runs(data_dir):
66 | # arrange
67 | params = load_sample_params(model_runs=4)
68 | nhp_data = Local.create(data_dir)
69 | res_path = "results/synthetic/test/20220101_000000"
70 |
71 | # act
72 | actual = run_all(params, nhp_data)
73 |
74 | # assert
75 | assert actual == (
76 | [
77 | f"{res_path}/{i}.parquet"
78 | for i in [
79 | "acuity",
80 | "age",
81 | "attendance_category",
82 | "avoided_activity",
83 | "default",
84 | "delivery_episode_in_spell",
85 | "sex+age_group",
86 | "sex+tretspef_grouped",
87 | "tretspef",
88 | "tretspef+los_group",
89 | "step_counts",
90 | ]
91 | ]
92 | + [
93 | f"{res_path}/params.json",
94 | ],
95 | "synthetic/test-20220101_000000",
96 | )
97 |
--------------------------------------------------------------------------------
/src/nhp/docker/__main__.py:
--------------------------------------------------------------------------------
1 | """Methods for running the NHP model in a Docker container."""
2 |
3 | import argparse
4 | import logging
5 | from datetime import datetime
6 |
7 | from nhp.docker.config import Config
8 | from nhp.docker.run import RunWithAzureStorage, RunWithLocalStorage
9 | from nhp.model.data import Local
10 | from nhp.model.run import run_all
11 |
12 |
13 | def parse_args():
14 | """Parse command line arguments."""
15 | parser = argparse.ArgumentParser()
16 | parser.add_argument(
17 | "params_file",
18 | help="Name of the parameters file stored in Azure",
19 | )
20 |
21 | parser.add_argument(
22 | "--local-storage",
23 | "-l",
24 | action="store_true",
25 | help="Use local storage (instead of Azure)",
26 | )
27 |
28 | parser.add_argument("--save-full-model-results", action="store_true")
29 |
30 | return parser.parse_args()
31 |
32 |
33 | def main(config: Config = Config()):
34 | """The main method."""
35 | args = parse_args()
36 |
37 | logging.basicConfig(
38 | format="%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s",
39 | level=logging.INFO,
40 | datefmt="%Y-%m-%d %H:%M:%S",
41 | )
42 |
43 | if args.local_storage:
44 | runner = RunWithLocalStorage(args.params_file)
45 | else:
46 | runner = RunWithAzureStorage(args.params_file, config)
47 |
48 | logging.info("running model for: %s", args.params_file)
49 | logging.info("submitted by: %s", runner.params.get("user"))
50 | logging.info("model_runs: %s", runner.params["model_runs"])
51 | logging.info("start_year: %s", runner.params["start_year"])
52 | logging.info("end_year: %s", runner.params["end_year"])
53 | logging.info("app_version: %s", runner.params["app_version"])
54 |
55 | start_time = datetime.now()
56 |
57 | saved_files, results_file = run_all(
58 | runner.params,
59 | Local.create("data"),
60 | runner.progress_callback(),
61 | args.save_full_model_results,
62 | )
63 |
64 | end_time = datetime.now()
65 | elapsed_time = end_time - start_time
66 |
67 | additional_metadata = {
68 | "model_run_start_time": start_time.isoformat(),
69 | "model_run_end_time": end_time.isoformat(),
70 | "model_run_elapsed_time_seconds": elapsed_time.total_seconds(),
71 | }
72 |
73 | runner.finish(results_file, saved_files, args.save_full_model_results, additional_metadata)
74 |
75 | logging.info("complete")
76 |
77 |
78 | def init():
79 | """Method for calling main."""
80 | if __name__ == "__main__":
81 | # run the model in a try catch block - ensures any exceptions that occur in the
82 | # multiprocessing pool are handled and logged correctly.
83 | # this prevents the docker container from hanging indefinitely.
84 | try:
85 | config = Config()
86 | main(config)
87 | except Exception as e:
88 | logging.error("An error occurred: %s", str(e))
89 | raise e
90 |
91 |
92 | init()
93 |
--------------------------------------------------------------------------------
/src/nhp/model/__main__.py:
--------------------------------------------------------------------------------
1 | """Functions to run the model.
2 |
3 | This module runs the various models. It can run a single iteration of one of the model
4 | types for debugging purposes, or it can run all of the models in parallel, saving the
5 | results to disk.
6 |
7 | There are existing launch profiles for vscode that use this file, or you can use it directly in the
8 | console, e.g.
9 |
10 | python -m nhp.model -d data --model-run 1 -t ip
11 |
12 | will run a single run of the inpatients model, returning the results to display.
13 | """
14 |
15 | import argparse
16 | import logging
17 |
18 | from nhp.model.aae import AaEModel
19 | from nhp.model.data import Local
20 | from nhp.model.inpatients import InpatientsModel
21 | from nhp.model.outpatients import OutpatientsModel
22 | from nhp.model.params import load_params, load_sample_params
23 | from nhp.model.run import run_all, run_single_model_run
24 |
25 |
26 | def _parse_args() -> argparse.Namespace: # pragma: no cover
27 | parser = argparse.ArgumentParser()
28 | parser.add_argument(
29 | "params_file",
30 | nargs="?",
31 | default="",
32 | help="Path to the params.json file (leave empty to use sample parameters).",
33 | )
34 | parser.add_argument("-d", "--data-path", help="Path to the data", default="data")
35 | parser.add_argument(
36 | "-r", "--model-run", help="Which model iteration to run", default=1, type=int
37 | )
38 | parser.add_argument(
39 | "-t",
40 | "--type",
41 | default="all",
42 | choices=["all", "aae", "ip", "op"],
43 | help="Model type, either: all, ip, op, aae",
44 | type=str,
45 | )
46 | parser.add_argument("--save-full-model-results", action="store_true")
47 | return parser.parse_args()
48 |
49 |
50 | def main() -> None:
51 | """Main method.
52 |
53 | Runs when __name__ == "__main__"
54 | """
55 | # Grab the Arguments
56 | args = _parse_args()
57 | if args.params_file == "":
58 | params = load_sample_params()
59 | else:
60 | params = load_params(args.params_file)
61 | # define the model to run
62 | match args.type:
63 | case "all":
64 | logging.basicConfig(
65 | format="%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s",
66 | level=logging.INFO,
67 | datefmt="%Y-%m-%d %H:%M:%S",
68 | )
69 |
70 | run_all(
71 | params,
72 | Local.create(args.data_path),
73 | lambda _: lambda _: None,
74 | args.save_full_model_results,
75 | )
76 | return
77 | case "aae":
78 | model_type = AaEModel
79 | case "ip":
80 | model_type = InpatientsModel
81 | case "op":
82 | model_type = OutpatientsModel
83 | case _:
84 | raise ValueError(f"Unknown model type: {args.type}")
85 |
86 | run_single_model_run(params, args.data_path, model_type, args.model_run)
87 |
88 |
89 | def init():
90 | """Method for calling main."""
91 | if __name__ == "__main__":
92 | main()
93 |
94 |
95 | init()
96 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "nhp-model"
3 | dynamic = ["version"]
4 | description = "New Hospital Programme demand model"
5 |
6 | requires-python = ">=3.11,<3.14"
7 |
8 | dependencies = [
9 | "azure-identity>=1.12",
10 | "azure-storage-blob>=12.15",
11 | "azure-storage-file-datalake>=12.10",
12 | "importlib-resources>=6.5.2",
13 | "jsonschema>=4.23.0",
14 | "numpy>=1.23",
15 | "pandas>=1.5",
16 | "pandas-flavor<0.8",
17 | "pandas-stubs>=2.3.2.250926",
18 | "pyarrow>=20.0",
19 | "pyjanitor>=0.23",
20 | "python-dotenv>=1.0",
21 | "scipy>=1.10",
22 | "tqdm>=4.65",
23 | ]
24 |
25 |
26 | # Explicitly specify which packages to include
27 | [tool.setuptools]
28 | package-dir = { "" = "src" }
29 |
30 | [tool.setuptools.packages.find]
31 | where = ["src"]
32 |
33 | [tool.setuptools.package-data]
34 | "nhp.model.data.reference" = ["*.csv", "*.json"]
35 | "nhp.model.params" = ["*.json"]
36 |
37 | [project.optional-dependencies]
38 | dev = [
39 | "coverage>=7.2",
40 | "ipykernel>=6.21",
41 | "ipython>=8.11",
42 | "ipywidgets>=8.0",
43 | "jupyter_client>=8.0",
44 | "jupyter_core>=5.2",
45 | "jupyterlab_pygments>=0.2",
46 | "jupyterlab_widgets>=3.0",
47 | "matplotlib>=3.7",
48 | "nbconvert>=7.2",
49 | "nbformat>=5.7",
50 | "notebook>=6.5",
51 | "pygam>=0.8",
52 | "pytest>=7.2",
53 | "pytest-cov>=4.0",
54 | "pytest-mock>=3.10",
55 | "ruff>=0.11.10",
56 | "setuptools-scm>=8.3.1",
57 | "snakeviz>=2.1",
58 | "widgetsnbextension>=4.0",
59 | ]
60 | databricks = [
61 | "pyspark",
62 | "databricks-connect"
63 | ]
64 | docs = [
65 | "mkdocs",
66 | "mkdocs-material",
67 | "mkdocstrings[python]",
68 | "mkdocs-gen-files",
69 | "mkdocs-literate-nav",
70 | "mkdocs-section-index"
71 | ]
72 |
73 | [build-system]
74 | requires = ["setuptools>=80", "setuptools-scm>=8", "wheel"]
75 | build-backend = "setuptools.build_meta"
76 |
77 | # Ruff configuration
78 | [tool.ruff]
79 | line-length = 100
80 | target-version = "py311"
81 | indent-width = 4
82 | exclude = ["docs"]
83 |
84 | [tool.ruff.lint.per-file-ignores]
85 | "tests/**.py" = [
86 | "D", # pydocstyle
87 | "PLC0415", # `import` should be at the top-level of a file
88 | "PLR2004", # Magic value used in comparison
89 | "PD901", # Avoid using the generic variable name `df`
90 | "RUF005", # list concatenation
91 | ]
92 |
93 | [tool.ruff.lint]
94 | # Selected rule sets: pydocstyle, pycodestyle, isort, pylint, pandas-vet, NumPy and Ruff-specific
95 | select = [
96 | "D", # pydocstyle
97 | "E", # pycodestyle errors
98 | "W", # pycodestyle warnings
99 | "I", # isort (import sorting)
100 |     "PL",  # pylint rules
101 | "PD", # pandas-vet
102 | "NPY", # NumPy-specific rules
103 | "RUF", # Ruff-specific rules
104 | ]
105 |
106 | [tool.ruff.lint.isort]
107 | # isort configuration
108 | force-single-line = false
109 | combine-as-imports = true
110 |
111 | [tool.ruff.lint.pydocstyle]
112 | convention = "google"
113 |
114 | [tool.ruff.lint.pylint]
115 | max-args = 7
116 |
117 | [tool.ruff.format]
118 | quote-style = "double"
119 | indent-style = "space"
120 | skip-magic-trailing-comma = false
121 | line-ending = "auto"
122 |
123 | [tool.setuptools_scm]
124 | write_to = "src/nhp/model/_version.py"
125 | fallback_version = "0.0.0"
126 |
127 | [tool.ty.src]
128 | exclude = ["docs"]
129 |
130 | [tool.pytest.ini_options]
131 | testpaths = ["tests/unit", "tests/integration"]
132 | python_files = ["test_*.py"]
133 | norecursedirs = ["docs", "*.egg-info", ".git", "appdir", ".tox", "__pycache__"]
134 |
--------------------------------------------------------------------------------
/src/nhp/model/data/local.py:
--------------------------------------------------------------------------------
1 | """NHP Data Loaders.
2 |
3 | Classes for loading data for the NHP model. Each class supports loading data from different sources,
4 | such as from local storage or directly from Databricks.
5 | """
6 |
7 | import pickle
8 | from typing import Any, Callable
9 |
10 | import pandas as pd
11 |
12 | from nhp.model.data import Data
13 |
14 |
15 | class Local(Data):
16 | """Load NHP data from local storage."""
17 |
18 | def __init__(self, data_path: str, year: int, dataset: str):
19 | """Initialise Local data loader class."""
20 | self._data_path = data_path
21 | self._year = str(year)
22 | self._dataset = dataset
23 |
24 | def _file_path(self, file):
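        # data files are stored in hive-style partitions, e.g. data/ip/fyear=2019/dataset=synthetic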
25 | return "/".join([self._data_path, file, f"fyear={self._year}", f"dataset={self._dataset}"])
26 |
27 | @staticmethod
28 | def create(data_path: str) -> Callable[[int, str], Any]:
29 | """Create Local Data object.
30 |
31 | Args:
32 | data_path: The path to where the data is stored locally.
33 |
34 | Returns:
35 | A function to initialise the object.
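
        Example:
            Obtain a loader for a given year and dataset (arguments are illustrative)::

                loader = Local.create("data")(2019, "synthetic")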
36 | """
37 | return lambda year, dataset: Local(data_path, year, dataset)
38 |
39 | def get_ip(self) -> pd.DataFrame:
40 | """Get the inpatients dataframe.
41 |
42 | Returns:
43 | The inpatients dataframe.
44 | """
45 | return self._get_parquet("ip")
46 |
47 | def get_ip_strategies(self) -> dict[str, pd.DataFrame]:
48 | """Get the inpatients strategies dataframe.
49 |
50 | Returns:
51 | The inpatients strategies dataframes.
52 | """
53 | return {
54 | i: self._get_parquet(f"ip_{i}_strategies")
55 | for i in ["activity_avoidance", "efficiencies"]
56 | }
57 |
58 | def get_op(self) -> pd.DataFrame:
59 | """Get the outpatients dataframe.
60 |
61 | Returns:
62 | The outpatients dataframe.
63 | """
64 | return self._get_parquet("op").rename(columns={"index": "rn"})
65 |
66 | def get_aae(self) -> pd.DataFrame:
67 | """Get the A&E dataframe.
68 |
69 | Returns:
70 | The A&E dataframe.
71 | """
72 | return self._get_parquet("aae").rename(columns={"index": "rn"})
73 |
74 | def get_birth_factors(self) -> pd.DataFrame:
75 | """Get the birth factors dataframe.
76 |
77 | Returns:
78 | The birth factors dataframe.
79 | """
80 | return self._get_parquet("birth_factors")
81 |
82 | def get_demographic_factors(self) -> pd.DataFrame:
83 | """Get the demographic factors dataframe.
84 |
85 | Returns:
86 | The demographic factors dataframe.
87 | """
88 | return self._get_parquet("demographic_factors")
89 |
90 | def get_hsa_activity_table(self) -> pd.DataFrame:
91 |         """Get the health status adjustment activity table.
92 | 
93 |         Returns:
94 |             The health status adjustment activity table dataframe.
95 |         """
96 | return self._get_parquet("hsa_activity_tables")
97 |
98 | def get_hsa_gams(self) -> Any:
99 | """Get the health status adjustment gams.
100 |
101 | Returns:
102 | The health status adjustment gams.
103 | """
104 | with open(f"{self._data_path}/hsa_gams.pkl", "rb") as hsa_pkl:
105 | return pickle.load(hsa_pkl)
106 |
107 | def get_inequalities(self) -> pd.DataFrame:
108 | """Get the inequalities dataframe.
109 |
110 | Returns:
111 | The inequalities dataframe.
112 | """
113 | return self._get_parquet("inequalities")
114 |
115 | def _get_parquet(self, file: str) -> pd.DataFrame:
116 | """Load specific parquet file using Pandas.
117 |
118 | Args:
119 | file: Specific parquet filename to open.
120 |
121 | Returns:
122 | DataFrame containing the data.
123 | """
124 |         parquet_df = pd.read_parquet(self._file_path(file))
125 |         return parquet_df
126 |
--------------------------------------------------------------------------------
/tests/unit/nhp/model/test__main__.py:
--------------------------------------------------------------------------------
1 | """Test __main__.py."""
2 |
3 | from unittest.mock import Mock, patch
4 |
5 | import pytest
6 |
7 | from nhp.model.__main__ import main
8 | from nhp.model.aae import AaEModel
9 | from nhp.model.inpatients import InpatientsModel
10 | from nhp.model.outpatients import OutpatientsModel
11 |
12 |
13 | @pytest.mark.parametrize(
14 | "activity_type, model_class",
15 | [("aae", AaEModel), ("ip", InpatientsModel), ("op", OutpatientsModel)],
16 | )
17 | def test_main_debug_runs_model(mocker, activity_type, model_class):
18 | # arrange
19 | args = Mock()
20 | args.type = activity_type
21 | args.data_path = "data"
22 | args.model_run = 0
23 | args.params_file = "params.json"
24 | mocker.patch("nhp.model.__main__._parse_args", return_value=args)
25 | ldp_mock = mocker.patch("nhp.model.__main__.load_params", return_value="params")
26 |
27 | run_all_mock = mocker.patch("nhp.model.__main__.run_all")
28 | run_single_mock = mocker.patch("nhp.model.__main__.run_single_model_run")
29 |
30 | # act
31 | main()
32 |
33 | # assert
34 | run_all_mock.assert_not_called()
35 | run_single_mock.assert_called_once_with("params", "data", model_class, 0)
36 | ldp_mock.assert_called_once_with("params.json")
37 |
38 |
39 | def test_main_can_use_sample_params(mocker):
40 | # arrange
41 | args = Mock()
42 | args.type = "ip"
43 | args.data_path = "data"
44 | args.model_run = 0
45 | args.params_file = ""
46 | mocker.patch("nhp.model.__main__._parse_args", return_value=args)
47 | ldp_mock = mocker.patch("nhp.model.__main__.load_params", return_value="params")
48 | ldsp_mock = mocker.patch("nhp.model.__main__.load_sample_params", return_value="params")
49 |
50 | run_all_mock = mocker.patch("nhp.model.__main__.run_all")
51 | run_single_mock = mocker.patch("nhp.model.__main__.run_single_model_run")
52 |
53 | # act
54 | main()
55 |
56 | # assert
57 | run_all_mock.assert_not_called()
58 | run_single_mock.assert_called_once_with("params", "data", InpatientsModel, 0)
59 | ldp_mock.assert_not_called()
60 | ldsp_mock.assert_called_once()
61 |
62 |
63 | def test_main_debug_runs_model_invalid_type(mocker):
64 | # arrange
65 | args = Mock()
66 | args.type = "invalid"
67 | args.data_path = "data"
68 | args.model_run = 0
69 | args.params_file = "queue/params.json"
70 | mocker.patch("nhp.model.__main__._parse_args", return_value=args)
71 | mocker.patch("nhp.model.__main__.load_params", return_value="params")
72 |
73 | run_all_mock = mocker.patch("nhp.model.__main__.run_all")
74 | run_single_mock = mocker.patch("nhp.model.__main__.run_single_model_run")
75 |
76 | # act
77 | with pytest.raises(ValueError):
78 | main()
79 |
80 | # assert
81 | run_all_mock.assert_not_called()
82 | run_single_mock.assert_not_called()
83 |
84 |
85 | def test_main_all_runs(mocker):
86 | # arrange
87 | args = Mock()
88 | args.type = "all"
89 | args.data_path = "data"
90 | args.params_file = "queue/params.json"
91 | args.save_full_model_results = False
92 | mocker.patch("nhp.model.__main__._parse_args", return_value=args)
93 | ldp_mock = mocker.patch("nhp.model.__main__.load_params", return_value="params")
94 | local_data_mock = mocker.patch("nhp.model.__main__.Local")
95 | local_data_mock.create.return_value = "data"
96 |
97 | run_all_mock = mocker.patch("nhp.model.__main__.run_all")
98 | run_single_mock = mocker.patch("nhp.model.__main__.run_single_model_run")
99 |
100 | # act
101 | main()
102 |
103 | # assert
104 | run_all_mock.assert_called_once()
105 | assert run_all_mock.call_args[0][0] == "params"
106 | assert run_all_mock.call_args[0][1] == "data"
107 | assert run_all_mock.call_args[0][2]("a")(0) is None
108 |
109 | run_single_mock.assert_not_called()
110 | ldp_mock.assert_called_once_with("queue/params.json")
111 | local_data_mock.create.assert_called_once_with("data")
112 |
113 |
114 | def test_init(mocker):
115 | """It should run the main method if __name__ is __main__."""
116 | import nhp.model.__main__ as r
117 |
118 | main_mock = mocker.patch("nhp.model.__main__.main")
119 |
120 |     r.init()  # shouldn't call main
121 | main_mock.assert_not_called()
122 |
123 | with patch.object(r, "__name__", "__main__"):
124 | r.init() # should call main
125 | main_mock.assert_called_once()
126 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # The New Hospital Programme Demand Model
2 |
3 |
4 |
5 | [](https://www.repostatus.org/#active) [](https://codecov.io/gh/The-Strategy-Unit/nhp_model)
8 |
9 |
10 |
11 | ## Welcome
12 |
13 | Welcome to the [New Hospital Programme demand and capacity modelling tool](https://www.strategyunitwm.nhs.uk/new-hospital-programme-demand-model).
14 |
15 |
16 |
17 | This repository contains the model code, but there are several other repositories which contain useful tools to [explore the data underpinning the model and set its parameters](https://github.com/The-Strategy-Unit/nhp_inputs), as well as to [explore model outputs](https://github.com/The-Strategy-Unit/nhp_outputs). [An overview of how the different tools interact with each other is available](https://connect.strategyunitwm.nhs.uk/nhp/project_information/project_plan_and_summary/components-overview.html).
18 |
19 | The methodology underpinning this model is outlined in this [simple one page explainer](https://connect.strategyunitwm.nhs.uk/nhp_model_explainer/). We have a more technical [project information site](https://connect.strategyunitwm.nhs.uk/nhp/project_information/) which includes further details about the model and the data that the model was built on.
20 |
21 | ## Running the model
22 |
23 | ### For external users
24 |
25 | Although all the code is available openly, it is challenging to run the model if you do not have access to the data and infrastructure at the Strategy Unit.
26 |
27 | We use national [Hospital Episode Statistics](https://digital.nhs.uk/services/data-access-request-service-dars/dars-products-and-services/data-set-catalogue/hospital-episode-statistics) data which goes through extensive processing, as detailed in the [nhp_data repository](https://github.com/The-Strategy-Unit/nhp_data).
28 | Some of the types of potentially mitigable activity rely on having access to the full national dataset, not just a local dataset.
29 | Without this data and infrastructure, your data will not be correctly formatted to run in the model.
30 |
31 | [We are working on providing synthetic data](https://github.com/The-Strategy-Unit/nhp_model/issues/347) so that interested parties can run the model locally to see how it works.
32 |
33 | Prospective users of the model should [contact the Strategy Unit](mailto:strategy.unit@nhs.net) to enquire about using the model on our existing infrastructure.
34 |
35 | Please note that it is important that the parameters of the model are set with great care and with proper support. It is also important that healthcare system partners are appropriately involved in parameter setting. For a description of the full process and support provision necessary to ensure the model functions well, please see the [NHS Futures workspace](https://future.nhs.uk/NewHospitalProgrammeDigital/browseFolder?fid=53572528&done=OBJChangesSaved).
36 |
37 | ### For internal users with full access to correctly formatted data
38 |
39 | Assuming you have your data in the correct format, store it in the `data` folder. [Further details on the correct formatting for the data to follow](https://github.com/The-Strategy-Unit/nhp_model/issues/419).
40 |
41 | The model runs using parameters that are set in a [JSON file](#json-schema).
42 |
43 | ### Running the model using `uv`
44 |
45 | This package is built using [`uv`](https://docs.astral.sh/uv/). If you have `uv` installed, run the model using: `uv run -m nhp.model path/to/params.json -d path/to/data`
46 |
47 | ### Running the model without `uv`
48 |
49 | 1. Install the `nhp_model` package using `pip install .`
50 | 1. Run the model using: `python -m nhp.model path/to/params.json -d path/to/data`
51 |
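For debugging, a single iteration of one model type can be run and its results printed to the console. As an illustrative example (adjust the paths to match your setup), the following runs one iteration of the inpatients model:

```sh
python -m nhp.model path/to/params.json -d path/to/data -t ip --model-run 1
```
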
52 | ## Deployment
53 |
54 | The model is deployed to Azure Container Registry and GitHub Container Registry on pull requests, tagging the container as `nhp_model:dev`; on releases it is tagged as `nhp_model:v*.*.*` and `nhp_model:latest`.
55 |
56 | ## JSON Schema
57 |
58 | Parameters for the model are set in JSON format; an example can be seen in `src/nhp/model/params/params-sample.json`. As the model develops, requirements for this JSON file change over time. We use [JSON schema](https://json-schema.org/understanding-json-schema/about) to manage changes to the parameters file. From model v3.5 onwards, the schemas are deployed to GitHub Pages, following this pattern:
59 |
60 | - on merge to `main`, the schema is deployed to `https://the-strategy-unit.github.io/nhp_model/dev/params-schema.json`
61 | - on release of a new model version vX.X, the schema is deployed to `https://the-strategy-unit.github.io/nhp_model/vX.X/params-schema.json`
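
As a minimal sketch (not part of the model package), a params file can be checked against the published schema using the `jsonschema` library, assuming network access and a local `params.json`:

```python
import json
import urllib.request

import jsonschema

SCHEMA_URL = "https://the-strategy-unit.github.io/nhp_model/dev/params-schema.json"

# fetch the published dev schema
with urllib.request.urlopen(SCHEMA_URL) as response:
    schema = json.load(response)

# load the local parameters file
with open("params.json") as f:
    params = json.load(f)

# raises jsonschema.ValidationError if the params do not match the schema
jsonschema.validate(instance=params, schema=schema)
```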
62 |
--------------------------------------------------------------------------------
/tests/unit/nhp/model/data/test_local.py:
--------------------------------------------------------------------------------
1 | """test nhp data (local)."""
2 |
3 | from unittest.mock import call, mock_open, patch
4 |
5 | import pandas as pd
6 |
7 | from nhp.model.data import Local
8 |
9 |
10 | def test_init_sets_values():
11 | # arrange
12 |
13 | # act
14 | d = Local("data", 2019, "synthetic")
15 |
16 | # assert
17 | assert d._data_path == "data"
18 |
19 |
20 | def test_file_path():
21 | # arrange
22 |
23 | # act
24 | d = Local("data", 2019, "synthetic")
25 |
26 | # assert
27 | assert d._file_path("ip") == "data/ip/fyear=2019/dataset=synthetic"
28 |
29 |
30 | def test_create_returns_lambda():
31 | # arrange
32 |
33 | # act
34 | d = Local.create("data")(2019, "synthetic")
35 |
36 | # assert
37 | assert d._data_path == "data"
38 |
39 |
40 | def test_get_ip(mocker):
41 | # arrange
42 | m = mocker.patch("nhp.model.data.Local._get_parquet", return_value="data")
43 | d = Local("data", 2019, "synthetic")
44 |
45 | # act
46 | actual = d.get_ip()
47 |
48 | # assert
49 | assert actual == "data"
50 | m.assert_called_once_with("ip")
51 |
52 |
53 | def test_get_ip_strategies(mocker):
54 | # arrange
55 | m = mocker.patch("nhp.model.data.Local._get_parquet", return_value="data")
56 | d = Local("data", 2019, "synthetic")
57 |
58 | # act
59 | actual = d.get_ip_strategies()
60 |
61 | # assert
62 | assert actual == {"activity_avoidance": "data", "efficiencies": "data"}
63 | assert m.call_count == 2
64 | assert list(m.call_args_list) == [
65 | call("ip_activity_avoidance_strategies"),
66 | call("ip_efficiencies_strategies"),
67 | ]
68 |
69 |
70 | def test_get_op(mocker):
71 | # arrange
72 | op_data = pd.DataFrame({"col_1": [1, 2], "col_2": [3, 4], "index": [5, 6]}, index=[2, 1])
73 | m = mocker.patch("nhp.model.data.Local._get_parquet", return_value=op_data)
74 | d = Local("data", 2019, "synthetic")
75 |
76 | # act
77 | actual = d.get_op()
78 |
79 | # assert
80 | assert actual.col_1.to_list() == [1, 2]
81 | assert actual.col_2.to_list() == [3, 4]
82 | assert actual.rn.to_list() == [5, 6]
83 | m.assert_called_once_with("op")
84 |
85 |
86 | def test_get_aae(mocker):
87 | # arrange
88 | ae_data = pd.DataFrame({"col_1": [1, 2], "col_2": [3, 4], "index": [5, 6]}, index=[2, 1])
89 | m = mocker.patch("nhp.model.data.Local._get_parquet", return_value=ae_data)
90 | d = Local("data", 2019, "synthetic")
91 |
92 | # act
93 | actual = d.get_aae()
94 |
95 | # assert
96 | assert actual.col_1.to_list() == [1, 2]
97 | assert actual.col_2.to_list() == [3, 4]
98 | assert actual.rn.to_list() == [5, 6]
99 | m.assert_called_once_with("aae")
100 |
101 |
102 | def test_get_birth_factors(mocker):
103 | # arrange
104 | m = mocker.patch("nhp.model.data.Local._get_parquet", return_value="data")
105 | d = Local("data", 2019, "synthetic")
106 |
107 | # act
108 | actual = d.get_birth_factors()
109 |
110 | # assert
111 | assert actual == "data"
112 | m.assert_called_once_with("birth_factors")
113 |
114 |
115 | def test_get_demographic_factors(mocker):
116 | # arrange
117 | m = mocker.patch("nhp.model.data.Local._get_parquet", return_value="data")
118 | d = Local("data", 2019, "synthetic")
119 |
120 | # act
121 | actual = d.get_demographic_factors()
122 |
123 | # assert
124 | assert actual == "data"
125 | m.assert_called_once_with("demographic_factors")
126 |
127 |
128 | def test_get_hsa_activity_table(mocker):
129 | # arrange
130 | m = mocker.patch("nhp.model.data.Local._get_parquet", return_value="data")
131 | d = Local("data", 2019, "synthetic")
132 |
133 | # act
134 | actual = d.get_hsa_activity_table()
135 |
136 | # assert
137 | assert actual == "data"
138 | m.assert_called_once_with("hsa_activity_tables")
139 |
140 |
141 | def test_get_hsa_gams(mocker):
142 | # arrange
143 | m = mocker.patch("pickle.load", return_value="data")
144 | d = Local("data", 2019, "synthetic")
145 |
146 | # act
147 | with patch("builtins.open", mock_open(read_data="hsa_gams")) as mock_file:
148 | actual = d.get_hsa_gams()
149 |
150 | # assert
151 | assert actual == "data"
152 | mock_file.assert_called_with("data/hsa_gams.pkl", "rb")
153 | m.assert_called_once_with(mock_file())
154 |
155 |
156 | def test_get_inequalities(mocker):
157 | # arrange
158 | m = mocker.patch("nhp.model.data.Local._get_parquet", return_value="data")
159 | d = Local("data", 2019, "synthetic")
160 |
161 | # act
162 | actual = d.get_inequalities()
163 |
164 | # assert
165 | assert actual == "data"
166 | m.assert_called_once_with("inequalities")
167 |
168 |
169 | def test_get_parquet(mocker):
170 | # arrange
171 | fp = mocker.patch("nhp.model.data.Local._file_path", return_value="file_path")
172 | m = mocker.patch("pandas.read_parquet", return_value="data")
173 | d = Local("data", 2019, "synthetic")
174 |
175 | # act
176 | actual = d._get_parquet("file")
177 |
178 | # assert
179 | assert actual == "data"
180 | fp.assert_called_once_with("file")
181 | m.assert_called_once_with("file_path")
182 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # History files
2 | .Rhistory
3 | .Rapp.history
4 |
5 | # Session Data files
6 | .RData
7 | .RDataTmp
8 |
9 | # User-specific files
10 | .Ruserdata
11 |
12 | # Example code in package build process
13 | *-Ex.R
14 |
15 | # Output files from R CMD build
16 | /*.tar.gz
17 |
18 | # Output files from R CMD check
19 | /*.Rcheck/
20 |
21 | # RStudio files
22 | .Rproj.user/
23 |
24 | # produced vignettes
25 | vignettes/*.html
26 | vignettes/*.pdf
27 |
28 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
29 | .httr-oauth
30 |
31 | # knitr and R markdown default cache directories
32 | *_cache/
33 | /cache/
34 |
35 | # Temporary files created by R markdown
36 | *.utf8.md
37 | *.knit.md
38 |
39 | # Environment Variables
40 | .Renviron
41 | .env
42 |
43 | # translation temp files
44 | po/*~
45 |
46 | # RStudio Connect folder
47 | rsconnect/
48 | .Rproj.user
49 |
50 | # ignore test/data/results folders
51 | test/
52 | data/
53 | results/
54 | run_results/
55 |
56 | # ignore any potential data/artifacts
57 | *.zip
58 | *.parquet
59 |
60 | # ignore shiny cache directory
61 | outputs/.cache
62 |
63 | # pyinstaller folders
64 | build/
65 | dist/
66 |
67 | # coverage db
68 | .coverage
69 | coverage.xml
70 |
71 | docs/_build
72 |
73 | # ignore targets meta data
74 | _targets*/
75 |
76 | # override the data rule for model/data
77 | !src/nhp/model/data
78 | !tests/unit/nhp/model/data
79 |
80 | # ignore schemas/ this is a worktree
81 | schemas/
82 |
83 | ## Python
84 | .git
85 |
86 | # Byte-compiled / optimized / DLL files
87 | __pycache__/
88 | __PYCACHE__
89 | *.py[cod]
90 | *$py.class
91 |
92 | # C extensions
93 | *.so
94 |
95 | # Distribution / packaging
96 | .Python
97 | build/
98 | develop-eggs/
99 | dist/
100 | downloads/
101 | eggs/
102 | .eggs/
103 | lib/
104 | lib64/
105 | parts/
106 | sdist/
107 | var/
108 | wheels/
109 | share/python-wheels/
110 | *.egg-info/
111 | .installed.cfg
112 | *.egg
113 | MANIFEST
114 |
115 | # PyInstaller
116 | # Usually these files are written by a python script from a template
117 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
118 | *.manifest
119 | *.spec
120 |
121 | # Installer logs
122 | pip-log.txt
123 | pip-delete-this-directory.txt
124 |
125 | # Unit test / coverage reports
126 | htmlcov/
127 | .tox/
128 | .nox/
129 | .coverage
130 | .coverage.*
131 | .cache
132 | nosetests.xml
133 | coverage.xml
134 | *.cover
135 | *.py,cover
136 | .hypothesis/
137 | .pytest_cache/
138 | cover/
139 |
140 | # Translations
141 | *.mo
142 | *.pot
143 |
144 | # Django stuff:
145 | *.log
146 | local_settings.py
147 | db.sqlite3
148 | db.sqlite3-journal
149 |
150 | # Flask stuff:
151 | instance/
152 | .webassets-cache
153 |
154 | # Scrapy stuff:
155 | .scrapy
156 |
157 | # Sphinx documentation
158 | docs/_build/
159 |
160 | # PyBuilder
161 | .pybuilder/
162 | target/
163 |
164 | # Jupyter Notebook
165 | .ipynb_checkpoints
166 |
167 | # IPython
168 | profile_default/
169 | ipython_config.py
170 |
171 | # pyenv
172 | # For a library or package, you might want to ignore these files since the code is
173 | # intended to run in multiple environments; otherwise, check them in:
174 | # .python-version
175 |
176 | # pipenv
177 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
178 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
179 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
180 | # install all needed dependencies.
181 | #Pipfile.lock
182 |
183 | # UV
184 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
185 | # This is especially recommended for binary packages to ensure reproducibility, and is more
186 | # commonly ignored for libraries.
187 | #uv.lock
188 |
189 | # poetry
190 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
191 | # This is especially recommended for binary packages to ensure reproducibility, and is more
192 | # commonly ignored for libraries.
193 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
194 | #poetry.lock
195 |
196 | # pdm
197 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
198 | #pdm.lock
199 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
200 | # in version control.
201 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
202 | .pdm.toml
203 | .pdm-python
204 | .pdm-build/
205 |
206 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
207 | __pypackages__/
208 |
209 | # Celery stuff
210 | celerybeat-schedule
211 | celerybeat.pid
212 |
213 | # SageMath parsed files
214 | *.sage.py
215 |
216 | # Environments
217 | .env
218 | .venv
219 | env/
220 | venv/
221 | ENV/
222 | env.bak/
223 | venv.bak/
224 |
225 | # Spyder project settings
226 | .spyderproject
227 | .spyproject
228 |
229 | # Rope project settings
230 | .ropeproject
231 |
232 | # mkdocs documentation
233 | /site
234 |
235 | # mypy
236 | .mypy_cache/
237 | .dmypy.json
238 | dmypy.json
239 |
240 | # Pyre type checker
241 | .pyre/
242 |
243 | # pytype static type analyzer
244 | .pytype/
245 |
246 | # Cython debug symbols
247 | cython_debug/
248 |
249 | # PyCharm
250 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
251 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
252 | # and can be added to the global gitignore or merged into this file. For a more nuclear
253 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
254 | #.idea/
255 |
256 | # PyPI configuration file
257 | .pypirc
258 | src/nhp/model/_version.py
259 |
--------------------------------------------------------------------------------
/tests/unit/nhp/docker/test___main__.py:
--------------------------------------------------------------------------------
1 | """test docker run."""
2 |
3 | from datetime import datetime
4 | from unittest.mock import Mock, patch
5 |
6 | import pytest
7 |
8 | from nhp.docker.__main__ import main, parse_args
9 |
10 |
11 | @pytest.mark.parametrize(
12 | "args, expected_file, expected_local_storage, expected_save_full_model_results",
13 | [
14 | (["test.json"], "test.json", False, False),
15 | (["test.json", "-l"], "test.json", True, False),
16 | (["test.json"], "test.json", False, False),
17 | (["test.json", "-l"], "test.json", True, False),
18 | (["test.json", "--save-full-model-results"], "test.json", False, True),
19 | ],
20 | )
21 | def test_parse_args(
22 | mocker,
23 | args,
24 | expected_file,
25 | expected_local_storage,
26 | expected_save_full_model_results,
27 | ):
28 | # arrange
29 | mocker.patch("sys.argv", ["nhp.docker.run.py"] + args)
30 |
31 | # act
32 | actual = parse_args()
33 |
34 | # assert
35 | assert actual.params_file == expected_file
36 | assert actual.local_storage == expected_local_storage
37 | assert actual.save_full_model_results == expected_save_full_model_results
38 |
39 |
40 | def test_main_local(mocker):
41 | # arrange
42 | m = mocker.patch("nhp.docker.__main__.parse_args")
43 | m().params_file = "params.json"
44 | m().local_storage = True
45 | m().save_full_model_results = False
46 |
47 | m_start_time = datetime(2025, 1, 1, 12, 0, 0)
48 | m_end_time = datetime(2025, 1, 1, 12, 0, 2)
49 | m_datetime = mocker.patch("nhp.docker.__main__.datetime")
50 | m_datetime.now.side_effect = [m_start_time, m_end_time]
51 |
52 | rwls = mocker.patch("nhp.docker.__main__.RunWithLocalStorage")
53 | rwas = mocker.patch("nhp.docker.__main__.RunWithAzureStorage")
54 |
55 | local_data_mock = mocker.patch("nhp.docker.__main__.Local")
56 | local_data_mock.create.return_value = "data"
57 |
58 | params = {
59 | "model_runs": 256,
60 | "start_year": 2019,
61 | "end_year": 2035,
62 | "app_version": "dev",
63 | }
64 |
65 | rwls().params = params
66 | rwls.reset_mock()
67 |
68 | ru_m = mocker.patch(
69 | "nhp.docker.__main__.run_all", return_value=("list_of_results", "results.json")
70 | )
71 |
72 | expected_additional_metadata = {
73 | "model_run_start_time": m_start_time.isoformat(),
74 | "model_run_end_time": m_end_time.isoformat(),
75 | "model_run_elapsed_time_seconds": 2.0,
76 | }
77 |
78 | # act
79 | main()
80 |
81 | # assert
82 | rwls.assert_called_once_with("params.json")
83 | rwas.assert_not_called()
84 |
85 | s = rwls()
86 | ru_m.assert_called_once_with(params, "data", s.progress_callback(), False)
87 | s.finish.assert_called_once_with(
88 | "results.json", "list_of_results", False, expected_additional_metadata
89 | )
90 |
91 | local_data_mock.create.assert_called_once_with("data")
92 |
93 |
94 | def test_main_azure(mocker):
95 | # arrange
96 | m = mocker.patch("nhp.docker.__main__.parse_args")
97 | m().params_file = "params.json"
98 | m().local_storage = False
99 | m().save_full_model_results = False
100 |
101 | m_start_time = datetime(2025, 1, 1, 12, 0, 0)
102 | m_end_time = datetime(2025, 1, 1, 12, 0, 2)
103 | m_datetime = mocker.patch("nhp.docker.__main__.datetime")
104 | m_datetime.now.side_effect = [m_start_time, m_end_time]
105 |
106 | rwls = mocker.patch("nhp.docker.__main__.RunWithLocalStorage")
107 | rwas = mocker.patch("nhp.docker.__main__.RunWithAzureStorage")
108 |
109 | local_data_mock = mocker.patch("nhp.docker.__main__.Local")
110 | local_data_mock.create.return_value = "data"
111 |
112 | config = Mock()
113 | config.APP_VERSION = "dev"
114 | config.DATA_VERSION = "dev"
115 | config.STORAGE_ACCOUNT = "sa"
116 |
117 | params = {
118 | "model_runs": 256,
119 | "start_year": 2019,
120 | "end_year": 2035,
121 | "app_version": "dev",
122 | }
123 |
124 | rwas().params = params
125 | rwas.reset_mock()
126 |
127 | ru_m = mocker.patch(
128 | "nhp.docker.__main__.run_all", return_value=("list_of_results", "results.json")
129 | )
130 |
131 | expected_additional_metadata = {
132 | "model_run_start_time": m_start_time.isoformat(),
133 | "model_run_end_time": m_end_time.isoformat(),
134 | "model_run_elapsed_time_seconds": 2.0,
135 | }
136 |
137 | # act
138 | main(config)
139 |
140 | # assert
141 | rwls.assert_not_called()
142 | rwas.assert_called_once_with("params.json", config)
143 |
144 | s = rwas()
145 | ru_m.assert_called_once_with(params, "data", s.progress_callback(), False)
146 | s.finish.assert_called_once_with(
147 | "results.json", "list_of_results", False, expected_additional_metadata
148 | )
149 |
150 | local_data_mock.create.assert_called_once_with("data")
151 |
152 |
153 | def test_init(mocker):
154 | """It should run the main method if __name__ is __main__."""
155 | config = mocker.patch("nhp.docker.__main__.Config")
156 |
157 | import nhp.docker.__main__ as r
158 |
159 | main_mock = mocker.patch("nhp.docker.__main__.main")
160 |
161 |     r.init()  # shouldn't call main
162 | main_mock.assert_not_called()
163 |
164 | with patch.object(r, "__name__", "__main__"):
165 | r.init() # should call main
166 | main_mock.assert_called_once_with(config())
167 |
168 |
169 | def test_init_catches_exception(mocker):
170 | # arrange
171 | mocker.patch("nhp.docker.__main__.main", side_effect=Exception("Test error"))
172 | import nhp.docker.__main__ as r
173 |
174 | m = mocker.patch("logging.error")
175 |
176 | # act
177 | with patch.object(r, "__name__", "__main__"):
178 | with pytest.raises(Exception, match="Test error"):
179 | r.init()
180 |
181 | # assert
182 | m.assert_called_once_with("An error occurred: %s", "Test error")
183 |
--------------------------------------------------------------------------------
/tests/unit/nhp/model/test_run.py:
--------------------------------------------------------------------------------
1 | """Test run.py."""
2 |
3 | from unittest.mock import Mock, call
4 |
5 | import pandas as pd
6 |
7 | from nhp.model.aae import AaEModel
8 | from nhp.model.inpatients import InpatientsModel
9 | from nhp.model.outpatients import OutpatientsModel
10 | from nhp.model.run import (
11 | _run_model,
12 | noop_progress_callback,
13 | run_all,
14 | run_single_model_run,
15 | timeit,
16 | tqdm,
17 | )
18 |
19 |
20 | def test_tqdm():
21 | tqdm.progress_callback = Mock() # type: ignore
22 | t = tqdm()
23 | t.update(5)
24 | tqdm.progress_callback.assert_called_once_with(5) # type: ignore
25 |
26 |
27 | def test_tqdm_no_callback():
28 | tqdm.progress_callback = None
29 | t = tqdm()
30 | t.update(5)
31 |
32 |
33 | def test_timeit(mocker, capsys):
34 | """It should evaluate a function and print how long it took to run it."""
35 | # arrange
36 | m = Mock(return_value="function")
37 | mocker.patch("time.time", return_value=0)
38 | # act
39 | actual = timeit(m, 1, 2, 3)
40 | # assert
41 | assert actual == "function"
42 | assert capsys.readouterr().out == "elapsed: 0.000s\n"
43 |
44 |
45 | def test_run_model(mocker):
46 | # arrange
47 | model_m = Mock()
48 | model_m.__name__ = "InpatientsModel"
49 |
50 | params = {"start_year": 2020, "end_year": 2022, "model_runs": 2}
51 | mocker.patch("os.cpu_count", return_value=2)
52 |
53 | pool_ctx_mock = mocker.patch("multiprocessing.get_context")
54 | pool_mock = pool_ctx_mock().Pool
55 | pool_ctm = pool_mock.return_value.__enter__.return_value
56 | pool_ctm.name = "pool"
57 | pool_ctm.imap = Mock(wraps=lambda f, i, **kwargs: map(f, i))
58 |
59 | pc_m = Mock()
60 |
61 | pool_ctx_mock.reset_mock()
62 |
63 | # act
64 | actual = _run_model(model_m, params, "data", "hsa", "run_params", pc_m, False) # type: ignore
65 |
66 | # assert
67 | pool_ctm.imap.assert_called_once_with(model_m().go, [1, 2], chunksize=1)
68 | assert actual == [model_m().go()] * 3
69 | pc_m.assert_called_once_with(2)
70 |
71 | pool_ctx_mock.assert_called_once_with("spawn")
72 | pool_mock.assert_called_once_with(2)
73 |
74 |
75 | def test_noop_progress_callback():
76 | # arrange, act & assert
77 | assert not noop_progress_callback("a")("b")
78 |
79 |
80 | def test_run_all(mocker):
81 | # arrange
82 | grp_m = mocker.patch(
83 | "nhp.model.run.Model.generate_run_params",
84 | return_value={"variant": "variants"},
85 | )
86 | hsa_m = mocker.patch("nhp.model.run.HealthStatusAdjustmentInterpolated", return_value="hsa")
87 |
88 | rm_m = mocker.patch("nhp.model.run._run_model", side_effect=["ip", "op", "aae"])
89 | cr_m = mocker.patch(
90 | "nhp.model.run.combine_results",
91 | return_value=({"default": "combined_results"}, "combined_step_counts"),
92 | )
93 | gr_m = mocker.patch("nhp.model.run.generate_results_json", return_value="results_json_path")
94 | sr_m = mocker.patch("nhp.model.run.save_results_files", return_value="results_paths")
95 |
96 | pc_m = Mock()
97 | pc_m().return_value = "progress callback"
98 | pc_m.reset_mock()
99 |
100 | params = {
101 | "id": "1",
102 | "dataset": "synthetic",
103 | "scenario": "test",
104 | "start_year": 2020,
105 | "end_year": 2025,
106 | "model_runs": 10,
107 | "create_datetime": "20230123_012345",
108 | }
109 | data_mock = Mock(return_value="nhp_data")
110 |
111 | # act
112 | actual = run_all(params, data_mock, pc_m, False)
113 |
114 | # assert
115 | assert actual == ("results_paths", "results_json_path")
116 |
117 | data_mock.assert_called_once_with(2020, "synthetic")
118 |
119 | assert pc_m.call_args_list == [
120 | call("Inpatients"),
121 | call("Outpatients"),
122 | call("AaE"),
123 | ]
124 |
125 | grp_m.assert_called_once_with(params)
126 | hsa_m.assert_called_once_with("nhp_data", 2020)
127 |
128 | assert rm_m.call_args_list == [
129 | call(
130 | m,
131 | params,
132 | data_mock,
133 | "hsa",
134 | {"variant": "variants"},
135 | pc_m(),
136 | False,
137 | )
138 | for m in [InpatientsModel, OutpatientsModel, AaEModel]
139 | ]
140 |
141 | cr_m.assert_called_once_with(["ip", "op", "aae"])
142 | gr_m.assert_called_once_with(
143 | {"default": "combined_results", "step_counts": "combined_step_counts"},
144 | "combined_step_counts",
145 | params,
146 | {"variant": "variants"},
147 | )
148 | sr_m.assert_called_once_with(
149 | {"default": "combined_results", "step_counts": "combined_step_counts"}, params
150 | )
151 |
152 |
153 | def test_run_single_model_run(mocker, capsys):
154 | """It should run the model and display outputs."""
155 | # arrange
156 | mr_mock = Mock()
157 | ndl_mock = mocker.patch("nhp.model.run.Local")
158 | ndl_mock.create.return_value = "nhp_data"
159 |
160 | results_m = {
161 | "default": pd.DataFrame(
162 | {
163 | "pod": ["a", "b"] * 4 + ["c"],
164 | "measure": [i for i in ["x", "y"] for _ in [1, 2]] * 2 + ["x"],
165 | "value": range(9),
166 | }
167 | )
168 | }
169 | step_counts_m = pd.DataFrame(
170 | {
171 | "change_factor": ["a", "b"] * 4 + ["c"],
172 | "measure": [i for i in ["x", "y"] for _ in [1, 2]] * 2 + ["x"],
173 | "value": range(9),
174 | }
175 | )
176 |
177 | timeit_mock = mocker.patch(
178 | "nhp.model.run.timeit",
179 | side_effect=[None, mr_mock, (results_m, step_counts_m)],
180 | )
181 | params = {"dataset": "synthetic", "start_year": 2020, "end_year": 2025}
182 |
183 | # act
184 | run_single_model_run(params, "data", "model_type", 0) # type: ignore
185 |
186 | # assert
187 | ndl_mock.create.assert_called_once_with("data")
188 |
189 | assert timeit_mock.call_count == 3
190 | assert timeit_mock.call_args_list[0] == call("model_type", params, "nhp_data")
191 | assert timeit_mock.call_args_list[2] == call(mr_mock.get_aggregate_results)
192 |
193 | assert capsys.readouterr().out == "\n".join(
194 | [
195 | "initialising model... running model... aggregating results... ",
196 | "change factors:",
197 | " value ",
198 | "measure x y",
199 | "change_factor ",
200 | "a 4 8",
201 | "b 6 10",
202 | "c 8 0",
203 | "total 18 18",
204 | "",
205 | "aggregated (default) results:",
206 | " value ",
207 | "measure x y",
208 | "pod ",
209 | "a 4.0 8.0",
210 | "b 6.0 10.0",
211 | "c 8.0 0.0",
212 | "total 18.0 18.0",
213 | "",
214 | ]
215 | )
216 |
--------------------------------------------------------------------------------
/src/nhp/model/run.py:
--------------------------------------------------------------------------------
1 | """Run the model."""
2 |
3 | import logging
4 | import multiprocessing
5 | import os
6 | import time
7 | from typing import Any, Callable, Tuple, Type
8 |
9 | from tqdm.auto import tqdm as base_tqdm
10 |
11 | from nhp.model.aae import AaEModel
12 | from nhp.model.data import Data, Local
13 | from nhp.model.health_status_adjustment import HealthStatusAdjustmentInterpolated
14 | from nhp.model.inpatients import InpatientsModel
15 | from nhp.model.model import Model
16 | from nhp.model.model_iteration import ModelIteration, ModelRunResult
17 | from nhp.model.outpatients import OutpatientsModel
18 | from nhp.model.results import combine_results, generate_results_json, save_results_files
19 |
20 |
21 | class tqdm(base_tqdm): # ty: ignore[unsupported-base]
22 | """Custom tqdm class that provides a callback function on update."""
23 |
24 |     # ideally this would be set in the constructor, but as this is a pretty
25 |     # simple use case it is just implemented as a static variable. this does mean that
26 |     # you need to update the value before using the class (each time)
27 | progress_callback = None
28 |
29 | def update(self, n=1):
30 |         """Override the default tqdm update function to run the callback method."""
31 | super().update(n)
32 | if tqdm.progress_callback:
33 | tqdm.progress_callback(self.n)
34 |
35 |
36 | def timeit(func: Callable, *args) -> Any:
37 |     """Time how long it takes to evaluate function `func` with arguments `*args`."""
38 | start = time.time()
39 | results = func(*args)
40 | print(f"elapsed: {time.time() - start:.3f}s")
41 | return results
42 |
43 |
44 | def _run_model(
45 | model_type: Type[Model],
46 | params: dict,
47 | data: Callable[[int, str], Data],
48 | hsa: Any,
49 | run_params: dict,
50 | progress_callback: Callable[[Any], None],
51 | save_full_model_results: bool,
52 | ) -> list[ModelRunResult]:
53 | """Run the model iterations.
54 |
55 | Runs the model for all of the model iterations, returning the aggregated results.
56 |
57 | Args:
58 | model_type: The type of model that we want to run.
59 | params: The parameters to run the model with.
60 | data: A callable that creates a Data instance.
61 | hsa: An instance of the HealthStatusAdjustment class.
62 | run_params: The generated run parameters for the model run.
63 | progress_callback: A callback function for progress updates.
64 | save_full_model_results: Whether to save full model results.
65 |
66 | Returns:
67 | A list containing the aggregated results for all model runs.
68 | """
69 | model_class = model_type.__name__[:-5]
70 | logging.info("%s", model_class)
71 | logging.info(" * instantiating")
72 | # ignore type issues here: Model has different arguments to Inpatients/Outpatients/A&E
73 | model = model_type(params, data, hsa, run_params, save_full_model_results) # type: ignore
74 | logging.info(" * running")
75 |
76 | # set the progress callback for this run
77 | tqdm.progress_callback = progress_callback
78 |
79 | # model run 0 is the baseline
80 | # model run 1:n are the monte carlo sims
81 | model_runs = [i + 1 for i in range(params["model_runs"])]
82 |
83 | cpus = os.cpu_count()
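    # number of model runs handed to each worker at a time (used as the chunksize for
    # Pool.imap below); can be tuned via the BATCH_SIZE environment variable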
84 | batch_size = int(os.getenv("BATCH_SIZE", "1"))
85 |
86 | ctx = multiprocessing.get_context("spawn")
87 | with ctx.Pool(cpus) as pool:
88 | baseline = model.go(0) # baseline
89 | model_results: list[ModelRunResult] = list(
90 | tqdm(
91 | pool.imap(
92 | model.go,
93 | model_runs,
94 | chunksize=batch_size,
95 | ),
96 | f"Running {model.__class__.__name__[:-5].rjust(11)} model",
97 | total=len(model_runs),
98 | )
99 | )
100 | logging.info(" * finished")
101 | # ensure that the callback reports all model runs are complete
102 | progress_callback(params["model_runs"])
103 |
104 | return [baseline, *model_results]
105 |
106 |
107 | def noop_progress_callback(_: Any) -> Callable[[Any], None]:
108 | """A no-op callback."""
109 | return lambda _: None
110 |
111 |
112 | def run_all(
113 | params: dict,
114 | nhp_data: Callable[[int, str], Data],
115 | progress_callback: Callable[[Any], Callable[[Any], None]] = noop_progress_callback,
116 | save_full_model_results: bool = False,
117 | ) -> Tuple[list, str]:
118 | """Run the model.
119 |
120 | Runs all 3 model types, aggregates and combines the results.
121 |
122 | Args:
123 | params: The parameters to use for this model run.
124 | nhp_data: The Data class to use for loading data.
125 | progress_callback: A callback function for updating progress.
126 | Defaults to noop_progress_callback.
127 | save_full_model_results: Whether to save full model results. Defaults to False.
128 |
129 | Returns:
130 | A tuple containing the list of saved files and the filename of the JSON results.
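
    Example:
        A minimal sketch using the local data loader (paths are illustrative)::

            from nhp.model.data import Local
            from nhp.model.params import load_params

            saved_files, json_file = run_all(load_params("params.json"), Local.create("data"))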
131 | """
132 | model_types = [InpatientsModel, OutpatientsModel, AaEModel]
133 | run_params = Model.generate_run_params(params)
134 |
135 | # set the data path in the HealthStatusAdjustment class
136 | hsa = HealthStatusAdjustmentInterpolated(
137 | nhp_data(params["start_year"], params["dataset"]), params["start_year"]
138 | )
139 |
140 | results, step_counts = combine_results(
141 | [
142 | _run_model(
143 | m,
144 | params,
145 | nhp_data,
146 | hsa,
147 | run_params,
148 | progress_callback(m.__name__[:-5]),
149 | save_full_model_results,
150 | )
151 | for m in model_types
152 | ]
153 | )
154 |
155 | json_filename = generate_results_json(results, step_counts, params, run_params)
156 |
157 |     # TODO: once generate_results_json is deprecated this step should be moved into combine_results
158 | results["step_counts"] = step_counts
159 | # TODO: this should be what the model returns once generate_results_json is deprecated
160 | saved_files = save_results_files(results, params)
161 |
162 | return saved_files, json_filename
163 |
164 |
165 | def run_single_model_run(
166 | params: dict, data_path: str, model_type: Type[Model], model_run: int
167 | ) -> None:
168 | """Runs a single model iteration for easier debugging in vscode."""
169 | data = Local.create(data_path)
170 |
171 | print("initialising model... ", end="")
172 | model = timeit(model_type, params, data)
173 | print("running model... ", end="")
174 | m_run = timeit(ModelIteration, model, model_run)
175 | print("aggregating results... ", end="")
176 | model_results, step_counts = timeit(m_run.get_aggregate_results)
177 | print()
178 | print("change factors:")
179 | step_counts = (
180 | step_counts.reset_index()
181 | .groupby(["change_factor", "measure"], as_index=False)["value"]
182 | .sum()
183 | .pivot_table(index="change_factor", columns="measure")
184 | )
185 | step_counts.loc["total"] = step_counts.sum()
186 | print(step_counts.fillna(0).astype(int))
187 | print()
188 | print("aggregated (default) results:")
189 |
190 | default_results = (
191 | model_results["default"]
192 | .reset_index()
193 | .groupby(["pod", "measure"], as_index=False)
194 | .agg({"value": "sum"})
195 | .pivot_table(index=["pod"], columns="measure")
196 | .fillna(0)
197 | )
198 | default_results.loc["total"] = default_results.sum()
199 | print(default_results)
200 |
--------------------------------------------------------------------------------
/src/nhp/model/aae.py:
--------------------------------------------------------------------------------
1 | """Accident and Emergency Module.
2 |
3 | Implements the A&E model.
4 | """
5 |
6 | from typing import Any, Callable, Tuple
7 |
8 | import numpy as np
9 | import pandas as pd
10 |
11 | from nhp.model.data import Data
12 | from nhp.model.model import Model
13 | from nhp.model.model_iteration import ModelIteration
14 |
15 |
16 | class AaEModel(Model):
17 | """Accident and Emergency Model.
18 |
19 | Implementation of the Model for Accident and Emergency attendances.
20 |
21 | Args:
22 | params: The parameters to run the model with, or the path to a params file to load.
23 | data: A callable that creates a Data instance.
24 | hsa: An instance of the HealthStatusAdjustment class. If left as None an instance is
25 | created. Defaults to None.
26 | run_params: The parameters to use for each model run. Generated automatically if left as
27 | None. Defaults to None.
28 | save_full_model_results: Whether to save the full model results or not. Defaults to False.
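
    Example:
        A minimal sketch constructing the model with the local data loader (paths are
        illustrative)::

            from nhp.model.data import Local
            from nhp.model.params import load_params

            model = AaEModel(load_params("params.json"), Local.create("data"))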
29 | """
30 |
31 | def __init__(
32 | self,
33 | params: dict | str,
34 | data: Callable[[int, str], Data],
35 | hsa: Any = None,
36 | run_params: dict | None = None,
37 | save_full_model_results: bool = False,
38 | ) -> None:
39 | """Initialise the A&E Model.
40 |
41 | Args:
42 | params: The parameters to use.
43 | data: A method to create a Data instance.
44 | hsa: Health Status Adjustment object. Defaults to None.
45 | run_params: The run parameters to use. Defaults to None.
46 | save_full_model_results: Whether to save full model results. Defaults to False.
47 | """
48 | # call the parent init function
49 | super().__init__(
50 | "aae",
51 | ["arrivals"],
52 | params,
53 | data,
54 | hsa,
55 | run_params,
56 | save_full_model_results,
57 | )
58 |
59 | def _get_data(self, data_loader: Data) -> pd.DataFrame:
60 | return data_loader.get_aae()
61 |
62 | def get_data_counts(self, data: pd.DataFrame) -> np.ndarray:
63 | """Get row counts of data.
64 |
65 | Args:
66 | data: The data to get the counts of.
67 |
68 | Returns:
69 | The counts of the data, required for activity avoidance steps.
70 | """
71 | return np.array([data["arrivals"]]).astype(float)
72 |
73 | def _load_strategies(self, data_loader: Data) -> None:
74 | """Loads the activity mitigation strategies."""
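        # each boolean flag column marks the rows eligible for a mitigation strategy; the
        # strategy name is derived by replacing the "aae" prefix of hsagrp with the strategy
        # name (e.g. an "aae_..." hsagrp becomes "frequent_attenders_..." for frequent attenders)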
75 | data = self.data.set_index("rn")
76 | self.strategies = {
77 | "activity_avoidance": pd.concat(
78 | [
79 | data[data[c]]["hsagrp"].str.replace("aae", n).rename("strategy")
80 | for (c, n) in [
81 | ("is_frequent_attender", "frequent_attenders"),
82 | ("is_left_before_treatment", "left_before_seen"),
83 | ("is_low_cost_referred_or_discharged", "low_cost_discharged"),
84 | ("is_discharged_no_treatment", "discharged_no_treatment"),
85 | ]
86 | ]
87 | )
88 | .to_frame()
89 | .assign(sample_rate=1)
90 | }
91 |
92 | def apply_resampling(self, row_samples: np.ndarray, data: pd.DataFrame) -> pd.DataFrame:
93 | """Apply row resampling.
94 |
95 | Called from within `model.activity_resampling.ActivityResampling.apply_resampling`.
96 |
97 | Args:
98 | row_samples: [1xn] array, where n is the number of rows in `data`, containing the new
99 | values for `data["arrivals"]`.
100 | data: The data that we want to update.
101 |
102 | Returns:
103 | The updated data.
104 | """
105 | data["arrivals"] = row_samples[0]
106 | # return the altered data
107 | return data
108 |
109 | def efficiencies(
110 | self, data: pd.DataFrame, model_iteration: ModelIteration
111 | ) -> tuple[pd.DataFrame, pd.DataFrame | None]:
112 | """Run the efficiencies steps of the model.
113 |
114 | Args:
115 | data: The data to apply efficiencies to.
116 | model_iteration: An instance of the ModelIteration class.
117 |
118 | Returns:
119 | Tuple containing the updated data and step counts (None for A&E).
120 | """
121 | # A&E doesn't have any efficiencies steps
122 | return data, None
123 |
124 | @staticmethod
125 | def process_results(data: pd.DataFrame) -> pd.DataFrame:
126 | """Process the data into a format suitable for aggregation in results files.
127 |
128 | Args:
129 | data: Data to be processed. Format should be similar to Model.data.
130 |
131 | Returns:
132 | Processed results.
133 | """
134 | data["measure"] = "walk-in"
135 | data.loc[data["is_ambulance"], "measure"] = "ambulance"
136 | data = data.rename(columns={"arrivals": "value"})
137 |
138 | # summarise the results to make the create_agg steps quicker
139 | data = (
140 | data.groupby( # ty: ignore[no-matching-overload]
141 | # note: any columns used in the calls to _create_agg, including pod and measure
142 | # must be included below
143 | [
144 | "pod",
145 | "sitetret",
146 | "acuity",
147 | "measure",
148 | "sex",
149 | "age",
150 | "age_group",
151 | "attendance_category",
152 | ],
153 | dropna=False,
154 | as_index=False,
155 | )
156 | .agg({"value": "sum"})
157 | .fillna("unknown")
158 | )
159 | return data
160 |
161 | def specific_aggregations(self, model_results: pd.DataFrame) -> dict[str, pd.Series]:
162 | """Create other aggregations specific to the model type.
163 |
164 | Args:
165 | model_results: The results of a model run.
166 |
167 | Returns:
168 | Dictionary containing the specific aggregations.
169 | """
170 | return {
171 | "acuity": self.get_agg(model_results, "acuity"),
172 | "attendance_category": self.get_agg(model_results, "attendance_category"),
173 | }
174 |
175 | def calculate_avoided_activity(
176 | self, data: pd.DataFrame, data_resampled: pd.DataFrame
177 | ) -> pd.DataFrame:
178 | """Calculate the rows that have been avoided.
179 |
180 | Args:
181 | data: The data before the binomial thinning step.
182 | data_resampled: The data after the binomial thinning step.
183 |
184 | Returns:
185 | The data that was avoided in the binomial thinning step.
186 | """
187 | avoided = data["arrivals"] - data_resampled["arrivals"]
188 | data["arrivals"] = avoided
189 | return data
190 |
191 | def save_results(self, model_iteration: ModelIteration, path_fn: Callable[[str], str]) -> None:
192 | """Save the results of running the model.
193 |
194 | This method is used for saving the results of the model run to disk as a parquet file.
195 | It saves just the `rn` (row number) column and the `arrivals`, with the intention that
196 | you rejoin to the original data.
197 |
198 | Args:
199 | model_iteration: An instance of the ModelIteration class.
200 | path_fn: A function which takes the activity type and returns a path.
201 | """
202 | model_iteration.get_model_results().set_index(["rn"])[["arrivals"]].to_parquet(
203 | f"{path_fn('aae')}/0.parquet"
204 | )
205 | model_iteration.avoided_activity.set_index(["rn"])[["arrivals"]].to_parquet(
206 | f"{path_fn('aae_avoided')}/0.parquet"
207 | )
208 |
--------------------------------------------------------------------------------
/tests/unit/nhp/model/test_aae.py:
--------------------------------------------------------------------------------
1 | """Test a&e model."""
2 |
3 | from unittest.mock import Mock, call, patch
4 |
5 | import numpy as np
6 | import pandas as pd
7 | import pytest
8 |
9 | from nhp.model.aae import AaEModel
10 |
11 |
12 | # fixtures
13 | @pytest.fixture
14 | def mock_model():
15 | """Create a mock Model instance."""
16 | with patch.object(AaEModel, "__init__", lambda s, p, d, h, r: None):
17 | mdl = AaEModel(None, None, None, None) # type: ignore
18 | mdl.model_type = "aae"
19 | mdl.params = {
20 | "dataset": "synthetic",
21 | "model_runs": 3,
22 | "seed": 1,
23 | "demographic_factors": {
24 | "file": "demographics_file.csv",
25 | "variant_probabilities": {"a": 0.6, "b": 0.4},
26 | },
27 | "start_year": 2018,
28 | "end_year": 2020,
29 | "health_status_adjustment": [0.8, 1.0],
30 | "waiting_list_adjustment": "waiting_list_adjustment",
31 | "expat": {
32 | "aae": {"ambulance": [0.7, 0.9]},
33 | "repat_local": {"aae": {"ambulance": [1.0, 1.2]}},
34 | "repat_nonlocal": {"aae": {"ambulance": [1.3, 1.5]}},
35 | },
36 | "non-demographic_adjustment": {
37 | "a": {"a_a": [1, 1.2], "a_b": [1, 1.2]},
38 | "b": {"b_a": [1, 1.2], "b_b": [1, 1.2]},
39 | },
40 | "inpatient_factors": {
41 | "admission_avoidance": {
42 | "a_a": {"interval": [0.4, 0.6]},
43 | "a_b": {"interval": [0.4, 0.6]},
44 | },
45 | "los_reduction": {
46 | "b_a": {"interval": [0.4, 0.6]},
47 | "b_b": {"interval": [0.4, 0.6]},
48 | },
49 | },
50 | "outpatient_factors": {
51 | "a": {"a_a": {"interval": [0.4, 0.6]}, "a_b": {"interval": [0.4, 0.6]}},
52 | "b": {"b_a": {"interval": [0.4, 0.6]}, "b_b": {"interval": [0.4, 0.6]}},
53 | },
54 | "aae_factors": {
55 | "a": {"a_a": {"interval": [0.4, 0.6]}, "a_b": {"interval": [0.4, 0.6]}},
56 | "b": {"b_a": {"interval": [0.4, 0.6]}, "b_b": {"interval": [0.4, 0.6]}},
57 | },
58 | }
59 | # create a minimal data object for testing
60 | mdl.data = pd.DataFrame(
61 | {
62 | "rn": list(range(1, 21)),
63 | "age": list(range(1, 6)) * 4,
64 | "sex": ([1] * 5 + [2] * 5) * 2,
65 | "hsagrp": [x for _ in range(1, 11) for x in ["aae_a_a", "aae_b_b"]],
66 | }
67 | )
68 | return mdl
69 |
70 |
71 | # methods
72 |
73 |
74 | def test_init_calls_super_init(mocker):
75 | """Test that the model calls the super method."""
76 | # arrange
77 | super_mock = mocker.patch("nhp.model.aae.super")
78 | # act
79 | AaEModel("params", "data_path", "hsa", "run_params") # type: ignore
80 | # assert
81 | super_mock.assert_called_once()
82 |
83 |
84 | def test_get_data(mock_model):
85 | # arrange
86 | mdl = mock_model
87 | data_loader = Mock()
88 | data_loader.get_aae.return_value = "aae data"
89 |
90 | # act
91 | actual = mdl._get_data(data_loader)
92 |
93 | # assert
94 | assert actual == "aae data"
95 | data_loader.get_aae.assert_called_once_with()
96 |
97 |
98 | def test_get_data_counts(mock_model):
99 | # arrange
100 | mdl = mock_model
101 | data = mdl.data
102 | data["arrivals"] = list(range(1, 21))
103 | # act
104 | actual = mdl.get_data_counts(data)
105 | # assert
106 | assert actual.tolist() == [[float(i) for i in range(1, 21)]]
107 |
108 |
109 | def test_load_strategies(mock_model):
110 | # arrange
111 | mdl = mock_model
112 | mdl.data["is_frequent_attender"] = [False] * 0 + [True] * 4 + [False] * 16
113 | mdl.data["is_left_before_treatment"] = [False] * 4 + [True] * 4 + [False] * 12
114 | mdl.data["is_low_cost_referred_or_discharged"] = [False] * 12 + [True] * 4 + [False] * 4
115 | mdl.data["is_discharged_no_treatment"] = [False] * 16 + [True] * 4
116 | # act
117 | mdl._load_strategies(None)
118 | # assert
119 | assert mdl.strategies["activity_avoidance"]["strategy"].to_list() == [
120 | "frequent_attenders_a_a",
121 | "frequent_attenders_b_b",
122 | "frequent_attenders_a_a",
123 | "frequent_attenders_b_b",
124 | "left_before_seen_a_a",
125 | "left_before_seen_b_b",
126 | "left_before_seen_a_a",
127 | "left_before_seen_b_b",
128 | "low_cost_discharged_a_a",
129 | "low_cost_discharged_b_b",
130 | "low_cost_discharged_a_a",
131 | "low_cost_discharged_b_b",
132 | "discharged_no_treatment_a_a",
133 | "discharged_no_treatment_b_b",
134 | "discharged_no_treatment_a_a",
135 | "discharged_no_treatment_b_b",
136 | ]
137 | assert mdl.strategies["activity_avoidance"]["sample_rate"].to_list() == [1] * 16
138 |
139 |
140 | def test_apply_resampling(mocker, mock_model):
141 | # arrange
142 | row_samples = np.array([[1, 2, 3, 4]])
143 | # act
144 | data = mock_model.apply_resampling(row_samples, pd.DataFrame())
145 | # assert
146 | assert data["arrivals"].to_list() == [1, 2, 3, 4]
147 |
148 |
149 | def test_efficiencies(mock_model):
150 | """Test the efficiencies method (pass)."""
151 | # arrange
152 |
153 | # act
154 | actual = mock_model.efficiencies("data", None)
155 |
156 | # assert
157 | assert actual == ("data", None)
158 |
159 |
160 | def test_specific_aggregations(mocker, mock_model):
161 | """Test that it aggregates the results correctly."""
162 | # arrange
163 | m = mocker.patch("nhp.model.AaEModel.get_agg", return_value="agg_data")
164 |
165 | mdl = mock_model
166 |
167 | # act
168 | actual = mdl.specific_aggregations("results") # type: ignore
169 |
170 | # assert
171 | assert actual == {
172 | "acuity": "agg_data",
173 | "attendance_category": "agg_data",
174 | }
175 |
176 | assert m.call_args_list == [
177 | call("results", "acuity"),
178 | call("results", "attendance_category"),
179 | ]
180 |
181 |
182 | def test_process_results(mock_model):
183 | # arrange
184 | data = pd.DataFrame(
185 | {
186 | "sitetret": ["trust"] * 4,
187 | "acuity": ["a", "a", "b", "b"],
188 | "attendance_category": [1, 1, 2, 2],
189 | "age": [1, 2, 3, 4],
190 | "age_group": [1] * 4,
191 | "sex": [1] * 4,
192 | "pod": ["aae_type-01", "aae_type-01", "aae_type-02", "aae_type-02"],
193 | "is_ambulance": [True, False, True, False],
194 | "value": [1, 2, 3, 4],
195 | }
196 | )
197 |
198 | expected = {
199 | "pod": ["aae_type-01", "aae_type-01", "aae_type-02", "aae_type-02"],
200 | "sitetret": ["trust"] * 4,
201 | "acuity": ["a", "a", "b", "b"],
202 | "measure": ["ambulance", "walk-in"] * 2,
203 | "sex": [1] * 4,
204 | "age": [1, 2, 3, 4],
205 | "age_group": [1] * 4,
206 | "attendance_category": [1, 1, 2, 2],
207 | "value": [1, 2, 3, 4],
208 | }
209 | # act
210 | actual = mock_model.process_results(data)
211 |
212 | # assert
213 | assert actual.to_dict("list") == expected
214 |
215 |
216 | def test_save_results(mocker, mock_model):
217 | """Test that it correctly saves the results."""
218 |
219 | def path_fn(x):
220 | return x
221 |
222 | mr_mock = Mock()
223 | mr_mock.get_model_results.return_value = pd.DataFrame({"rn": [0], "arrivals": [1]})
224 | mr_mock.avoided_activity = pd.DataFrame({"rn": [0], "arrivals": [1]})
225 |
226 | to_parquet_mock = mocker.patch("pandas.DataFrame.to_parquet")
227 | mock_model.save_results(mr_mock, path_fn)
228 | assert to_parquet_mock.call_args_list[0] == call("aae/0.parquet")
229 | assert to_parquet_mock.call_args_list[1] == call("aae_avoided/0.parquet")
230 |
231 |
232 | def test_calculate_avoided_activity(mock_model):
233 | # arrange
234 | data = pd.DataFrame({"rn": [0, 1], "arrivals": [4, 3]})
235 | data_resampled = pd.DataFrame({"rn": [0, 1], "arrivals": [2, 1]})
236 | # act
237 | actual = mock_model.calculate_avoided_activity(data, data_resampled)
238 | # assert
239 | assert actual.to_dict(orient="list") == {"rn": [0, 1], "arrivals": [2, 2]}
240 |
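The `mock_model` fixture above relies on a general testing trick: temporarily replace `__init__` with a no-op so a bare instance can be constructed, then attach only the attributes the tests need. A generic sketch of that pattern, using a made-up `Widget` class purely for illustration:

from unittest.mock import patch


class Widget:
    def __init__(self, connection, settings):
        # imagine expensive setup here (I/O, network calls, large data loads)
        self.connection = connection
        self.settings = settings


def make_bare_widget():
    with patch.object(Widget, "__init__", lambda self, connection, settings: None):
        widget = Widget(None, None)  # the real __init__ never runs
    widget.settings = {"retries": 3}  # attach just what the test requires
    return widget


w = make_bare_widget()
assert not hasattr(w, "connection")
assert w.settings == {"retries": 3}
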
--------------------------------------------------------------------------------
/src/nhp/model/health_status_adjustment.py:
--------------------------------------------------------------------------------
1 | """Health Status Adjustment."""
2 |
3 | from math import pi, sqrt
4 | from typing import List
5 |
6 | import numpy as np
7 | import pandas as pd
8 |
9 | from nhp.model.data import Data, reference
10 |
11 |
12 | class HealthStatusAdjustment:
13 | """Health Status Adjustment.
14 |
15 | Handles the logic for the health status adjustment in the model.
16 | """
17 |
18 | # load the static reference data files
19 |
20 | def __init__(self, data_loader: Data, base_year: str):
21 | """Initialise HealthStatusAdjustment.
22 |
23 | Base class that should not be used directly; instead see HealthStatusAdjustmentGAM
24 | or HealthStatusAdjustmentInterpolated.
25 |
26 | Args:
27 | data_loader: The data loader.
28 | base_year: The baseline year for the model run.
29 | """
30 | self._all_ages = np.arange(0, 101)
31 |
32 | self._load_life_expectancy_series(base_year)
33 | self._load_activity_ages(data_loader)
34 | self._cache = {}
35 |
36 | def _load_life_expectancy_series(self, base_year: str):
37 | # the age range that health status adjustment runs for
38 | # hardcoded to max out at 90 as ages >90 are mapped to 90
39 | self._ages = np.arange(55, 91)
40 | # load the life expectancy file, only select the rows for the ages we are interested in
41 | lexc = reference.life_expectancy().set_index(["var", "sex", "age"])
42 | lexc = lexc[lexc.index.isin(self._ages, level=2)]
43 | # calculate the life expectancy (change) between the model year and base year
44 | self._life_expectancy = lexc.apply(lambda x: x - lexc[str(base_year)])
45 |
46 | def _load_activity_ages(self, data_loader: Data):
47 | self._activity_ages = (
48 | data_loader.get_hsa_activity_table().set_index(["hsagrp", "sex", "age"]).sort_index()
49 | )["activity"]
50 |
51 | @staticmethod
52 | def generate_params(
53 | start_year: int,
54 | end_year: int,
55 | variants: List[str],
56 | rng: np.random.Generator,
57 | model_runs: int,
58 | ) -> np.ndarray:
59 | """Generate Health Status Adjustment Parameters.
60 |
61 | Args:
62 | start_year: The baseline year for the model.
63 | end_year: The year the model is running for.
64 | variants: List of population variants.
65 | rng: Random Number Generator.
66 | model_runs: Number of Model Runs.
67 |
68 | Returns:
69 | Parameters for the health status adjustment.
70 | """
71 | hsa_snp = reference.split_normal_params().set_index(["var", "sex", "year"])
72 |
73 | def gen(variant, sex):
74 | mode: float
75 | sd1: float
76 | sd2: float
77 | mode, sd1, sd2 = hsa_snp.loc[(variant, sex, end_year)] # type: ignore
78 |
79 | return np.concatenate(
80 | [
81 | [mode],
82 | HealthStatusAdjustment.random_splitnorm(rng, model_runs, mode, sd1, sd2),
83 | hsa_snp.loc[(variant, sex, np.arange(start_year + 1, end_year)), "mode"], # type: ignore
84 | ]
85 | )
86 |
87 | values = {
88 | v: np.transpose([gen(v, "m"), gen(v, "f")])
89 | for v in hsa_snp.index.levels[0] # type: ignore
90 | }
91 |
92 | variant_lookup = reference.variant_lookup()
93 | return np.array(
94 | [
95 | values[variant_lookup[v]][i]
96 | for i, v in enumerate(variants + variants[0:1] * (end_year - start_year - 1))
97 | ]
98 | )
99 |
100 | @staticmethod
101 | def random_splitnorm(
102 | rng: np.random.Generator,
103 | n: int,
104 | mode: float,
105 | sd1: float,
106 | sd2: float,
107 | ) -> np.ndarray:
108 | """Generate random splitnormal values.
109 |
110 | Args:
111 | rng: Random Number Generator.
112 | n: Number of random values to generate.
113 | mode: The mode of the distribution.
114 | sd1: The standard deviation of the left side of the distribution.
115 | sd2: The standard deviation of the right side of the distribution.
116 |
117 | Returns:
118 | n random number values sampled from the split normal distribution.
119 | """
120 | # lazy import for performance
121 | import scipy.stats as spt # noqa: PLC0415
122 |
123 | # get the cumulative probability at the mode (the share of the distribution below it)
124 | A = sqrt(2 / pi) / (sd1 + sd2)
125 | a_sqrt_tau = A * sqrt(2 * pi)
126 | p = (a_sqrt_tau * sd1) / 2
127 |
128 | # generate n random uniform values
129 | u = rng.uniform(size=n)
130 |
131 | # whether each draw will fall at or below the mode (u <= p) or not
132 | a1 = u <= p
133 |
134 | # make a single sd vector
135 | sd = np.array([sd1 if i else sd2 for i in a1])
136 | x = np.array([0 if i else a_sqrt_tau * sd2 - 1 for i in a1])
137 |
138 | return mode + sd * spt.norm.ppf((u + x) / (a_sqrt_tau * sd))
139 |
140 | def run(self, run_params: dict) -> pd.Series:
141 | """Return factor for health status adjustment.
142 |
143 | Args:
144 | run_params: The run parameters.
145 |
146 | Returns:
147 | The health status adjustment factor.
148 | """
149 | hsa_param = run_params["health_status_adjustment"]
150 | selected_variant = reference.variant_lookup()[run_params["variant"]]
151 | cache_key = (*hsa_param, selected_variant)
152 | if cache_key in self._cache:
153 | return self._cache[cache_key]
154 |
155 | lexc = self._life_expectancy.loc[(selected_variant, slice(None), slice(None))][ # type: ignore
156 | str(run_params["year"])
157 | ]
158 | hsa_param = np.repeat(hsa_param, len(self._ages))
159 | adjusted_ages = np.tile(self._ages, 2) - lexc * hsa_param
160 |
161 | factor = (
162 | self._predict_activity(adjusted_ages).rename_axis(["hsagrp", "sex", "age"])
163 | / self._activity_ages.loc[slice(None), slice(None), self._ages] # type: ignore
164 | ).rename("health_status_adjustment")
165 |
166 | # if any factor goes below 0, set it to 0
167 | factor[factor < 0] = 0
168 |
169 | self._cache[cache_key] = factor
170 | return factor
171 |
172 | def _predict_activity(self, adjusted_ages):
173 | raise NotImplementedError()
174 |
175 |
176 | class HealthStatusAdjustmentGAM(HealthStatusAdjustment):
177 | """Health Status Adjustment (GAMs)."""
178 |
179 | def __init__(self, data: Data, base_year: str):
180 | """Initialise HealthStatusAdjustmentGAM.
181 |
182 | Args:
183 | data: The data loader.
184 | base_year: The baseline year for the model run.
185 | """
186 | self._gams = data.get_hsa_gams()
187 |
188 | super().__init__(data, base_year)
189 |
190 | def _predict_activity(self, adjusted_ages):
191 | return pd.concat(
192 | {
193 | (h, s): pd.Series(
194 | g.predict(adjusted_ages.loc[s]),
195 | index=self._ages,
196 | ).apply(lambda x: x if x > 0 else 0)
197 | for (h, s), g in self._gams.items()
198 | }
199 | )
200 |
201 |
202 | class HealthStatusAdjustmentInterpolated(HealthStatusAdjustment):
203 | """Health Status Adjustment (Interpolated)."""
204 |
205 | def __init__(self, data: Data, base_year: str):
206 | """Initialise HealthStatusAdjustmentInterpolated.
207 |
208 | Args:
209 | data: The data loader.
210 | base_year: The baseline year for the model run.
211 | """
212 | super().__init__(data, base_year)
213 | self._load_activity_ages_lists()
214 |
215 | def _load_activity_ages_lists(self):
216 | self._activity_ages_lists = self._activity_ages.groupby(level=[0, 1]).agg(list)
217 |
218 | def _predict_activity(self, adjusted_ages):
219 | return pd.concat(
220 | {
221 | (h, s): pd.Series(
222 | np.interp(adjusted_ages.loc[s], self._all_ages, v),
223 | index=self._ages,
224 | ).apply(lambda x: x if x > 0 else 0)
225 | for (h, s), v in self._activity_ages_lists.items() # type: ignore
226 | }
227 | )
228 |
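A small usage sketch for `random_splitnorm` above (illustrative only, not part of the test suite). With `sd2 > sd1` the right-hand half of the distribution is wider, so roughly `sd2 / (sd1 + sd2)` of the draws should land above the mode.

import numpy as np

from nhp.model.health_status_adjustment import HealthStatusAdjustment

rng = np.random.default_rng(0)
mode, sd1, sd2 = 0.1, 0.5, 1.5

# 10,000 draws from the split normal defined by (mode, sd1, sd2)
draws = HealthStatusAdjustment.random_splitnorm(rng, 10_000, mode, sd1, sd2)

print("share of draws above the mode:", (draws > mode).mean())  # expected around 1.5 / 2.0 = 0.75
print("sample mean:", draws.mean())  # pulled above the mode by the wider right-hand tail
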
--------------------------------------------------------------------------------
/tests/unit/nhp/model/test_inpatient_efficiencies.py:
--------------------------------------------------------------------------------
1 | """Test inpatient efficiencies."""
2 |
3 | from unittest.mock import Mock, patch
4 |
5 | import numpy as np
6 | import pandas as pd
7 | import pytest
8 |
9 | from nhp.model.inpatients import InpatientEfficiencies
10 |
11 |
12 | @pytest.fixture
13 | def mock_ipe():
14 | """Create a mock InpatientEfficiencies instance."""
15 | with patch.object(InpatientEfficiencies, "__init__", lambda s, d, m: None):
16 | ipe = InpatientEfficiencies(None, None) # type: ignore
17 | ipe._model_iteration = Mock()
18 | ipe.losr = pd.DataFrame(
19 | {
20 | "type": [x for x in ["all", "sdec", "pre-op"] for _ in [0, 1]]
21 | + ["day_procedures_daycase", "day_procedures_outpatients"],
22 | "pre-op_days": [pd.NA] * 4 + [1, 2] + [pd.NA] * 2,
23 | "losr_f": [1 - 1 / (2**x) for x in range(8)],
24 | },
25 | index=["a", "b", "c", "d", "e", "f", "g", "h"],
26 | )
27 | return ipe
28 |
29 |
30 | def test_init(mocker):
31 | # arrange
32 | mocker.patch("nhp.model.inpatients.InpatientEfficiencies._select_single_strategy")
33 | mocker.patch("nhp.model.inpatients.InpatientEfficiencies._generate_losr_df")
34 |
35 | model_iteration = Mock()
36 | model_iteration.model_run = 0
37 | data = pd.DataFrame({"speldur": [1, 2, 3]})
38 | model_iteration.step_counts = "step_counts"
39 | model_iteration.model.strategies = {"efficiencies": "efficiencies"}
40 |
41 | # act
42 | actual = InpatientEfficiencies(data, model_iteration)
43 |
44 | # assert
45 | assert actual._model_iteration == model_iteration
46 | assert actual.data.equals(data)
47 | assert actual.strategies == "efficiencies"
48 | assert actual.speldur_before.to_list() == [1, 2, 3]
49 |
50 | actual._select_single_strategy.assert_called_once() # type: ignore
51 | actual._generate_losr_df.assert_called_once() # type: ignore
52 |
53 |
54 | def test_select_single_strategy(mock_ipe):
55 | # arrange
56 | m = mock_ipe
57 | m._model_iteration.rng = np.random.default_rng(0)
58 | m.data = pd.DataFrame({"rn": list(range(5)), "admimeth": ["0"] * 4 + ["3"]})
59 | m._model_iteration.model.strategies = {
60 | "efficiencies": pd.DataFrame({"strategy": ["a"] * 3 + ["b"] * 3}, index=[1, 2, 3] * 2)
61 | }
62 | m._model_iteration.params = {"efficiencies": {"ip": {"a": 2, "b": 3, "c": 4}}}
63 |
64 | # act
65 | m._select_single_strategy()
66 |
67 | # assert
68 | assert m.data.index.fillna("NULL").to_list() == [
69 | "NULL",
70 | "b",
71 | "b",
72 | "a",
73 | "NULL",
74 | ]
75 |
76 |
77 | def test_generate_losr_df(mock_ipe):
78 | # arrange
79 | m = mock_ipe
80 |
81 | m._model_iteration.params = {
82 | "efficiencies": {
83 | "ip": {
84 | "a": {"type": "1", "interval": [1, 3]},
85 | "b": {"type": "1", "interval": [2, 4]},
86 | "c": {"type": "2", "other": 1, "interval": [3, 5]},
87 | }
88 | }
89 | }
90 | m._model_iteration.run_params = {"efficiencies": {"ip": {"a": 2, "b": 3, "c": 4}}}
91 |
92 | expected = {
93 | "type": ["1", "1", "2"],
94 | "interval": [[1, 3], [2, 4], [3, 5]],
95 | "other": [None, None, 1.0],
96 | "losr_f": [2, 3, 4],
97 | }
98 |
99 | # act
100 | m._generate_losr_df()
101 | actual = m.losr.to_dict(orient="list")
102 | actual["other"] = [None if np.isnan(i) else i for i in actual["other"]]
103 |
104 | # assert
105 | assert actual == expected
106 |
107 |
108 | @pytest.mark.parametrize("losr_type", ["all", "sdec", "pre-op"])
109 | def test_losr_empty(mock_ipe, losr_type):
110 | """Test that the losr functions return self when no strategies of the given type are provided."""
111 | # arrange
112 | m = mock_ipe
113 | m.losr = m.losr[m.losr.type != losr_type]
114 | m.data = pd.DataFrame({"speldur": list(range(9))}, index=["x", "a", "b"] * 3)
115 |
116 | # act / assert
117 | match losr_type:
118 | case "all":
119 | assert m.losr_all() == m
120 | case "sdec":
121 | assert m.losr_sdec() == m
122 | case "pre-op":
123 | assert m.losr_preop() == m
124 |
125 |
126 | def test_losr_all(mock_ipe):
127 | """Test that it reduces the speldur column for 'all' types."""
128 | # arrange
129 | m = mock_ipe
130 | m.data = pd.DataFrame({"speldur": list(range(9))}, index=["x", "a", "b"] * 3)
131 | m._model_iteration.rng.binomial.return_value = np.arange(6)
132 |
133 | # act
134 | actual = m.losr_all()
135 | binomial_call_args = m._model_iteration.rng.binomial.call_args_list[0][0]
136 |
137 | # assert
138 | assert actual == m
139 |
140 | assert m.data["speldur"].to_list() == [0, 0, 3, 3, 1, 4, 6, 2, 5]
141 |
142 | assert binomial_call_args[0].to_list() == [1, 4, 7, 2, 5, 8]
143 | assert binomial_call_args[1].to_list() == [0, 0, 0, 0.5, 0.5, 0.5]
144 |
145 |
146 | def test_losr_sdec(mock_ipe):
147 | """Test that it reduces the speldur column for 'sdec' types."""
148 | # arrange
149 | m = mock_ipe
150 | m.data = pd.DataFrame(
151 | {
152 | "speldur": list(range(9)),
153 | "classpat": ["1"] * 9,
154 | },
155 | index=["x", "c", "d"] * 3,
156 | )
157 | m._model_iteration.rng.binomial.return_value = [0, 0, 1, 0, 1, 1]
158 |
159 | # act
160 | actual = m.losr_sdec()
161 | binomial_call_args = m._model_iteration.rng.binomial.call_args_list[0][0]
162 |
163 | # assert
164 | assert actual == m
165 |
166 | assert m.data["speldur"].to_list() == [0, 0, 0, 3, 0, 5, 6, 7, 8]
167 | assert m.data["classpat"].to_list() == [
168 | "1",
169 | "-3",
170 | "-3",
171 | "1",
172 | "-3",
173 | "1",
174 | "1",
175 | "1",
176 | "1",
177 | ]
178 |
179 | assert binomial_call_args[0] == 1
180 | assert binomial_call_args[1].equals(m.losr.loc[["c"] * 3 + ["d"] * 3, "losr_f"])
181 |
182 |
183 | def test_losr_preop(mock_ipe):
184 | """Test that it reduces the speldur column for 'pre-op' types."""
185 | # arrange
186 | m = mock_ipe
187 | m.data = pd.DataFrame({"speldur": list(range(9))}, index=["x", "e", "f"] * 3)
188 | m._model_iteration.rng.binomial.return_value = [0, 1, 0, 1, 0, 1]
189 |
190 | # act
191 | actual = m.losr_preop()
192 | binomial_call_args = m._model_iteration.rng.binomial.call_args_list[0][0]
193 |
194 | # assert
195 | assert actual == m
196 |
197 | assert m.data["speldur"].to_list() == [0, 1, 0, 3, 3, 5, 6, 7, 6]
198 |
199 | assert binomial_call_args[0] == 1
200 | assert binomial_call_args[1].equals(1 - m.losr.loc[["e"] * 3 + ["f"] * 3, "losr_f"])
201 |
202 |
203 | @pytest.mark.parametrize(
204 | "day_procedures_type, expected_speldur, expected_classpat",
205 | [
206 | (
207 | "day_procedures_daycase",
208 | [0, 1, 2, 3, 0, 5, 6, 7, 8] * 2,
209 | (["1"] * 4 + ["-2"] + ["1"] * 4) * 2,
210 | ),
211 | (
212 | "day_procedures_outpatients",
213 | [0, 1, 2, 3, 4, 5, 6, 0, 8] * 2,
214 | (["1"] * 7 + ["-1"] + ["1"]) * 2,
215 | ),
216 | ],
217 | )
218 | def test_losr_day_procedures(mock_ipe, day_procedures_type, expected_speldur, expected_classpat):
219 | """Test that it reduces the speldur column for 'day_procedures' types."""
220 | # arrange
221 | m = mock_ipe
222 | strats = ["day_procedures_usually_dc", "day_procedures_usually_op"]
223 | # replace the index
224 | i = m.losr.index[~m.losr.type.str.startswith("day_procedures_")].to_list() + strats
225 | m.losr.index = i
226 |
227 | m.data = pd.DataFrame(
228 | {
229 | "speldur": list(range(9)) * 2,
230 | "classpat": ["1"] * 18,
231 | },
232 | index=[x for x in ["x"] + strats for _ in range(3)] * 2,
233 | )
234 | m._model_iteration.rng.binomial.return_value = np.tile([1, 0, 1], 2)
235 | m.step_counts = {}
236 |
237 | # act
238 | actual = m.losr_day_procedures(day_procedures_type)
239 |
240 | # assert
241 | assert actual == m
242 |
243 | assert m._model_iteration.rng.binomial.call_args[0][0] == 1
244 | assert (
245 | m._model_iteration.rng.binomial.call_args[0][1]
246 | == m.losr[m.losr.type == day_procedures_type]["losr_f"].repeat(6)
247 | ).all()
248 |
249 | assert m.data["speldur"].to_list() == expected_speldur
250 | assert m.data["classpat"].to_list() == expected_classpat
251 |
252 |
253 | def test_get_step_counts(mock_ipe):
254 | # arrange
255 | mock_ipe.data = pd.DataFrame(
256 | {
257 | "rn": ["1", "2", "3", "1"],
258 | "pod": ["a", "a", "a", "a"],
259 | "sitetret": ["a", "a", "a", "a"],
260 | "classpat": ["-1", "1", "1", "1"],
261 | "speldur": [1, 2, 3, 4],
262 | },
263 | index=["a", "b", "a", "a"],
264 | )
265 | mock_ipe.speldur_before = [3, 4, 5, 6]
266 |
267 | # act
268 | actual = mock_ipe.get_step_counts()
269 |
270 | # assert
271 | assert actual.to_dict("list") == {
272 | "pod": ["a", "a"],
273 | "sitetret": ["a", "a"],
274 | "strategy": ["a", "b"],
275 | "admissions": [-1, 0],
276 | "beddays": [-7, -2],
277 | "change_factor": ["efficiencies", "efficiencies"],
278 | }
279 |
--------------------------------------------------------------------------------
/src/nhp/model/data/reference/hsa_split_normal_params.csv:
--------------------------------------------------------------------------------
1 | var,sex,year,mode,sd1,sd2
2 | ppp,f,2020,-0.834139124472048,0.471364313830598,0.178658505716017
3 | ppp,f,2021,0.0917709146251422,0.036131621111269,0.0951795525527208
4 | ppp,f,2022,0.0536166368383793,0.0550077634560789,0.143073039832003
5 | ppp,f,2023,0.614412537448788,0.723130220541921,1.91266306661884
6 | ppp,f,2024,0.231095142009604,0.91139217468491,2.38418012311657
7 | ppp,f,2025,0.152008729180074,0.545496334363621,1.44256490718915
8 | ppp,f,2026,0.12446351318926,0.430806435794337,1.12531842730168
9 | ppp,f,2027,-0.00449203033812275,0.492163171476909,1.28655751090596
10 | ppp,f,2028,0.0126432843697,0.411838390705167,1.08255366531985
11 | ppp,f,2029,-0.0845170276686872,0.461454613119302,1.20842100540571
12 | ppp,f,2030,-0.0558685469792159,0.407905034677731,1.07422614501357
13 | ppp,f,2031,-0.0373610990433803,0.367034697951773,0.97828431168848
14 | ppp,f,2032,-0.102726179592795,0.400785988015702,1.0553364449507
15 | ppp,f,2033,-0.0809292556622157,0.375403586349822,0.980387648203298
16 | ppp,f,2034,-0.0650252171071374,0.350686144473125,0.924197174709526
17 | ppp,f,2035,-0.0530497688024056,0.336711955819495,0.880614039708151
18 | ppp,f,2036,-0.0972180283591862,0.356461296665913,0.934988090118291
19 | ppp,f,2037,-0.0837532290311653,0.340432413576408,0.898821814332704
20 | ppp,f,2038,-0.0730592327757793,0.326177849161222,0.864751590563446
21 | ppp,f,2039,-0.0644433388248815,0.322087793318307,0.836158863691571
22 | ppp,f,2040,-0.0980131878853271,0.332134169938724,0.877591369074311
23 | ppp,f,2041,-0.0885500471310261,0.321630773072594,0.851242804211436
24 | ppp,f,2042,-0.080692300423259,0.316536678604328,0.831289179237144
25 | ppp,f,2043,-0.0741188689527235,0.309193313445383,0.816114566681116
26 | ppp,m,2020,0.193742417540525,0.0181524421322472,0.0478105131873017
27 | ppp,m,2021,2.0779135826601,0.36322804004715,0.960165182522645
28 | ppp,m,2022,1.10719713339036,0.273608383470943,0.723845738383006
29 | ppp,m,2023,0.782288742589041,0.247085822613072,0.645577389082937
30 | ppp,m,2024,0.652673361248066,0.303970106573748,0.80826480754188
31 | ppp,m,2025,0.521119740007301,0.28024500654209,0.732039449987178
32 | ppp,m,2026,0.44138582263599,0.259884486195972,0.688130191255533
33 | ppp,m,2027,0.387561757199391,0.247719351110333,0.65544050161377
34 | ppp,m,2028,0.348543321439797,0.241337637151892,0.635854996676599
35 | ppp,m,2029,0.291848522826749,0.269025805872835,0.705927274958473
36 | ppp,m,2030,0.268919910246,0.258646617375023,0.686797157327441
37 | ppp,m,2031,0.250641131223844,0.252709155563897,0.666075513038602
38 | ppp,m,2032,0.235617235692693,0.247280334629383,0.653025928018798
39 | ppp,m,2033,0.222960528154995,0.242752142242392,0.6420618796478
40 | ppp,m,2034,0.186153161195007,0.26021861910284,0.689358292361689
41 | ppp,m,2035,0.178172095101815,0.257651587814224,0.675171714286035
42 | ppp,m,2036,0.171110522012626,0.250494118585006,0.664281420430122
43 | ppp,m,2037,0.164771407712308,0.250676426138864,0.659308336317986
44 | ppp,m,2038,0.159010260443084,0.249641390878626,0.653517705880693
45 | ppp,m,2039,0.131483694921187,0.260198196231541,0.688938510138538
46 | ppp,m,2040,0.127787283611896,0.257713947685093,0.677476263584794
47 | ppp,m,2041,0.12428993697815,0.257004994535618,0.674165387253765
48 | ppp,m,2042,0.120958477574032,0.255553469371667,0.664477823737272
49 | ppp,m,2043,0.117766712679071,0.250685526734408,0.662934913295114
50 | lle,f,2020,0.0480432374110331,0.234661348987938,0.0896777862662031
51 | lle,f,2021,0.00896721789974483,0.941815993291145,0.360824837951047
52 | lle,f,2022,0.00746028038796708,0.0535911827234823,0.141783851073904
53 | lle,f,2023,-0.0305165356416488,0.0720675873623765,0.190113355060559
54 | lle,f,2024,-0.226919187351552,0.905667212989931,2.36710037606041
55 | lle,f,2025,-0.608461965219194,1.08021621490582,2.85538264000775
56 | lle,f,2026,-0.990004743086818,1.25806514715961,3.32568856696691
57 | lle,f,2027,-0.459428522153902,0.726610618922386,1.91981850105593
58 | lle,f,2028,-0.651087219873458,0.82239998950192,2.14966376178759
59 | lle,f,2029,-0.412116864560758,0.614241436660381,1.59505292282129
60 | lle,f,2030,-0.540480868897651,0.668716601786072,1.7557864208922
61 | lle,f,2031,-0.668844873234513,0.729199196586484,1.92483430405873
62 | lle,f,2032,-0.486952310981194,0.595835609935167,1.56447647321396
63 | lle,f,2033,-0.583668968626731,0.643206426270219,1.68852158660256
64 | lle,f,2034,-0.680385626272249,0.690158495256528,1.80705619241684
65 | lle,f,2035,-0.777102283917764,0.731046769279183,1.93179702057134
66 | lle,f,2036,-0.8738189415633,0.780539434156057,2.04203622755017
67 | lle,f,2037,-0.970535599208814,0.821164963931148,2.16561236593075
68 | lle,f,2038,-0.767167868811377,0.696486293386151,1.83260087050192
69 | lle,f,2039,-0.844896118442104,0.735079010169354,1.94638991183251
70 | lle,f,2040,-0.92262436807281,0.775218820751697,2.03184276209203
71 | lle,f,2041,-1.00035261770354,0.820447215152552,2.11445345034923
72 | lle,f,2042,-1.07808086733425,0.845122639099083,2.2239889219515
73 | lle,f,2043,-1.15580911696495,0.88836946496922,2.32688717608471
74 | lle,m,2020,-1.41209734726175,0.475273925902037,0.180858247598276
75 | lle,m,2021,0.155459111608373,0.0365606275410303,0.0954628766460847
76 | lle,m,2022,0.117175805676228,0.0542507287267984,0.142572263602899
77 | lle,m,2023,1.30823874088294,0.730900285544872,1.91715849032506
78 | lle,m,2024,0.72035535093461,0.460237134562326,1.20915227497898
79 | lle,m,2025,0.526934459706753,0.549150388951955,1.44828744759318
80 | lle,m,2026,0.39344259856617,0.426217392467631,1.13067326659635
81 | lle,m,2027,0.263827217225195,0.492770319397573,1.29073587651717
82 | lle,m,2028,0.227981860814804,0.419089880580834,1.09385319034518
83 | lle,m,2029,0.130269234417311,0.468930036576562,1.22494799064566
84 | lle,m,2030,0.127101928910282,0.408852100919786,1.07503516692737
85 | lle,m,2031,0.0485309554788635,0.447870340303633,1.17082099551401
86 | lle,m,2032,-0.0300400179525582,0.481869531186059,1.27103127642089
87 | lle,m,2033,-0.00729747152485714,0.435219055480196,1.14833067995909
88 | lle,m,2034,-0.0731073429788956,0.46742874428569,1.22736881539028
89 | lle,m,2035,-0.138917214432931,0.49450455138525,1.31836834165726
90 | lle,m,2036,-0.204727085886984,0.528971997801088,1.39783691936413
91 | lle,m,2037,-0.161709866077686,0.483060734452584,1.27495206979726
92 | lle,m,2038,-0.218404664690736,0.5103744125288,1.33903944712956
93 | lle,m,2039,-0.275099463303799,0.533499730419066,1.41477100857002
94 | lle,m,2040,-0.331794261916849,0.564666881174168,1.48878913003762
95 | lle,m,2041,-0.388489060529912,0.592961887926123,1.55848832457676
96 | lle,m,2042,-0.445183859142959,0.619598496330786,1.62227113759142
97 | lle,m,2043,-0.501878657756009,0.646728234703606,1.69256573019323
98 | hle,f,2020,-1.29925192411912,0.476444847342563,0.179473515280399
99 | hle,f,2021,0.138104732832701,0.0366228762459261,0.0954556644444989
100 | hle,f,2022,1.45929349739205,0.547770245635813,1.43881348484704
101 | hle,f,2023,0.766995215657034,0.365267814635669,0.961763260063316
102 | hle,f,2024,0.57356190036596,0.458927225230286,1.2033408794936
103 | hle,f,2025,0.405498960646502,0.371620502878449,0.973332238273481
104 | hle,f,2026,0.320580182001043,0.323057426479148,0.854610217793067
105 | hle,f,2027,0.268919067785159,0.295828599484129,0.777142382432349
106 | hle,f,2028,0.23388678578409,0.27758962356472,0.73528486940539
107 | hle,f,2029,0.168225935639427,0.313988462121628,0.820183990102987
108 | hle,f,2030,0.151822448128685,0.297101943701489,0.77319148247016
109 | hle,f,2031,0.139076178102769,0.279530739632907,0.745410140607296
110 | hle,f,2032,0.128768053066729,0.27602639633731,0.722219885136169
111 | hle,f,2033,0.120166629523592,0.267481843453053,0.701163076592194
112 | hle,f,2034,0.112806443429856,0.257982155097726,0.68613724696742
113 | hle,f,2035,0.106377185423165,0.255608664909114,0.673813558834866
114 | hle,f,2036,0.124667946841992,0.233890310382506,0.619155756149472
115 | hle,f,2037,0.118142234352003,0.23152111132887,0.612056392136136
116 | hle,f,2038,0.112210408726827,0.231076327977737,0.6086621788749
117 | hle,f,2039,0.106767666402082,0.231708009886788,0.605497030559557
118 | hle,f,2040,0.101732493494371,0.229998093179998,0.602265828463221
119 | hle,f,2041,0.113580801623248,0.218363524611616,0.574437903684957
120 | hle,f,2042,0.108517976127184,0.219428048782622,0.573501970492013
121 | hle,f,2043,0.103754078624265,0.218528344363034,0.574892506500152
122 | hle,m,2020,2.98807783169218,0.183231996816577,0.481714705960317
123 | hle,m,2021,1.56227925790639,0.182987553470709,0.485171020904193
124 | hle,m,2022,1.08567682559972,0.185806266919959,0.489160953022212
125 | hle,m,2023,0.846373428662823,0.188136052209445,0.489813056389876
126 | hle,m,2024,0.748159711873529,0.231114226052169,0.606843317130409
127 | hle,m,2025,0.623017800128974,0.223471672703284,0.592047800991743
128 | hle,m,2026,0.53892173844356,0.218905906624878,0.576041675562774
129 | hle,m,2027,0.478280448220502,0.217254294086241,0.566527433065901
130 | hle,m,2028,0.432298390161436,0.214054064826854,0.560848417761751
131 | hle,m,2029,0.396089153545024,0.212474989868474,0.560786813640649
132 | hle,m,2030,0.379317466532436,0.192012710160569,0.506812163411555
133 | hle,m,2031,0.355740465386239,0.194349244744074,0.507902010956174
134 | hle,m,2032,0.335482331867593,0.194920131698057,0.511884320855141
135 | hle,m,2033,0.317831880056301,0.19427906454031,0.516011010367043
136 | hle,m,2034,0.302267573610902,0.197051780976947,0.52005162235752
137 | hle,m,2035,0.288398260275286,0.197334952876849,0.520559162711348
138 | hle,m,2036,0.275924823618311,0.199759589258858,0.522613898252706
139 | hle,m,2037,0.276542936683482,0.190437638836115,0.49915716898656
140 | hle,m,2038,0.266032191312865,0.191878846272318,0.503835008753617
141 | hle,m,2039,0.256331577733053,0.191511106900772,0.504439140771584
142 | hle,m,2040,0.257674491834682,0.184747366680036,0.490777191243843
143 | hle,m,2041,0.24911711710412,0.186866290071902,0.49425518055093
144 | hle,m,2042,0.241083983426635,0.190117492097557,0.501631742179263
145 | hle,m,2043,0.242615042585117,0.183994654945258,0.485809556480228
146 |
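A sketch of how this table is consumed: `generate_params` (in health_status_adjustment.py above) indexes it by `(var, sex, year)` and unpacks `mode`, `sd1` and `sd2` for a given variant, sex and year. Reading the file with plain pandas and the path below is an assumption for illustration; in the model it arrives via `reference.split_normal_params()`.

import pandas as pd

# assumed path relative to the repository root
hsa_snp = pd.read_csv(
    "src/nhp/model/data/reference/hsa_split_normal_params.csv"
).set_index(["var", "sex", "year"])

# pick the split normal parameters for variant "ppp", sex "f", year 2035
mode, sd1, sd2 = hsa_snp.loc[("ppp", "f", 2035)]
print(mode, sd1, sd2)  # -0.0530..., 0.3367..., 0.8806...
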
--------------------------------------------------------------------------------
/src/nhp/docker/run.py:
--------------------------------------------------------------------------------
1 | """Run the model inside of the docker container."""
2 |
3 | import gzip
4 | import json
5 | import logging
6 | import os
7 | import re
8 | from pathlib import Path
9 | from typing import Any, Callable
10 |
11 | from azure.identity import DefaultAzureCredential
12 | from azure.storage.blob import BlobServiceClient
13 | from azure.storage.filedatalake import DataLakeServiceClient
14 |
15 | from nhp.docker.config import Config
16 | from nhp.model.params import load_params
17 | from nhp.model.run import noop_progress_callback
18 |
19 |
20 | class RunWithLocalStorage:
21 | """Methods for running with local storage."""
22 |
23 | def __init__(self, filename: str):
24 | """Initialise the RunWithLocalStorage instance.
25 |
26 | Args:
27 | filename: Name of the parameter file to load.
28 | """
29 | self.params = load_params(f"queue/{filename}")
30 |
31 | def finish(
32 | self,
33 | results_file: str,
34 | saved_files: list,
35 | save_full_model_results: bool,
36 | additional_metadata: dict,
37 | ) -> None:
38 | """Post model run steps.
39 |
40 | Args:
41 | results_file: The path to the results file.
42 | saved_files: Filepaths of the saved outputs (results in parquet format, params in JSON format).
43 | save_full_model_results: Whether to save the full model results or not.
44 | additional_metadata: Additional metadata to log.
45 | """
46 |
47 | def progress_callback(self) -> Callable[[Any], Callable[[Any], None]]:
48 | """Progress callback method.
49 |
50 | For local storage do nothing.
51 |
52 | Returns:
53 | A no-op progress callback function.
54 | """
55 | return noop_progress_callback
56 |
57 |
58 | class RunWithAzureStorage:
59 | """Methods for running with Azure storage."""
60 |
61 | def __init__(self, filename: str, config: Config = Config()):
62 | """Initialise RunWithAzureStorage.
63 |
64 | Args:
65 | filename: Name of the parameter file to load.
66 | config: The configuration for the run. Defaults to Config().
67 | """
68 | logging.getLogger("azure.storage.common.storageclient").setLevel(logging.WARNING)
69 | logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
70 | logging.WARNING
71 | )
72 | self._config = config
73 |
74 | self._app_version = re.sub("(\\d+\\.\\d+)\\..*", "\\1", config.APP_VERSION)
75 |
76 | self._blob_storage_account_url = (
77 | f"https://{self._config.STORAGE_ACCOUNT}.blob.core.windows.net"
78 | )
79 | self._adls_storage_account_url = (
80 | f"https://{self._config.STORAGE_ACCOUNT}.dfs.core.windows.net"
81 | )
82 |
83 | self.params = self._get_params(filename)
84 | self._get_data(self.params["start_year"], self.params["dataset"])
85 |
86 | def _get_container(self, container_name: str):
87 | return BlobServiceClient(
88 | account_url=self._blob_storage_account_url,
89 | credential=DefaultAzureCredential(),
90 | ).get_container_client(container_name)
91 |
92 | def _get_params(self, filename: str) -> dict:
93 | """Get the parameters for the model.
94 |
95 | Args:
96 | filename: The name of the params file.
97 |
98 | Returns:
99 | The parameters for the model.
100 | """
101 | logging.info("downloading params: %s", filename)
102 |
103 | self._queue_blob = self._get_container("queue").get_blob_client(filename)
104 |
105 | params_content = self._queue_blob.download_blob().readall()
106 |
107 | return json.loads(params_content)
108 |
109 | def _get_data(self, year: str, dataset: str) -> None:
110 | """Get data to run the model.
111 |
112 | Downloads data from Azure storage for the specified year and dataset.
113 |
114 | Args:
115 | year: The year of data to load.
116 | dataset: The dataset to load.
117 | """
118 | logging.info("downloading data (%s / %s)", year, dataset)
119 | fs_client = DataLakeServiceClient(
120 | account_url=self._adls_storage_account_url,
121 | credential=DefaultAzureCredential(),
122 | ).get_file_system_client("data")
123 |
124 | version = self._config.DATA_VERSION
125 |
126 | paths = [p.name for p in fs_client.get_paths(version, recursive=False)]
127 |
128 | for p in paths:
129 | subpath = f"{p}/fyear={year}/dataset={dataset}"
130 | os.makedirs(f"data{subpath.removeprefix(version)}", exist_ok=True)
131 |
132 | for i in fs_client.get_paths(subpath):
133 | filename = i.name
134 | if not filename.endswith("parquet"):
135 | continue
136 |
137 | logging.info(" * %s", filename)
138 | local_name = "data" + filename.removeprefix(version)
139 | with open(local_name, "wb") as local_file:
140 | file_client = fs_client.get_file_client(filename)
141 | local_file.write(file_client.download_file().readall())
142 |
143 | def _upload_results_json(self, results_file: str, metadata: dict) -> None:
144 | """Upload the results.
145 |
146 | Once the model has run, upload the results to blob storage.
147 |
148 | Args:
149 | results_file: The saved results file.
150 | metadata: The metadata to attach to the blob.
151 | """
152 | container = self._get_container("results")
153 |
154 | with open(f"results/{results_file}.json", "rb") as file:
155 | container.upload_blob(
156 | f"prod/{self._app_version}/{results_file}.json.gz",
157 | gzip.compress(file.read()),
158 | metadata=metadata,
159 | overwrite=True,
160 | )
161 |
162 | def _upload_results_files(self, files: list, metadata: dict) -> None:
163 | """Upload the results.
164 |
165 | Once the model has run, upload the files (parquet for model results and json for
166 | model params) to blob storage.
167 |
168 | Args:
169 | files: List of files to be uploaded.
170 | metadata: The metadata to attach to the blob.
171 | """
172 | container = self._get_container("results")
173 | for file in files:
174 | filename = file[8:]  # drop the leading "results/" prefix
175 | if file.endswith(".json"):
176 | metadata_to_use = metadata
177 | else:
178 | metadata_to_use = None
179 | with open(file, "rb") as f:
180 | container.upload_blob(
181 | f"aggregated-model-results/{self._app_version}/{filename}",
182 | f.read(),
183 | overwrite=True,
184 | metadata=metadata_to_use,
185 | )
186 |
187 | def _upload_full_model_results(self) -> None:
188 | container = self._get_container("results")
189 |
190 | dataset = self.params["dataset"]
191 | scenario = self.params["scenario"]
192 | create_datetime = self.params["create_datetime"]
193 |
194 | path = Path(f"results/{dataset}/{scenario}/{create_datetime}")
195 |
196 | for file in path.glob("**/*.parquet"):
197 | filename = file.as_posix()[8:]  # drop the leading "results/" prefix
198 | with open(file, "rb") as f:
199 | container.upload_blob(
200 | f"full-model-results/{self._app_version}/{filename}",
201 | f.read(),
202 | overwrite=True,
203 | )
204 |
205 | def _cleanup(self) -> None:
206 | """Cleanup.
207 |
208 | Once the model has run, remove the file from the queue.
209 | """
210 | logging.info("cleaning up queue")
211 |
212 | self._queue_blob.delete_blob()
213 |
214 | def finish(
215 | self,
216 | results_file: str,
217 | saved_files: list,
218 | save_full_model_results: bool,
219 | additional_metadata: dict,
220 | ) -> None:
221 | """Post model run steps.
222 |
223 | Args:
224 | results_file: The path to the results file.
225 | saved_files: Filepaths of the saved outputs (results in parquet format, params in JSON format).
226 | save_full_model_results: Whether to save the full model results or not.
227 | additional_metadata: Additional metadata to log.
228 | """
229 | metadata = {
230 | k: str(v)
231 | for k, v in self.params.items()
232 | if not isinstance(v, dict) and not isinstance(v, list)
233 | }
234 | metadata.update({k: str(v) for k, v in additional_metadata.items()})
235 |
236 | self._upload_results_json(results_file, metadata)
237 | self._upload_results_files(saved_files, metadata)
238 | if save_full_model_results:
239 | self._upload_full_model_results()
240 | self._cleanup()
241 |
242 | def progress_callback(self) -> Callable[[Any], Callable[[Any], None]]:
243 | """Progress callback method.
244 |
245 | Updates the metadata for the blob in the queue to give progress.
246 |
247 | Returns:
248 | A callback function that updates progress for each model type.
249 | """
250 | blob = self._queue_blob
251 |
252 | current_progress = {
253 | **blob.get_blob_properties()["metadata"],
254 | "Inpatients": 0,
255 | "Outpatients": 0,
256 | "AaE": 0,
257 | }
258 |
259 | blob.set_blob_metadata({k: str(v) for k, v in current_progress.items()})
260 |
261 | def callback(model_type: Any) -> Callable[[Any], None]:
262 | def update(n_completed: Any) -> None:
263 | current_progress[model_type] = n_completed
264 | blob.set_blob_metadata({k: str(v) for k, v in current_progress.items()})
265 |
266 | return update
267 |
268 | return callback
269 |
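A self-contained sketch of the progress-callback contract shared by both runners above: `progress_callback()` returns a factory which, given a model type, returns a per-run update function. The in-memory dict below stands in for the queue blob metadata that `RunWithAzureStorage` actually updates.

from typing import Any, Callable


def make_progress_callback(store: dict) -> Callable[[Any], Callable[[Any], None]]:
    def callback(model_type: Any) -> Callable[[Any], None]:
        def update(n_completed: Any) -> None:
            store[model_type] = n_completed  # RunWithAzureStorage writes blob metadata instead

        return update

    return callback


progress = {"Inpatients": 0, "Outpatients": 0, "AaE": 0}
callback = make_progress_callback(progress)
update_inpatients = callback("Inpatients")
update_inpatients(5)
print(progress)  # {'Inpatients': 5, 'Outpatients': 0, 'AaE': 0}
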
--------------------------------------------------------------------------------
/src/nhp/model/outpatients.py:
--------------------------------------------------------------------------------
1 | """Outpatients Module.
2 |
3 | Implements the Outpatients model.
4 | """
5 |
6 | from typing import Any, Callable, Tuple
7 |
8 | import numpy as np
9 | import pandas as pd
10 |
11 | from nhp.model.data import Data
12 | from nhp.model.model import Model
13 | from nhp.model.model_iteration import ModelIteration
14 |
15 |
16 | class OutpatientsModel(Model):
17 | """Outpatients Model.
18 |
19 | Implementation of the Model for Outpatient attendances.
20 |
21 | Args:
22 | params: The parameters to run the model with, or the path to a params file to load.
23 | data: A callable that creates a Data instance.
24 | hsa: An instance of the HealthStatusAdjustment class. If left as None an instance is
25 | created. Defaults to None.
26 | run_params: The parameters to use for each model run. Generated automatically if left as
27 | None. Defaults to None.
28 | save_full_model_results: Whether to save the full model results or not. Defaults to False.
29 | """
30 |
31 | def __init__(
32 | self,
33 | params: dict | str,
34 | data: Callable[[int, str], Data],
35 | hsa: Any = None,
36 | run_params: dict | None = None,
37 | save_full_model_results: bool = False,
38 | ) -> None:
39 | """Initialise the Outpatients Model.
40 |
41 | Args:
42 | params: The parameters to use.
43 | data: A method to create a Data instance.
44 | hsa: Health Status Adjustment object. Defaults to None.
45 | run_params: The run parameters to use. Defaults to None.
46 | save_full_model_results: Whether to save full model results. Defaults to False.
47 | """
48 | # call the parent init function
49 | super().__init__(
50 | "op",
51 | ["attendances", "tele_attendances"],
52 | params,
53 | data,
54 | hsa,
55 | run_params,
56 | save_full_model_results,
57 | )
58 |
59 | def _get_data(self, data_loader: Data) -> pd.DataFrame:
60 | return data_loader.get_op()
61 |
62 | def get_data_counts(self, data: pd.DataFrame) -> np.ndarray:
63 | """Get row counts of data.
64 |
65 | Args:
66 | data: The data to get the counts of.
67 |
68 | Returns:
69 | The counts of the data, required for activity avoidance steps.
70 | """
71 | return data[["attendances", "tele_attendances"]].to_numpy().astype(float).transpose()
72 |
73 | def _load_strategies(self, data_loader: Data) -> None:
74 | data = self.data.set_index("rn")
75 |
76 | activity_avoidance = pd.concat(
77 | [
78 | "followup_reduction_" + data[~data["is_first"] & ~data["has_procedures"]]["type"],
79 | "consultant_to_consultant_reduction_" + data[data["is_cons_cons_ref"]]["type"],
80 | "gp_referred_first_attendance_reduction_"
81 | + data[data["is_gp_ref"] & data["is_first"]]["type"],
82 | ]
83 | )
84 | efficiencies: pd.Series = pd.concat( # type: ignore
85 | ["convert_to_tele_" + data[~data["has_procedures"]]["type"]]
86 | )
87 |
88 | self.strategies: dict[str, pd.DataFrame] = {
89 | k: v.rename("strategy").to_frame().assign(sample_rate=1)
90 | for k, v in {
91 | "activity_avoidance": activity_avoidance,
92 | "efficiencies": efficiencies,
93 | }.items()
94 | }
95 |
96 | @staticmethod
97 | def _convert_to_tele(
98 | data: pd.DataFrame,
99 | model_iteration: ModelIteration,
100 | ) -> tuple[pd.DataFrame, pd.DataFrame]:
101 | """Convert attendances to tele-attendances.
102 |
103 | Args:
104 | data: The DataFrame that we are updating.
105 | model_iteration: The model iteration containing the RNG and run parameters.
106 |
107 | Returns:
108 | A tuple containing the updated data and the updated step counts.
109 | """
110 | # TODO: we need to make sure efficiencies contains the convert_to_tele keys
111 | rng = model_iteration.rng
112 | params = model_iteration.run_params["efficiencies"]["op"]
113 | strategies = model_iteration.model.strategies["efficiencies"]
114 | # make sure to take the complement of the parameter
115 | factor = 1 - data["rn"].map(strategies["strategy"].map(params)).fillna(1)
116 | # create a value for converting attendances into tele attendances for each row
117 | # the value will be a random binomial value, i.e. we will convert between 0 and attendances
118 | # into tele attendances
119 | tele_conversion = rng.binomial(data["attendances"].to_list(), factor.to_list())
120 | # update the columns, subtracting tc from one, adding tc to the other (we maintain the
121 | # number of overall attendances)
122 | data["attendances"] -= tele_conversion
123 | data["tele_attendances"] += tele_conversion
124 |
125 | step_counts = (
126 | pd.DataFrame(
127 | {
128 | "pod": data["pod"],
129 | "sitetret": data["sitetret"],
130 | "change_factor": "efficiencies",
131 | "strategy": "convert_to_tele",
132 | "attendances": tele_conversion * -1,
133 | "tele_attendances": tele_conversion,
134 | }
135 | )
136 | .groupby(["pod", "sitetret", "change_factor", "strategy"], as_index=False) # ty: ignore[no-matching-overload]
137 | .sum()
138 | .query("attendances<0")
139 | )
140 | return data, step_counts
141 |
142 | def apply_resampling(self, row_samples: np.ndarray, data: pd.DataFrame) -> pd.DataFrame:
143 | """Apply row resampling.
144 |
145 | Called from within `model.activity_resampling.ActivityResampling.apply_resampling`.
146 |
147 | Args:
148 | row_samples: [2xn] array, where n is the number of rows in `data`, containing the new
149 | values for `data["attendances"]` and `data["tele_attendances"]`.
150 | data: The data that we want to update.
151 |
152 | Returns:
153 | The updated data.
154 | """
155 | data["attendances"] = row_samples[0]
156 | data["tele_attendances"] = row_samples[1]
157 | # return the altered data
158 | return data
159 |
160 | def efficiencies(
161 | self, data: pd.DataFrame, model_iteration: ModelIteration
162 | ) -> tuple[pd.DataFrame, pd.DataFrame | None]:
163 | """Run the efficiencies steps of the model.
164 |
165 | Args:
166 | data: The data to apply efficiencies to.
167 | model_iteration: An instance of the ModelIteration class.
168 |
169 | Returns:
170 | Tuple containing the updated data and step counts.
171 | """
172 | data, step_counts = self._convert_to_tele(data, model_iteration)
173 | return data, step_counts
174 |
175 | def calculate_avoided_activity(
176 | self, data: pd.DataFrame, data_resampled: pd.DataFrame
177 | ) -> pd.DataFrame:
178 | """Calculate the rows that have been avoided.
179 |
180 | Args:
181 | data: The data before the binomial thinning step.
182 | data_resampled: The data after the binomial thinning step.
183 |
184 | Returns:
185 | The data that was avoided in the binomial thinning step.
186 | """
187 | avoided = (
188 | data[["attendances", "tele_attendances"]]
189 | - data_resampled[["attendances", "tele_attendances"]]
190 | )
191 | data[["attendances", "tele_attendances"]] = avoided
192 | return data
193 |
194 | @staticmethod
195 | def process_results(data: pd.DataFrame) -> pd.DataFrame:
196 | """Process the data into a format suitable for aggregation in results files.
197 |
198 | Args:
199 | data: Data to be processed. Format should be similar to Model.data.
200 |
201 | Returns:
202 | Processed results.
203 | """
204 | measures = data.melt(["rn"], ["attendances", "tele_attendances"], "measure")
205 |
206 | # note: any columns used in the calls to _create_agg, including pod and measure
207 | # must be included below
208 | agg_cols = [
209 | "pod",
210 | "sitetret",
211 | "measure",
212 | "sex",
213 | "age_group",
214 | "age",
215 | "tretspef",
216 | "tretspef_grouped",
217 | ]
218 | data = (
219 | data.drop(["attendances", "tele_attendances"], axis="columns")
220 | .merge(measures, on="rn")
221 | # summarise the results to make the create_agg steps quicker
222 | .groupby(
223 | agg_cols,
224 | dropna=False,
225 | as_index=False,
226 | ) # ty: ignore[no-matching-overload]
227 | .agg({"value": "sum"})
228 | .fillna("unknown")
229 | )
230 | return data
231 |
232 | def specific_aggregations(self, model_results: pd.DataFrame) -> dict[str, pd.Series]:
233 | """Create other aggregations specific to the model type.
234 |
235 | Args:
236 | model_results: The results of a model run.
237 |
238 | Returns:
239 | Dictionary containing the specific aggregations.
240 | """
241 | return {
242 | "sex+tretspef_grouped": self.get_agg(model_results, "sex", "tretspef_grouped"),
243 | "tretspef": self.get_agg(model_results, "tretspef"),
244 | }
245 |
246 | def save_results(self, model_iteration: ModelIteration, path_fn: Callable[[str], str]) -> None:
247 | """Save the results of running the model.
248 |
249 | This method is used for saving the results of the model run to disk as a parquet file.
250 | It saves just the `rn` (row number) column and the `attendances` and `tele_attendances`
251 | columns, with the intention that you rejoin to the original data.
252 |
253 | Args:
254 | model_iteration: An instance of the ModelIteration class.
255 | path_fn: A function which takes the activity type and returns a path.
256 | """
257 | model_iteration.get_model_results().set_index(["rn"])[
258 | ["attendances", "tele_attendances"]
259 | ].to_parquet(f"{path_fn('op')}/0.parquet")
260 |
261 | model_iteration.avoided_activity.set_index(["rn"])[
262 | ["attendances", "tele_attendances"]
263 | ].to_parquet(f"{path_fn('op_avoided')}/0.parquet")
264 |
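A toy illustration of the conversion step inside `_convert_to_tele` above: the efficiency parameter is complemented to give a per-attendance conversion probability, a binomial draw decides how many attendances convert, and the total activity per row is preserved. The numbers are made up for the example.

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
data = pd.DataFrame({"attendances": [10, 8, 6], "tele_attendances": [0, 1, 2]})
factor = pd.Series([0.2, 0.2, 0.0])  # 1 - parameter; 0.0 means no conversion for that row

before_totals = (data["attendances"] + data["tele_attendances"]).to_list()

# draw how many of each row's attendances become tele attendances
tele_conversion = rng.binomial(data["attendances"].to_list(), factor.to_list())
data["attendances"] -= tele_conversion
data["tele_attendances"] += tele_conversion

after_totals = (data["attendances"] + data["tele_attendances"]).to_list()
assert before_totals == after_totals  # activity has only moved between the two columns
print(data)
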
--------------------------------------------------------------------------------
/src/nhp/model/model_iteration.py:
--------------------------------------------------------------------------------
1 | """Model Iteration.
2 |
3 | Provides a simple class which holds all of the data required for a model iteration.
4 | """
5 |
6 | from typing import TYPE_CHECKING
7 |
8 | import numpy as np
9 | import pandas as pd
10 |
11 | from nhp.model.activity_resampling import ActivityResampling
12 |
13 | ModelRunResult = tuple[dict[str, pd.Series], pd.Series | None]
14 |
15 | if TYPE_CHECKING:
16 | from nhp.model.model import Model
17 |
18 |
19 | class ModelIteration:
20 | """Model Iteration.
21 |
22 | Holds all of the information for a model iteration.
23 | """
24 |
25 | def __init__(self, model: "Model", model_run: int):
26 | """Perform an iteration of the model.
27 |
28 | Args:
29 | model: An instance of a Model object.
30 | model_run: Which model iteration to run.
31 | """
32 | self.model: Model = model
33 |
34 | self.model_run = model_run
35 | # if model_run == -1, then use model_run = 0 for run params
36 | self.run_params = model._get_run_params(max(0, model_run))
37 | self.rng = np.random.default_rng(self.run_params["seed"])
38 |
39 | self._patch_run_params()
40 |
41 | # data is mutated, so is not a property
42 | self.data = model.data.copy()
43 | self.step_counts = None
44 | self.avoided_activity = pd.DataFrame()
45 |
46 | # run the model
47 | self._run()
48 |
49 | @property
50 | def params(self):
51 | """Get the model's parameters."""
52 | return self.model.params
53 |
54 | def _patch_run_params(self):
55 | """Patch Run Parameters.
56 |
57 | The run parameters for some items need to be 'patched' so that they include all of the
58 | fields that are used in that step of the model.
59 | """
60 | run_params = self.run_params
61 | for i in ["expat", "repat_local", "repat_nonlocal"]:
62 | run_params[i]["op"] = {
63 | g: run_params[i]["op"] for g in ["first", "followup", "procedure"]
64 | }
65 | run_params[i]["aae"] = {k: {"Other": v} for k, v in run_params[i]["aae"].items()}
66 |
67 | run_params["baseline_adjustment"]["aae"] = {
68 | k: {"Other": v} for k, v in run_params["baseline_adjustment"]["aae"].items()
69 | }
70 |
71 | def _run(self):
72 | if self.model_run == 0:
73 | return
74 |
75 | data_ar, step_counts_ar = (
76 | ActivityResampling(self)
77 | .demographic_adjustment()
78 | .birth_adjustment()
79 | .health_status_adjustment()
80 | .expat_adjustment()
81 | .repat_adjustment()
82 | .waiting_list_adjustment()
83 | .baseline_adjustment()
84 | .non_demographic_adjustment()
85 | .inequalities_adjustment()
86 | # call apply_resampling last, as this is what actually alters the data
87 | .apply_resampling()
88 | )
89 |
90 | data_aa, step_counts_aa = self.model.activity_avoidance(data_ar.copy(), self)
91 | data_ef, step_counts_ef = self.model.efficiencies(data_aa.copy(), self)
92 |
93 | self.avoided_activity = self.model.calculate_avoided_activity(data_ar, data_aa)
94 |
95 | self.data = data_ef
96 |
97 | step_counts_dfs_to_concat: list[pd.DataFrame] = [
98 | self.model.baseline_step_counts,
99 | step_counts_ar,
100 | step_counts_aa if step_counts_aa is not None else pd.DataFrame(),
101 | step_counts_ef if step_counts_ef is not None else pd.DataFrame(),
102 | ]
103 |
104 | self.step_counts = pd.concat(step_counts_dfs_to_concat)
105 |
106 | def fix_step_counts(
107 | self,
108 | data: pd.DataFrame,
109 | future: np.ndarray,
110 | factors: pd.DataFrame,
111 | term_name: str,
112 | ) -> pd.DataFrame:
113 | """Calculate the step counts.
114 |
115 | Calculates the step counts for the current model run and returns them.
117 |
118 | Args:
119 | data: The data for the current model run.
120 | future: The future row counts after running the poisson resampling.
121 | factors: The factors for this current model run.
122 | term_name: The name of the interaction term for this step.
123 |
124 | Returns:
125 | The step counts for this step.
126 | """
127 | before = self.model.get_data_counts(data)
128 | # convert the parameter values from the factors dataframe to a numpy array, one row per change factor
129 | param_values = np.array(list(factors.to_numpy().transpose()))
130 | # later on we want to multiply by the baseline counts, so the numpy shapes need to be
131 | # broadcast-compatible:
132 | # param_values has shape (x, y) and the baseline has shape (z, y);
133 | # reshaping to (x, 1, y) allows it to broadcast against (z, y)
134 | shape = (param_values.shape[0], 1, param_values.shape[1])
135 | # calculate the simple effect of each parameter, as if it were applied in isolation from
136 | # all other parameters
137 | param_simple_effects = (param_values - 1).reshape(shape) * before
138 | # what is the difference left over from the expected changes (model interaction term)
139 | diff = future - (before + param_simple_effects.sum(axis=0))
140 | # convert the 3d numpy array back to a pandas dataframe aggregated by the columns we are
141 | # interested in
142 | idx = pd.MultiIndex.from_frame(data[["pod", "sitetret"]]) # ty: ignore[invalid-argument-type]
143 | return pd.concat(
144 | [
145 | pd.DataFrame(v.transpose(), columns=self.model.measures, index=idx)
146 | .groupby(level=idx.names)
147 | .sum()
148 | .assign(change_factor=k)
149 | .reset_index()
150 | for k, v in {
151 | **dict(zip(factors.columns, param_simple_effects)),
152 | term_name: diff,
153 | }.items()
154 | ]
155 | )
156 |
157 | def get_aggregate_results(self) -> ModelRunResult:
158 | """Aggregate the model results.
159 |
160 | Can also be used to aggregate the baseline data by passing in the raw data.
161 |
162 | Returns:
163 | A tuple containing a dictionary of results, and the step counts.
164 | """
165 | aggregations = self.model.aggregate(self)
166 |
167 | if not self.avoided_activity.empty:
168 | avoided_activity_agg = self.model.process_results(self.avoided_activity)
169 | aggregations["avoided_activity"] = self.model.get_agg(
170 | avoided_activity_agg, "sex", "age_group"
171 | )
172 |
173 | return aggregations, self.get_step_counts()
174 |
175 | def get_step_counts(self) -> pd.Series | None:
176 | """Get the step counts of a model run."""
177 | if self.step_counts is None:
178 | return None
179 |
180 | step_counts = (
181 | self.step_counts.melt(
182 | [i for i in self.step_counts.columns if i not in self.model.measures],
183 | var_name="measure",
184 | )
185 | .assign(activity_type=self.model.model_type)
186 | .set_index(
187 | [
188 | "activity_type",
189 | "sitetret",
190 | "pod",
191 | "change_factor",
192 | "strategy",
193 | "measure",
194 | ]
195 | )
196 | .sort_index()["value"]
197 | )
198 |
199 | step_counts = self._step_counts_get_type_changes(step_counts)
200 |
201 | return step_counts
202 |
203 | def _step_counts_get_type_changes(self, step_counts) -> pd.Series:
204 | return pd.concat(
205 | [
206 | step_counts,
207 | self._step_counts_get_type_change_daycase(step_counts),
208 | self._step_counts_get_type_change_outpatients(step_counts),
209 | self._step_counts_get_type_change_sdec(step_counts),
210 | ]
211 | ) # type: ignore
212 |
213 | def _step_counts_get_type_change_daycase(self, step_counts):
214 | # get the daycase conversion values
215 | sc_tc_df = (
216 | step_counts[
217 | step_counts.index.isin(
218 | ["day_procedures_usually_dc", "day_procedures_occasionally_dc"],
219 | level="strategy",
220 | )
221 | ]
222 | .to_frame()
223 | .reset_index()
224 | )
225 | sc_tc_df["pod"] = "ip_elective_daycase"
226 | sc_tc_df.loc[sc_tc_df["measure"] == "beddays", "value"] = sc_tc_df.loc[
227 | sc_tc_df["measure"] == "admissions", "value"
228 | ].tolist()
229 | return sc_tc_df.groupby(step_counts.index.names)["value"].sum() * -1
230 |
231 | def _step_counts_get_type_change_outpatients(self, step_counts):
232 | # get the outpatient conversion values
233 | sc_tc_df = (
234 | step_counts[
235 | step_counts.index.isin(
236 | ["day_procedures_usually_op", "day_procedures_occasionally_op"],
237 | level="strategy",
238 | )
239 | & (step_counts.index.get_level_values("measure") == "admissions")
240 | ]
241 | .to_frame()
242 | .reset_index()
243 | )
244 |
245 | sc_tc_df["activity_type"] = "op"
246 | sc_tc_df["pod"] = "op_procedure"
247 | sc_tc_df["measure"] = "attendances"
248 |
249 | return sc_tc_df.groupby(step_counts.index.names)["value"].sum() * -1
250 |
251 | def _step_counts_get_type_change_sdec(self, step_counts):
252 | # get the sdec conversion values
253 | sc_tc_df = (
254 | step_counts[
255 | step_counts.index.isin(
256 | [
257 | f"same_day_emergency_care_{i}"
258 | for i in ["very_high", "high", "moderate", "low"]
259 | ],
260 | level="strategy",
261 | )
262 | & (step_counts.index.get_level_values("measure") == "admissions")
263 | ]
264 | .to_frame()
265 | .reset_index()
266 | )
267 |
268 | sc_tc_df["activity_type"] = "aae"
269 | sc_tc_df["pod"] = "aae_type-05"
270 | sc_tc_df["measure"] = "arrivals"
271 |
272 | return sc_tc_df.groupby(step_counts.index.names)["value"].sum() * -1
273 |
274 | def get_model_results(self):
275 | """Get the model results of a model run."""
276 | return self.data.reset_index(drop=True).drop(columns=["hsagrp"])
277 |
--------------------------------------------------------------------------------
/src/nhp/model/activity_resampling.py:
--------------------------------------------------------------------------------
1 | """Inpatient Row Resampling.
2 |
3 | Methods for handling row resampling
4 | """
5 |
6 | from typing import TYPE_CHECKING
7 |
8 | import numpy as np
9 | import pandas as pd
10 |
11 | if TYPE_CHECKING:
12 | from nhp.model.model_iteration import ModelIteration
13 |
14 |
15 | class ActivityResampling:
16 | """Activity Resampling.
17 |
18 | Class for handling the activity resampling methods in the model. The class keeps track
19 | of the current row counts, which represent the value for the lambda parameter to a
20 | random poisson when we come to resample the rows, and the step counts (the estimated
21 | effect of each step on the total number of rows).
22 |
23 | The public methods of this class are intended to each be called either once, or not
24 | at all.
25 | These methods update the row counts by multiplying the current value by the factor
26 | generated from that method.
27 |
28 | Once all of the methods have been run, we need to call the `apply_resampling`
29 | method.
30 | This updates the `model_iteration` which is passed in at initialisation.
31 |
32 | Args:
33 | model_iteration: The model iteration object, which contains all of the required
34 | values to run the model.
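
Example:
    A minimal sketch of the intended call pattern (only some of the adjustment methods
    are shown, and `model_iteration` is assumed to be an initialised ModelIteration):

        resampler = ActivityResampling(model_iteration)
        data, step_counts = (
            resampler.demographic_adjustment()
            .birth_adjustment()
            .health_status_adjustment()
            .apply_resampling()
        )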
35 | """
36 |
37 | def __init__(self, model_iteration: "ModelIteration") -> None:
38 | """Initialise ActivityResampling.
39 |
40 | Args:
41 | model_iteration: The current model iteration we are performing.
42 | """
43 | self._model_iteration = model_iteration
44 |
45 | # initialise step counts
46 | self.factors = []
47 |
48 | @property
49 | def _baseline_counts(self):
50 | return self._model_iteration.model.baseline_counts
51 |
52 | @property
53 | def _activity_type(self):
54 | return self._model_iteration.model.model_type
55 |
56 | @property
57 | def params(self):
58 | """Get the models params."""
59 | return self._model_iteration.params
60 |
61 | @property
62 | def run_params(self):
63 | """Get the current params for the model run."""
64 | return self._model_iteration.run_params
65 |
66 | @property
67 | def demog_factors(self):
68 | """Get the demographic factors for the model."""
69 | return self._model_iteration.model.demog_factors
70 |
71 | @property
72 | def birth_factors(self):
73 | """Get the birth factors for the model."""
74 | return self._model_iteration.model.birth_factors
75 |
76 | @property
77 | def hsa(self):
78 | """Get the health status adjustment GAMs for the model."""
79 | return self._model_iteration.model.hsa
80 |
81 | @property
82 | def inequalities_factors(self):
83 | """Get the inequalities factors for the model."""
84 | return self._model_iteration.model.inequalities_factors
85 |
86 | @property
87 | def data(self):
88 | """Get the current model runs data."""
89 | return self._model_iteration.data
90 |
91 | def _update(self, factor: pd.Series) -> "ActivityResampling":
92 | step = factor.name
93 |
94 | factor = (
95 | self.data.merge(factor, how="left", left_on=factor.index.names, right_index=True)[step]
96 | .astype(float)
97 | .fillna(1.0)
98 | )
99 |
100 | self.factors.append(factor)
101 |
102 | return self
103 |
104 | def demographic_adjustment(self) -> "ActivityResampling":
105 | """Perform the demograhic adjustment."""
106 | year = str(self.run_params["year"])
107 | variant = self.run_params["variant"]
108 |
109 | factor = self.demog_factors.loc[(variant, slice(None), slice(None))][year].rename(
110 | "demographic_adjustment"
111 | )
112 |
113 | groups = set(self.data["group"]) - {"maternity"}
114 | factor: pd.Series = pd.concat({i: factor for i in groups}) # type: ignore
115 | factor.index.names = ["group", *factor.index.names[1:]]
116 |
117 | return self._update(factor)
118 |
119 | def birth_adjustment(self) -> "ActivityResampling":
120 | """Perform the birth adjustment."""
121 | year = str(self.run_params["year"])
122 | variant = self.run_params["variant"]
123 |
124 | factor = self.birth_factors.loc[([variant], slice(None), slice(None))][year]
125 |
126 | factor = pd.Series(
127 | factor.values,
128 | name="birth_adjustment",
129 | index=pd.MultiIndex.from_tuples(
130 | [("maternity", a, s) for _, a, s in factor.index.to_numpy()],
131 | names=["group", "age", "sex"],
132 | ),
133 | )
134 |
135 | return self._update(factor)
136 |
137 | def health_status_adjustment(self) -> "ActivityResampling":
138 | """Perform the health status adjustment."""
139 | if not self.params["health_status_adjustment"]:
140 | return self
141 |
142 | return self._update(self.hsa.run(self.run_params))
143 |
144 | def inequalities_adjustment(self) -> "ActivityResampling":
145 | """Perform the inequalities adjustment."""
146 | activity_type = self._activity_type
147 |
148 | match activity_type:
149 | case "op":
150 | factor_key = "procedure"
151 | case "ip":
152 | factor_key = "elective"
153 | case _:
154 | return self
155 |
156 | if not self.params["inequalities"]:
157 | return self
158 |
159 | # TODO: currently only works for provider level model (we overwrite provider in PBM)
160 | # We need to match on ICB *and* provider for PBM
161 |
162 | factor = self.inequalities_factors.set_index(["icb", "sushrg_trimmed", "imd_quintile"])[
163 | "factor"
164 | ]
165 |
166 | factor: pd.Series = pd.concat({factor_key: factor}, names=["group"]) # type: ignore
167 | factor.name = "inequalities"
168 | return self._update(factor)
169 |
170 | def expat_adjustment(self) -> "ActivityResampling":
171 | """Perform the expatriation adjustment."""
172 | params = {
173 | k: v
174 | for k, v in self.run_params["expat"][self._activity_type].items()
175 | if v # remove empty values from the dictionary
176 | }
177 | if not params:
178 | return self
179 |
180 | factor: pd.Series = pd.concat( # type: ignore
181 | {k: pd.Series(v, name="expat") for k, v in params.items()}
182 | )
183 | factor.index.names = ["group", "tretspef_grouped"]
184 | return self._update(factor)
185 |
186 | def repat_adjustment(self) -> "ActivityResampling":
187 | """Perform the repatriation adjustment."""
188 | params = {
189 | (is_main_icb, k): pd.Series(v, name="repat")
190 | for (is_main_icb, repat_type) in [
191 | (1, "repat_local"),
192 | (0, "repat_nonlocal"),
193 | ]
194 | for k, v in self.run_params[repat_type][self._activity_type].items()
195 | if v # remove empty values from the dictionary
196 | }
197 | if not params:
198 | return self
199 |
200 | factor: pd.Series = pd.concat(params) # type: ignore
201 | factor.index.names = ["is_main_icb", "group", "tretspef_grouped"]
202 | return self._update(factor)
203 |
204 | def baseline_adjustment(self) -> "ActivityResampling":
205 | """Perform the baseline adjustment.
206 |
207 | A value of 1 will indicate that we want to sample this row at the baseline rate. A value
208 | less than 1 will indicate we want to sample that row less often than in the baseline, and
209 | a value greater than 1 will indicate that we want to sample that row more often than in the
210 | baseline.
211 | """
212 | if not (params := self.run_params["baseline_adjustment"][self._activity_type]):
213 | return self
214 |
215 | factor: pd.Series = pd.concat( # type: ignore
216 | {
217 | k: pd.Series(v, name="baseline_adjustment", dtype="float64")
218 | for k, v in params.items()
219 | }
220 | )
221 | factor.index.names = ["group", "tretspef_grouped"]
222 | return self._update(factor)
223 |
224 | def waiting_list_adjustment(self) -> "ActivityResampling":
225 | """Perform the waiting list adjustment.
226 |
227 | A value of 1 will indicate that we want to sample this row at the baseline rate. A value
228 | less than 1 will indicate we want to sample that row less often than in the baseline, and
229 | a value greater than 1 will indicate that we want to sample that row more often than in the
230 | baseline.
231 | """
232 | activity_type = self._activity_type
233 | if activity_type == "aae":
234 | return self
235 |
236 | if not (params := self.run_params["waiting_list_adjustment"][activity_type]):
237 | return self
238 |
239 | factor = pd.Series(params)
240 |
241 | # update the index to include "True" for the is_wla field
242 | factor.index = pd.MultiIndex.from_tuples(
243 | [(True, i) for i in factor.index], names=["is_wla", "tretspef_grouped"]
244 | )
245 | factor.name = "waiting_list_adjustment"
246 |
247 | return self._update(factor)
248 |
249 | def non_demographic_adjustment(self) -> "ActivityResampling":
250 | """Perform the non-demographic adjustment."""
251 | if not (params := self.run_params["non-demographic_adjustment"][self._activity_type]):
252 | return self
253 |
254 | match self.params["non-demographic_adjustment"]["value-type"]:
255 | case "year-on-year-growth":
256 | year_exponent = self.run_params["year"] - self.params["start_year"]
257 | case x:
258 | raise ValueError(f"invalid value-type: {x}")
259 |
260 | factor = pd.Series(params).rename("non-demographic_adjustment") ** year_exponent
261 | factor.index.names = ["ndggrp"]
262 | return self._update(factor)
263 |
264 | def apply_resampling(self) -> tuple[pd.DataFrame, pd.DataFrame]:
265 | """Apply the row resampling to the data."""
266 | # get the random sampling for each row
267 | rng = self._model_iteration.rng
268 | factors = pd.concat(self.factors, axis=1)
269 |
270 | # reshape this to be the same as baseline counts
271 | overall_factor = (
272 | self._model_iteration.model.baseline_counts * factors.prod(axis=1).to_numpy()
273 | )
274 |
275 | row_samples: np.ndarray = rng.poisson(overall_factor) # ty: ignore[invalid-assignment]
276 |
277 | step_counts = self._model_iteration.fix_step_counts(
278 | self.data, row_samples, factors, "model_interaction_term"
279 | ).assign(strategy="-")
280 |
281 | # apply the random sampling, update the data and get the counts
282 | data = self._model_iteration.model.apply_resampling(row_samples, self.data)
283 |
284 | return data, step_counts
285 |
--------------------------------------------------------------------------------
/tests/unit/nhp/model/test_outpatients.py:
--------------------------------------------------------------------------------
1 | """Test outpatients model."""
2 |
3 | from unittest.mock import Mock, call, patch
4 |
5 | import numpy as np
6 | import pandas as pd
7 | import pytest
8 |
9 | from nhp.model.outpatients import OutpatientsModel
10 |
11 |
12 | # fixtures
13 | @pytest.fixture
14 | def mock_model():
15 | """Create a mock Model instance."""
16 | with patch.object(OutpatientsModel, "__init__", lambda s, p, d, h, r: None):
17 | mdl = OutpatientsModel(None, None, None, None) # type: ignore
18 | mdl.model_type = "op"
19 | mdl.params = {
20 | "dataset": "synthetic",
21 | "model_runs": 3,
22 | "seed": 1,
23 | "demographic_factors": {
24 | "file": "demographics_file.csv",
25 | "variant_probabilities": {"a": 0.6, "b": 0.4},
26 | },
27 | "start_year": 2018,
28 | "end_year": 2020,
29 | "health_status_adjustment": [0.8, 1.0],
30 | "waiting_list_adjustment": "waiting_list_adjustment",
31 | "expat": {"op": {"Other": [0.7, 0.9]}},
32 | "repat_local": {"op": {"Other": [1.0, 1.2]}},
33 | "repat_nonlocal": {"op": {"Other": [1.3, 1.5]}},
34 | "non-demographic_adjustment": {
35 | "a": {"a_a": [1, 1.2], "a_b": [1, 1.2]},
36 | "b": {"b_a": [1, 1.2], "b_b": [1, 1.2]},
37 | },
38 | "inpatient_factors": {
39 | "admission_avoidance": {
40 | "a_a": {"interval": [0.4, 0.6]},
41 | "a_b": {"interval": [0.4, 0.6]},
42 | },
43 | "los_reduction": {
44 | "b_a": {"interval": [0.4, 0.6]},
45 | "b_b": {"interval": [0.4, 0.6]},
46 | },
47 | },
48 | "outpatient_factors": {
49 | "a": {"a_a": {"interval": [0.4, 0.6]}, "a_b": {"interval": [0.4, 0.6]}},
50 | "b": {"b_a": {"interval": [0.4, 0.6]}, "b_b": {"interval": [0.4, 0.6]}},
51 | },
52 | "op_factors": {
53 | "a": {"a_a": {"interval": [0.4, 0.6]}, "a_b": {"interval": [0.4, 0.6]}},
54 | "b": {"b_a": {"interval": [0.4, 0.6]}, "b_b": {"interval": [0.4, 0.6]}},
55 | },
56 | }
57 | # create a minimal data object for testing
58 | mdl.data = pd.DataFrame(
59 | {
60 | "rn": list(range(1, 21)),
61 | "age": list(range(1, 6)) * 4,
62 | "sex": ([1] * 5 + [2] * 5) * 2,
63 | "hsagrp": [x for _ in range(1, 11) for x in ["op_a_a", "op_b_b"]],
64 | }
65 | )
66 | return mdl
67 |
68 |
69 | # methods
70 |
71 |
72 | def test_init_calls_super_init(mocker):
73 | """Test that the model calls the super method."""
74 | # arrange
75 | super_mock = mocker.patch("nhp.model.outpatients.super")
76 | # act
77 | OutpatientsModel("params", "data_path", "hsa", "run_params") # type: ignore
78 | # assert
79 | super_mock.assert_called_once()
80 |
81 |
82 | def test_get_data(mock_model):
83 | # arrange
84 | mdl = mock_model
85 | data_loader = Mock()
86 | data_loader.get_op.return_value = "op data"
87 |
88 | # act
89 | actual = mdl._get_data(data_loader)
90 |
91 | # assert
92 | assert actual == "op data"
93 | data_loader.get_op.assert_called_once_with()
94 |
95 |
96 | def test_get_data_counts(mock_model):
97 | # arrange
98 | mdl = mock_model
99 | data = mdl.data
100 | data["attendances"] = list(range(1, 21))
101 | data["tele_attendances"] = list(range(21, 41))
102 | # act
103 | actual = mdl.get_data_counts(data)
104 | # assert
105 | assert actual.tolist() == [
106 | [float(i) for i in range(1, 21)],
107 | [float(i) for i in range(21, 41)],
108 | ]
109 |
110 |
111 | def test_load_strategies(mock_model):
112 | # arrange
113 | mdl = mock_model
114 | mdl.data["has_procedures"] = [True] * 10 + [False] * 10
115 | mdl.data["is_first"] = ([True] * 5 + [False] * 5) * 2
116 | mdl.data["is_cons_cons_ref"] = [True] * 10 + [False] * 10
117 | mdl.data["type"] = ["a", "b", "c", "d", "e"] * 4
118 | mdl.data["is_gp_ref"] = [False] * 10 + [True] * 10
119 | # act
120 | mdl._load_strategies(None)
121 | # assert
122 | assert mdl.strategies["activity_avoidance"]["strategy"].to_list() == [
123 | f"{i}_{j}"
124 | for i in ["followup_reduction"]
125 | + ["consultant_to_consultant_reduction"] * 2
126 | + ["gp_referred_first_attendance_reduction"]
127 | for j in ["a", "b", "c", "d", "e"]
128 | ]
129 | assert mdl.strategies["activity_avoidance"]["sample_rate"].to_list() == [1] * 20
130 |
131 |
132 | def test_convert_to_tele(mock_model):
133 | """Test that it mutates the data."""
134 | # arrange
135 | mdl = mock_model
136 |
137 | mr_mock = Mock()
138 | mr_mock.rng.binomial.return_value = np.array([10, 15, 0, 20, 25, 0, 30, 35, 0, 40, 45, 0])
139 | data = pd.DataFrame(
140 | {
141 | "rn": range(12),
142 | "pod": (["a"] * 3 + ["b"] * 3) * 2,
143 | "sitetret": ["c"] * 6 + ["d"] * 6,
144 | "has_procedures": [False, False, True] * 4,
145 | "type": ["a", "b", "a"] * 4,
146 | "attendances": [20, 25, 30] * 4,
147 | "tele_attendances": [5, 10, 0] * 4,
148 | }
149 | )
150 | mr_mock.run_params = {
151 | "efficiencies": {"op": {"convert_to_tele_a": 0.25, "convert_to_tele_b": 0.5}}
152 | }
153 | mr_mock.model.strategies = {
154 | "efficiencies": pd.DataFrame(
155 | [
156 | {"rn": k, "strategy": "convert_to_tele_a" if k % 2 else "convert_to_tele_b"}
157 | for k in data["rn"]
158 | ]
159 | ).set_index("rn")
160 | }
161 |
162 | # act
163 | actual_data, actual_step_counts = mdl._convert_to_tele(data.copy(), mr_mock)
164 |
165 | # assert
166 | assert actual_data["attendances"].to_list() == [
167 | 10,
168 | 10,
169 | 30,
170 | 0,
171 | 0,
172 | 30,
173 | -10,
174 | -10,
175 | 30,
176 | -20,
177 | -20,
178 | 30,
179 | ]
180 | assert actual_data["tele_attendances"].to_list() == [
181 | 15,
182 | 25,
183 | 0,
184 | 25,
185 | 35,
186 | 0,
187 | 35,
188 | 45,
189 | 0,
190 | 45,
191 | 55,
192 | 0,
193 | ]
194 |
195 | assert mr_mock.rng.binomial.call_args == call(
196 | data["attendances"].to_list(), [i for _ in range(6) for i in [0.5, 0.75]]
197 | )
198 |
199 | assert actual_step_counts.to_dict("list") == {
200 | "pod": ["a", "a", "b", "b"],
201 | "sitetret": ["c", "d", "c", "d"],
202 | "change_factor": [
203 | "efficiencies",
204 | "efficiencies",
205 | "efficiencies",
206 | "efficiencies",
207 | ],
208 | "strategy": [
209 | "convert_to_tele",
210 | "convert_to_tele",
211 | "convert_to_tele",
212 | "convert_to_tele",
213 | ],
214 | "attendances": [-25, -65, -45, -85],
215 | "tele_attendances": [25, 65, 45, 85],
216 | }
217 |
218 |
219 | def test_apply_resampling(mocker, mock_model):
220 | # arrange
221 | row_samples = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
222 | # act
223 | data = mock_model.apply_resampling(row_samples, pd.DataFrame())
224 | # assert
225 | assert data["attendances"].to_list() == [1, 2, 3, 4]
226 | assert data["tele_attendances"].to_list() == [5, 6, 7, 8]
227 |
228 |
229 | def test_efficiencies(mock_model):
230 | """Test that it runs the model steps."""
231 | # arrange
232 | mdl = mock_model
233 | data = pd.DataFrame({"x": [1]})
234 |
235 | mdl._convert_to_tele = Mock(return_value=("data", "step_counts"))
236 |
237 | # act
238 | actual = mdl.efficiencies(data, "model_run")
239 |
240 | # assert
241 | assert actual == ("data", "step_counts")
242 |
243 | mdl._convert_to_tele.assert_called_once()
244 | assert mdl._convert_to_tele.call_args[0][0].to_dict("list") == {"x": [1]}
245 | assert mdl._convert_to_tele.call_args[0][1] == "model_run"
246 |
247 |
248 | def test_process_results(mock_model):
249 | # arrange
250 | df = pd.DataFrame(
251 | {
252 | "sitetret": ["trust"] * 4,
253 | "is_first": [True, True, False, False],
254 | "has_procedures": [False, True, False, True],
255 | "tretspef": [1, 1, 1, 1],
256 | "tretspef_grouped": [1, 1, 1, 1],
257 | "rn": [1, 2, 3, 4],
258 | "attendances": [5, 6, 7, 8],
259 | "tele_attendances": [9, 10, 11, 12],
260 | "age": [1, 1, 1, 1],
261 | "age_group": [1, 1, 1, 1],
262 | "sex": [1, 1, 1, 1],
263 | "pod": ["op_first", "op_procedure", "op_follow-up", "op_procedure"],
264 | }
265 | )
266 | expected = {
267 | "pod": [k for k in ["op_first", "op_follow-up", "op_procedure"] for _ in [0, 1]],
268 | "sitetret": ["trust"] * 6,
269 | "measure": ["attendances", "tele_attendances"] * 3,
270 | "sex": [1] * 6,
271 | "age": [1] * 6,
272 | "age_group": [1] * 6,
273 | "tretspef": [1] * 6,
274 | "tretspef_grouped": [1] * 6,
275 | "value": [5, 9, 7, 11, 14, 22],
276 | }
277 | # act
278 | actual = mock_model.process_results(df)
279 | # assert
280 | assert actual.to_dict("list") == expected
281 |
282 |
283 | def test_specific_aggregations(mocker, mock_model):
284 | """Test that it aggregates the results correctly."""
285 | # arrange
286 | m = mocker.patch("nhp.model.OutpatientsModel.get_agg", return_value="agg_data")
287 |
288 | mdl = mock_model
289 |
290 | # act
291 | actual = mdl.specific_aggregations("results") # type: ignore
292 |
293 | # assert
294 | assert actual == {
295 | "sex+tretspef_grouped": "agg_data",
296 | "tretspef": "agg_data",
297 | }
298 |
299 | assert m.call_args_list == [
300 | call("results", "sex", "tretspef_grouped"),
301 | call("results", "tretspef"),
302 | ]
303 |
304 |
305 | def test_save_results(mocker, mock_model):
306 | """Test that it correctly saves the results."""
307 |
308 | def path_fn(x):
309 | return x
310 |
311 | mr_mock = Mock()
312 | mr_mock.get_model_results.return_value = pd.DataFrame(
313 | {"rn": [0], "attendances": [1], "tele_attendances": [2]}
314 | )
315 | mr_mock.avoided_activity = pd.DataFrame(
316 | {"rn": [0], "attendances": [1], "tele_attendances": [0]}
317 | )
318 |
319 | to_parquet_mock = mocker.patch("pandas.DataFrame.to_parquet")
320 | mock_model.save_results(mr_mock, path_fn)
321 | assert to_parquet_mock.call_args_list == [
322 | call("op/0.parquet"),
323 | call("op_avoided/0.parquet"),
324 | ]
325 |
326 |
327 | def test_calculate_avoided_activity(mock_model):
328 | # arrange
329 | data = pd.DataFrame({"rn": [0], "attendances": [4], "tele_attendances": [3]})
330 | data_resampled = pd.DataFrame({"rn": [0], "attendances": [2], "tele_attendances": [1]})
331 | # act
332 | actual = mock_model.calculate_avoided_activity(data, data_resampled)
333 | # assert
334 | assert actual.to_dict(orient="list") == {
335 | "rn": [0],
336 | "attendances": [2],
337 | "tele_attendances": [2],
338 | }
339 |
--------------------------------------------------------------------------------
/src/nhp/model/results.py:
--------------------------------------------------------------------------------
1 | """Methods to work with results of the model.
2 |
3 | This module allows you to work with the results of the model. Namely, combining the monte-carlo runs
4 | into a single pandas DataFrame, and helping with saving the results files.
5 | """
6 |
7 | import json
8 | import logging
9 | import os
10 | from typing import Dict, List
11 |
12 | import janitor
13 | import pandas as pd
14 |
15 | from nhp.model.model_iteration import ModelRunResult
16 |
17 |
18 | def _complete_model_runs(
19 | res: List[pd.DataFrame], model_runs: int, include_baseline: bool = True
20 | ) -> pd.DataFrame:
21 | """Complete the data frame for all model runs.
22 |
23 | If any aggregation returns rows for only some of the model runs, we need to add a "0" row for
24 | that run.
25 |
26 | Args:
27 | res: List of model results.
28 | model_runs: The number of model runs.
29 | include_baseline: Whether to include model run 0 (the baseline) or not. Defaults to True.
30 |
31 | Returns:
32 | Combined and completed data frame.
33 | """
34 | results = pd.concat(res)
35 | results: pd.DataFrame = results.groupby( # type: ignore
36 | [i for i in results.columns if i != "value"], as_index=False
37 | )["value"].sum()
38 |
39 | return janitor.complete(
40 | results,
41 | [i for i in results.columns if i != "model_run" if i != "value"],
42 | {"model_run": range(0 if include_baseline else 1, model_runs + 1)},
43 | fill_value={"value": 0},
44 | )
45 |
46 |
47 | def _combine_model_results(
48 | results: list[list[ModelRunResult]],
49 | ) -> dict[str, pd.DataFrame]:
50 | """Combine the results of the monte carlo runs.
51 |
52 | Takes as input a list of lists, where the outer list contains an item for inpatients,
53 | outpatients and a&e runs, and the inner list contains the results of the monte carlo runs.
54 |
55 | Args:
56 | results: A list containing the model results.
57 |
58 | Returns:
59 | Dictionary containing the combined model results.
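
Example:
    Illustrative input shape (each `ModelRunResult` is an `(aggregations, step_counts)` tuple,
    and index 0 of each inner list is the baseline run):

        results = [ip_runs, op_runs, aae_runs]
        combined = _combine_model_results(results)  # e.g. {"default": <DataFrame>, ...}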
60 | """
61 | aggregations = sorted(list({k for r in results for v, _ in r for k in v.keys()}))
62 |
63 | model_runs = len(results[0]) - 1
64 |
65 | return {
66 | k: _complete_model_runs(
67 | [
68 | v[k].reset_index().assign(model_run=i)
69 | for r in results
70 | for (i, (v, _)) in enumerate(r)
71 | if k in v
72 | ],
73 | model_runs,
74 | )
75 | for k in aggregations
76 | }
77 |
78 |
79 | def _combine_step_counts(results: list) -> pd.DataFrame:
80 | """Combine the step counts of the monte carlo runs.
81 |
82 | Takes as input a list of lists, where the outer list contains an item for inpatients,
83 | outpatients and a&e runs, and the inner list contains the results of the monte carlo runs.
84 |
85 | Args:
86 | results: A list containing the model results.
87 |
88 | Returns:
89 | DataFrame containing the model step counts.
90 | """
91 | model_runs = len(results[0]) - 1
92 | return _complete_model_runs(
93 | [
94 | v
95 | # TODO: handle the case of daycase conversion, it's duplicating values
96 | # need to figure out exactly why, but this masks the issue for now
97 | .groupby(v.index.names)
98 | .sum()
99 | .reset_index()
100 | .assign(model_run=i)
101 | for r in results
102 | for i, (_, v) in enumerate(r)
103 | if i > 0
104 | ],
105 | model_runs,
106 | include_baseline=False,
107 | )
108 |
109 |
110 | def generate_results_json(
111 | combined_results: dict[str, pd.DataFrame],
112 | combined_step_counts: pd.DataFrame,
113 | params: dict,
114 | run_params: dict,
115 | ) -> str:
116 | """Generate the results in the json format and save."""
117 |
118 | def agg_to_dict(res):
119 | results_df = res.set_index("model_run")
120 | return (
121 | pd.concat(
122 | [
123 | results_df.loc[0]
124 | .set_index([i for i in results_df.columns if i != "value"])
125 | .rename(columns={"value": "baseline"}),
126 | results_df.loc[results_df.index != 0]
127 | .groupby([i for i in results_df.columns if i != "value"])
128 | .agg(list)
129 | .rename(columns={"value": "model_runs"}),
130 | ],
131 | axis=1,
132 | )
133 | .reset_index()
134 | .to_dict(orient="records")
135 | )
136 |
137 | dict_results = {k: agg_to_dict(v) for k, v in combined_results.items()}
138 |
139 | dict_results["step_counts"] = (
140 | combined_step_counts.groupby( # ty: ignore[no-matching-overload]
141 | [
142 | "pod",
143 | "change_factor",
144 | "strategy",
145 | "sitetret",
146 | "activity_type",
147 | "measure",
148 | ]
149 | )[["value"]]
150 | .agg(list)
151 | .reset_index()
152 | .to_dict("records")
153 | )
154 |
155 | for i in dict_results["step_counts"]:
156 | i["model_runs"] = i.pop("value")
157 | if i["change_factor"] == "baseline":
158 | i["model_runs"] = i["model_runs"][0:1]
159 | if i["strategy"] == "-":
160 | i.pop("strategy")
161 |
162 | filename = f"{params['dataset']}/{params['scenario']}-{params['create_datetime']}"
163 | os.makedirs(f"results/{params['dataset']}", exist_ok=True)
164 | with open(f"results/{filename}.json", "w", encoding="utf-8") as file:
165 | json.dump(
166 | {
167 | "params": params,
168 | "population_variants": run_params["variant"],
169 | "results": dict_results,
170 | },
171 | file,
172 | )
173 | return filename
174 |
175 |
176 | def save_results_files(results: dict, params: dict) -> list:
177 | """Save aggregated and combined results as parquet, and params as JSON.
178 |
179 | Args:
180 | results: The results of running the models, processed into one dictionary.
181 | params: The parameters used for the model run.
182 |
183 | Returns:
184 | Filepaths to saved files.
185 | """
186 | path = f"results/{params['dataset']}/{params['scenario']}/{params['create_datetime']}"
187 | os.makedirs(path, exist_ok=True)
188 |
189 | return [
190 | *[_save_parquet_file(path, k, v, params) for k, v in results.items()],
191 | _save_params_file(path, params),
192 | ]
193 |
194 |
195 | def _add_metadata_to_dataframe(df: pd.DataFrame, params: dict) -> pd.DataFrame:
196 | """Add metadata as columns to the dataframe.
197 |
198 | Add metadata as columns to the dataframe, so that the saved parquet files have useful
199 | information regarding their provenance.
200 |
201 | Args:
202 | df: The dataframe that we want to add the metadata to.
203 | params: The parameters for the model run, which include metadata.
204 |
205 | Returns:
206 | The dataframe, with additional columns "dataset", "scenario" and "create_datetime".
207 | """
208 | metadata_to_save = ["dataset", "scenario", "app_version", "create_datetime"]
209 | for m in metadata_to_save:
210 | df[m] = params[m]
211 | return df
212 |
213 |
214 | def _save_parquet_file(path: str, results_name: str, results_df: pd.DataFrame, params: dict) -> str:
215 | """Save a results dataframe as parquet.
216 |
217 | Args:
218 | path: The folder where we want to save the results to.
219 | results_name: The name of this aggregation.
220 | results_df: The results dataframe.
221 | params: The parameters for the model run.
222 |
223 | Returns:
224 | The filename of the saved file.
225 | """
226 | results_df = _add_metadata_to_dataframe(results_df, params)
227 | results_df.to_parquet(filename := f"{path}/{results_name}.parquet")
228 | return filename
229 |
230 |
231 | def _save_params_file(path: str, params: dict) -> str:
232 | """Save the model runs parameters as json.
233 |
234 | Args:
235 | path: The folder where we want to save the results to.
236 | params: The parameters the model was run with.
237 |
238 | Returns:
239 | The filename of the saved file.
240 | """
241 | with open(filename := f"{path}/params.json", "w", encoding="utf-8") as file:
242 | json.dump(params, file)
243 | return filename
244 |
245 |
246 | def _patch_converted_sdec_activity(
247 | results: Dict[str, pd.DataFrame], column: str, col_value: str
248 | ) -> None:
249 | """Patch the converted SDEC activity in the dataframe."""
250 | results_df = results[column]
251 | agg_cols = ["pod", "sitetret", "measure", "model_run"]
252 |
253 | default_sdec = (
254 | results["default"].query("pod == 'aae_type-05'").set_index(agg_cols)["value"].rename("b")
255 | )
256 |
257 | missing_sdec_activity = (
258 | pd.concat(
259 | [
260 | default_sdec,
261 | (
262 | results_df.query("pod == 'aae_type-05'")
263 | .groupby(agg_cols)["value"] # ty: ignore[no-matching-overload]
264 | .sum()
265 | .rename("a")
266 | ),
267 | ],
268 | axis=1,
269 | )
270 | .fillna(0)
271 | .reset_index()
272 | .assign(value=lambda x: x["b"] - x["a"])
273 | .drop(columns=["b", "a"])
274 | )
275 | missing_sdec_activity[column] = col_value
276 |
277 | df_fixed = (
278 | pd.concat([results_df, missing_sdec_activity], axis=0)
279 | .groupby(
280 | ["pod", "sitetret", "measure", column, "model_run"],
281 | as_index=False,
282 | ) # ty: ignore[no-matching-overload]
283 | .sum()
284 | )
285 |
286 | df_fixed["value"] = df_fixed["value"].astype("int64")
287 |
288 | results[column] = df_fixed
289 |
290 |
291 | def combine_results(
292 | results: list[list[ModelRunResult]],
293 | ) -> tuple[dict[str, pd.DataFrame], pd.DataFrame]:
294 | """Combine the results into a single dictionary.
295 |
296 | When we run the models, we have an array containing 3 items [inpatients, outpatients, a&e].
297 | Each of which contains one item for each model run, which is a dictionary.
298 |
299 | Args:
300 | results: The results of running the models.
301 |
302 | Returns:
303 | Tuple containing combined model results dictionary and combined step counts DataFrame.
304 | """
305 | logging.info(" * starting to combine results")
306 |
307 | combined_results = _combine_model_results(results)
308 | combined_step_counts = _combine_step_counts(results)
309 |
310 | # TODO: this is a bit of a hack, but we need to patch the converted SDEC activity
311 | # because inpatients activity is aggregated differently to a&e, the a&e aggregations will be
312 | # missing the converted SDEC activity, so we need to add it back in
313 | _patch_converted_sdec_activity(combined_results, "acuity", "standard")
314 | _patch_converted_sdec_activity(combined_results, "attendance_category", "1")
315 |
316 | logging.info(" * finished combining results")
317 | return combined_results, combined_step_counts
318 |
--------------------------------------------------------------------------------
/.github/copilot-instructions.md:
--------------------------------------------------------------------------------
1 | # NHP Model - Copilot Coding Agent Instructions
2 |
3 | ## Repository Overview
4 |
5 | This is the **New Hospital Programme (NHP) Demand Model**, a Python package for healthcare activity prediction. The model provides modeling capabilities for inpatients, outpatients, and A&E (Accident & Emergency) services. It is built as a Python library using modern packaging tools and is deployed as both a Python package and a Docker container to Azure.
6 |
7 | **Key Facts:**
8 | - **Project Type:** Python package/library with Docker containerization
9 | - **Python Version:** Requires Python 3.11 or higher (specified in pyproject.toml)
10 | - **Package Manager:** `uv` (modern Python package manager from Astral)
11 | - **Build System:** setuptools with setuptools-scm for versioning
12 | - **Primary Language:** Python
13 | - **Project Size:** Medium-sized Python project
14 | - **Main Modules:** nhp.model (core model code), nhp.docker (Docker runtime)
15 |
16 | ## Environment Setup and Build Instructions
17 |
18 | ### Initial Setup
19 |
20 | **ALWAYS start by installing uv and project dependencies:**
21 |
22 | ```bash
23 | # Install uv using the recommended approach from Astral
24 | curl -LsSf https://astral.sh/uv/install.sh | sh
25 |
26 | # Install project dependencies (production only)
27 | uv sync
28 |
29 | # Install with dev dependencies for development/testing (RECOMMENDED for development)
30 | uv sync --extra dev
31 |
32 | # Install with docs dependencies for documentation
33 | uv sync --extra docs
34 |
35 | # Install multiple extras at once
36 | uv sync --extra dev --extra docs
37 | ```
38 |
39 | **Important:** The `uv sync` command only installs production dependencies. For development work (linting, testing), use `uv sync --extra dev` to install the dev dependencies.
40 |
41 | **Python Version:** The project requires Python 3.11+. The CI uses Python 3.11 specifically via `uv python install` in workflows.
42 |
43 | ### Build Commands
44 |
45 | **To build the package:**
46 |
47 | ```bash
48 | # Standard build - creates wheel and source distribution
49 | uv build
50 |
51 | # Build for development (sets version to 0.dev0)
52 | SETUPTOOLS_SCM_PRETEND_VERSION=0.dev0 uv build
53 | ```
54 |
55 | The build creates:
56 | - `dist/nhp_model-<version>-py3-none-any.whl`
57 | - `dist/nhp_model-<version>.tar.gz`
58 |
59 | **Note:** The Dockerfile includes a TODO comment about forcing version numbers during Docker builds. Currently it uses `ENV SETUPTOOLS_SCM_PRETEND_VERSION=v0.0.0` as a workaround.
60 |
61 | ### Testing
62 |
63 | **Unit Tests (ALWAYS run these before committing):**
64 |
65 | ```bash
66 | # Run all unit tests
67 | uv run pytest tests/unit --verbose
68 |
69 | # Run unit tests with coverage report
70 | uv run pytest --cov=. tests/unit --ignore=tests --cov-branch --cov-report xml:coverage.xml
71 | ```
72 |
73 | **Integration Tests:**
74 |
75 | ```bash
76 | # Integration tests require test data in a specific format
77 | # These are located in tests/integration/ but may require data setup
78 | uv run pytest tests/integration --verbose
79 | ```
80 |
81 | **All unit tests must pass. Test failures are NOT acceptable.**
82 |
83 | ### Linting and Formatting
84 |
85 | **ALWAYS run linting before committing. All linting checks MUST pass:**
86 |
87 | ```bash
88 | # Run ruff linting check
89 | uvx ruff check .
90 |
91 | # Run ruff format check (no auto-formatting)
92 | uvx ruff format --check .
93 |
94 | # Auto-format code (if needed)
95 | uvx ruff format .
96 |
97 | # Run type checking with ty
98 | uvx ty check .
99 | ```
100 |
101 | **Linting Configuration:**
102 | - Ruff config is in `pyproject.toml` under `[tool.ruff]`
103 | - Line length: 100 characters
104 | - Target Python version: 3.11
105 | - Excludes: `notebooks/` directory
106 | - Key rules: pydocstyle (D), pycodestyle (E/W), isort (I), pylint (PL), pandas-vet (PD), numpy (NPY), ruff-specific (RUF)
107 | - Docstring convention: Google style
108 |
109 | **The notebooks directory is excluded from linting and should not be linted.**
110 |
111 | ### Documentation
112 |
113 | ```bash
114 | # Build documentation (requires docs dependencies)
115 | uv run mkdocs build --clean
116 |
117 | # Serve documentation locally
118 | uv run mkdocs serve
119 | ```
120 |
121 | Documentation is deployed automatically to Connect via CI on main branch pushes.
122 |
123 | ### Running the Model
124 |
125 | **Local execution:**
126 |
127 | ```bash
128 | # Run with sample parameters (requires data in specified path)
129 | uv run python -m nhp.model queue/params-sample.json -d data/synth --type all
130 |
131 | # Run single model type
132 | uv run python -m nhp.model queue/params-sample.json -d data --type ip # inpatients
133 | uv run python -m nhp.model queue/params-sample.json -d data --type op # outpatients
134 | uv run python -m nhp.model queue/params-sample.json -d data --type aae # A&E
135 |
136 | # Run specific model iteration for debugging
137 | uv run python -m nhp.model queue/params-sample.json -d data --model-run 1 --type ip
138 | ```
139 |
140 | **Command-line arguments:**
141 | - `params_file`: Path to JSON parameters file (default: `queue/params-sample.json`)
142 | - `-d, --data-path`: Path to data directory (default: `data`)
143 | - `-r, --model-run`: Which model iteration to run (default: 1)
144 | - `-t, --type`: Model type - `all`, `ip`, `op`, or `aae` (default: `all`)
145 | - `--save-full-model-results`: Save complete model results
146 |
147 | **Data Requirements:**
148 | The model expects data in parquet format organized by fiscal year and dataset (see the sketch after this list):
149 | - Format: `{data_path}/{file}/fyear={year}/dataset={dataset}/`
150 | - Required files: `ip`, `op`, `aae`, `demographic_factors`, `birth_factors`, `hsa_activity_tables`, `hsa_gams` (pickle)
151 | - Sample data location: `data/synth/` (synthetic dataset for testing - see GitHub issue #347)
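
A rough sketch of the expected layout (the year and dataset values below are illustrative;
`hsa_gams` is a pickle and is not shown):

```python
# Print the partition directories the model expects for a local synthetic dataset.
from pathlib import Path

data_path, year, dataset = Path("data/synth"), 2019, "synthetic"
for file in ["ip", "op", "aae", "demographic_factors", "birth_factors", "hsa_activity_tables"]:
    # e.g. data/synth/ip/fyear=2019/dataset=synthetic/ containing the parquet files
    print(data_path / file / f"fyear={year}" / f"dataset={dataset}")
```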
152 |
153 | ## Project Structure
154 |
155 | ### Directory Layout
156 |
157 | **Core Directories:**
158 | - `.github/workflows/` - CI/CD pipelines (linting, codecov, build, deploy)
159 | - `src/nhp/model/` - Core model: `__main__.py`, `model.py`, `inpatients.py`, `outpatients.py`, `aae.py`, `run.py`, `results.py`, `data/`
160 | - `src/nhp/docker/` - Docker runtime with Azure Storage integration
161 | - `tests/unit/` - Unit tests
162 | - `tests/integration/` - Integration tests (require data)
163 | - `docs/` - MkDocs documentation
164 | - `notebooks/` - Databricks notebooks (excluded from linting)
165 | - `queue/` - Parameter files (params-sample.json)
166 |
167 | **Key Configuration Files:**
168 | - `pyproject.toml` - Project metadata, dependencies, ruff/pytest/setuptools config
169 | - `uv.lock` - Locked dependency versions (DO NOT modify manually)
170 | - `params-schema.json` - JSON schema for model parameters (deployed to GitHub Pages)
171 |
172 | ### Architecture Overview
173 |
174 | **Model Hierarchy:**
175 | - `Model` (base class in model.py) - Common model functionality
176 | - `InpatientsModel` - Inpatient demand modeling
177 | - `OutpatientsModel` - Outpatient demand modeling
178 | - `AaEModel` - A&E demand modeling
179 |
180 | **Execution Flow:**
181 | 1. `__main__.py` parses CLI arguments and loads parameters
182 | 2. `run.py` orchestrates model execution (single or parallel runs)
183 | 3. `ModelIteration` runs a single model iteration
184 | 4. Results are aggregated and saved by `results.py` (see the sketch below)
185 |
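As a rough orientation, step 4 maps onto the public helpers in `results.py`; the sketch below is
illustrative glue rather than the real `run.py` (`all_model_results`, `params` and `run_params`
are placeholders produced by the earlier steps):

```python
# Illustrative only: combining per-activity-type model runs and writing the results JSON.
from nhp.model import results


def save_all(all_model_results: list, params: dict, run_params: dict) -> str:
    """Combine the monte carlo runs and generate the results JSON file."""
    combined, step_counts = results.combine_results(all_model_results)
    return results.generate_results_json(combined, step_counts, params, run_params)
```
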
186 | **Data Loading:**
187 | - Abstract `Data` interface allows multiple data sources
188 | - `Local` loads from local parquet files
189 | - `DatabricksNational` loads from Databricks (used in notebooks)
190 |
191 | ## CI/CD Validation Pipeline
192 |
193 | ### Pull Request Checks
194 |
195 | **Every pull request triggers these workflows (ALL MUST PASS):**
196 |
197 | 1. **Linting** (`.github/workflows/linting.yaml`):
198 | - `ruff check` - Code quality checks
199 | - `ruff format --check` - Code formatting verification
200 | - `ty check .` - Type checking
201 |
202 | 2. **Code Coverage** (`.github/workflows/codecov.yaml`):
203 | - Runs unit tests with coverage
204 | - Uploads to Codecov
205 | - Requires passing tests
206 |
207 | **IMPORTANT:** All linting and test checks must pass before merge. DO NOT skip or disable these checks.
208 |
209 | ### Main Branch / Release Workflows
210 |
211 | On push to main or tags:
212 |
213 | 1. **build_app.yaml**: Builds Python wheel, uploads to Azure Storage and GitHub releases
214 | 2. **build_schema.yaml**: Deploys params-schema.json to GitHub Pages
215 | 3. **build_container.yaml**: Builds and pushes Docker image to GitHub Container Registry
216 | 4. **deploy_docs.yaml**: Builds and deploys MkDocs documentation to RStudio Connect
217 |
218 | ### Docker Deployment
219 |
220 | The model is containerized using:
221 | - Base image: `ghcr.io/astral-sh/uv:python3.11-alpine`
222 | - Build args: `app_version`, `data_version`, `storage_account`
223 | - Entry point: `python -m nhp.docker`
224 | - Tags: `dev` (PRs), `v*.*.*` (releases), `latest` (latest release)
225 |
226 | ## Common Issues and Workarounds
227 |
228 | **Known Issues:**
229 | 1. **Dockerfile Version**: Uses `ENV SETUPTOOLS_SCM_PRETEND_VERSION=v0.0.0` because setuptools-scm needs git metadata (TODO: build wheel and copy instead)
230 | 2. **Data Structure**: Model expects parquet files at `{data_path}/{file}/fyear={year}/dataset={dataset}/`. Missing files cause runtime errors.
231 | 3. **Notebooks**: `notebooks/` directory excluded from linting - don't lint these Databricks notebooks.
232 |
233 | **Environment Variables (Docker):**
234 | - `APP_VERSION`, `DATA_VERSION` (default: "dev")
235 | - `STORAGE_ACCOUNT` (required for Azure), `BATCH_SIZE` (default: 16)
236 | - `.env` file supported via python-dotenv for local development (example below)
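
A minimal illustrative `.env` for local development (the storage account name is a placeholder):

```
STORAGE_ACCOUNT=examplestorageaccount
APP_VERSION=dev
DATA_VERSION=dev
BATCH_SIZE=16
```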
237 |
238 | ## Testing Strategy
239 |
240 | - **Unit Tests**: `tests/unit/` - Mock-based, parameterized. **ALWAYS run before committing.**
241 | - **Integration Tests**: `tests/integration/` - Require properly formatted test data, test end-to-end runs
242 | - **Test Organization**: pytest-mock for mocking, fixtures in `tests/conftest.py`
243 | - **Coverage**: High coverage maintained via Codecov integration
244 |
245 | ## Best Practices for Coding Agents
246 |
247 | 1. **ALWAYS install dependencies first**: Run `uv sync --extra dev` before any development work.
248 |
249 | 2. **ALWAYS run linting before committing**: Run `uvx ruff check .` and `uvx ruff format --check .` - these MUST pass.
250 |
251 | 3. **ALWAYS run unit tests**: Run `uv run pytest tests/unit` before committing - all tests MUST pass.
252 |
253 | 4. **Follow Google docstring convention**: All public functions/classes must have Google-style docstrings (enforced by ruff).
254 |
255 | 5. **Respect line length**: Maximum 100 characters per line (ruff will enforce this).
256 |
257 | 6. **Don't modify notebooks**: The `notebooks/` directory is excluded from linting for a reason. These are Databricks notebooks with special formatting.
258 |
259 | 7. **Use uv for all Python commands**: Prefix commands with `uv run` to ensure correct virtual environment usage.
260 |
261 | 8. **Don't modify uv.lock manually**: Use `uv sync` to update dependencies.
262 |
263 | 9. **Test locally before pushing**: The CI checks are strict and will fail if linting/tests don't pass.
264 |
265 | 10. **Understand the data structure**: The model requires specific data formats. If testing model execution, ensure proper test data is available or use existing test fixtures.
266 |
267 | ## Quick Reference
268 |
269 | ```bash
270 | # Setup (production + dev dependencies)
271 | curl -LsSf https://astral.sh/uv/install.sh | sh
272 | uv sync --extra dev
273 |
274 | # Lint (MUST pass)
275 | uvx ruff check .
276 | uvx ruff format --check .
277 |
278 | # Test (MUST pass)
279 | uv run pytest tests/unit --verbose
280 |
281 | # Build
282 | uv build
283 |
284 | # Run model (requires data)
285 | uv run python -m nhp.model queue/params-sample.json -d data --type all
286 |
287 | # Build docs (requires docs extras)
288 | uv sync --extra docs
289 | uv run mkdocs build --clean
290 | ```
291 |
292 | **When in doubt, check the CI workflows in `.github/workflows/` - they define the exact validation steps used in the pipeline.**
293 |
--------------------------------------------------------------------------------