├── .Rbuildignore ├── .gitattributes ├── .github ├── .gitignore ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ ├── maintenance.md │ └── scientific-improvement.md ├── PULL_REQUEST_TEMPLATE │ ├── pull_request_template.md │ └── vulnerability.md ├── dependabot.yml ├── scripts │ ├── cleanup-on-pr-close.sh │ ├── create_pool.py │ ├── delete-container-tag.sh │ └── docker_build_and_push.sh └── workflows │ ├── block-fixup.yaml │ ├── check-news-md.yaml │ ├── cleanup-on-pr-close.yaml │ ├── containers-and-az-pool.yaml │ ├── delete-container-tag.yaml │ ├── format-suggest.yaml │ ├── gh-act │ ├── 2-dry.sh │ └── 2-full.sh │ ├── manual-docker-prune.yml │ ├── pkgdown.yaml │ ├── pr-commands.yaml │ ├── r-cmd-check.yaml │ ├── start-app-job.yaml │ └── test-coverage.yaml ├── .gitignore ├── .lintr ├── .pre-commit-config.yaml ├── .secrets.baseline ├── CONTRIBUTING.md ├── DESCRIPTION ├── DISCLAIMER.md ├── Dockerfile ├── LICENSE.md ├── Makefile ├── NAMESPACE ├── NEWS.md ├── R ├── azure.R ├── config.R ├── data.R ├── diagnostics.R ├── exclusions.R ├── fit_model.R ├── parameters.R ├── pipeline.R ├── read_data.R ├── utils.R └── write_output.R ├── README.md ├── SOP.md ├── _pkgdown.yml ├── air.toml ├── azure ├── generate_configs.py ├── generate_rerun_configs.py ├── job.py └── requirements.txt ├── code-of-conduct.md ├── codecov.yml ├── container-app-jobs ├── README.md ├── blob-config-runner │ ├── config.ini │ ├── requirements.txt │ └── start-jobs.py └── job-template.yaml ├── data-raw ├── convert_gostic_toy_rt_to_test_dataset.R └── sir_gt_pmf.R ├── data ├── gostic_toy_rt.rda └── sir_gt_pmf.rda ├── image.png ├── man ├── Config.Rd ├── Data.Rd ├── Exclusions.Rd ├── Interval.Rd ├── Parameters.Rd ├── apply_exclusions.Rd ├── download_file_from_container.Rd ├── download_if_specified.Rd ├── extract_diagnostics.Rd ├── fetch_blob_container.Rd ├── fetch_credential_from_env_var.Rd ├── fit_model.Rd ├── format_stan_opts.Rd ├── gostic_toy_rt.Rd ├── low_case_count_diagnostic.Rd ├── opts_formatter.Rd ├── pipeline.Rd ├── read_data.Rd ├── read_disease_parameters.Rd ├── read_exclusions.Rd ├── read_interval_pmf.Rd ├── read_json_into_config.Rd ├── sample_processing_functions.Rd ├── sir_gt_pmf.Rd ├── write_model_outputs.Rd └── write_output_dir_structure.Rd ├── open_practices.md ├── rules_of_behavior.md ├── start.sh ├── tests ├── testthat.R └── testthat │ ├── _snaps │ ├── fit_model.md │ ├── parameters.md │ └── read_data.md │ ├── data │ ├── 2025-04-02_test.parquet │ ├── CA_COVID-19.json │ ├── CA_test.parquet │ ├── bad_config.json │ ├── sample_config_no_exclusion.json │ ├── sample_config_with_exclusion.json │ ├── sample_fit.rds │ ├── test_big_exclusions.csv │ ├── test_data.parquet │ ├── test_exclusions.csv │ ├── test_parameters.parquet │ ├── us_overall_test_data.parquet │ └── v_bad_config.json │ ├── helper-expect_pipeline_files_written.R │ ├── helper-write_exclusion.R │ ├── helper-write_parameter_file.R │ ├── test-diagnostics.R │ ├── test-exclusions.R │ ├── test-fit_model.R │ ├── test-parameters.R │ ├── test-pipeline.R │ ├── test-read_data.R │ └── test-write_output.R ├── thanks.md └── utils └── Rt_review_exclusions.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | $Dockerfile-batch^ 2 | ^.env$ 3 | ^CONTRIBUTING.md$ 4 | ^DISCLAIMER.md$ 5 | ^Dockerfile$ 6 | ^Dockerfile-batch$ 7 | ^Dockerfile-dependencies$ 8 | ^Dockerfile.unified$ 9 | ^LICENSE.md$ 10 | ^Makefile$ 11 | ^SOP.md$ 12 | ^[\.]?air\.toml$ 13 | ^\.github$ 14 | ^\.lintr$ 15 | ^\.pre-commit-config\.yaml$ 16 | 
^\.secrets.baseline$ 17 | ^\.vscode$ 18 | ^_pkgdown\.yml$ 19 | ^azure$ 20 | ^batch-autoscale-formula.txt$ 21 | ^code-of-conduct.md$ 22 | ^codecov\.yml$ 23 | ^container-app-jobs$ 24 | ^data-raw$ 25 | ^docs$ 26 | ^image.png$ 27 | ^open_practices.md$ 28 | ^pkgdown$ 29 | ^rules_of_behavior.md$ 30 | ^start.sh$ 31 | ^thanks.md$ 32 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | NEWS.md merge=union 2 | 3 | # Normal text let sit to auto 4 | *.htm text 5 | *.html text 6 | *.css text 7 | *.js text 8 | 9 | ## Declare files that will always have LF (aka \n aka 10 aka 0x0a) line endings on checkout. 10 | *.sh text eol=lf 11 | *.md text eol=lf 12 | *.json text eol=lf 13 | *.yml text eol=lf 14 | *.csv text eol=lf 15 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what feature is not working. 12 | 13 | **Impact** 14 | Please describe the impact this bug is causing to your program or organization. 15 | 16 | **To Reproduce** 17 | Steps to reproduce the behavior: 18 | 1. Go to '...' 19 | 2. Click on '....' 20 | 3. Scroll down to '....' 21 | 4. See error 22 | 23 | **Expected behavior** 24 | A clear and concise description of what you expected to happen. 25 | 26 | **Screenshots** 27 | If applicable, add screenshots to help explain your problem. 28 | 29 | **Logs** 30 | If applicable, please attach logs to help describe your problem. 31 | 32 | **Desktop (please complete the following information):** 33 | - OS: [e.g. iOS] 34 | - Browser [e.g. chrome, safari] 35 | - Version [e.g. 22] 36 | 37 | **Smartphone (please complete the following information):** 38 | - Device: [e.g. iPhone6] 39 | - OS: [e.g. iOS8.1] 40 | - Browser [e.g. stock browser, safari] 41 | - Version [e.g. 22] 42 | 43 | **Additional context** 44 | Add any other context about the problem here. 45 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/maintenance.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Maintenance 3 | about: Questions and requests related to organizational support and maintenance 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What type of help do you need?** 11 | 12 | * [ ] Question 13 | * [ ] New Repo 14 | * [ ] Delete Repo 15 | * [ ] User Membership (please make sure new members are familiar with the [CDC open practices](https://github.com/CDCgov/template/blob/master/open_practices.md#profile-setup) and set up their profile with name and org info to help people collaborate with them) 16 | * [ ] Other 17 | 18 | **Please describe how you'd like us to help.** 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/scientific-improvement.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Scientific improvement 3 | about: Suggest a way to improve an existing tool or pipeline 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Describe the improvement that needs to be made 11 | (e.g. update a parameter estimate, tweak the prior, modify the model) 12 | 13 | ## Provide links to references to methods or data sources 14 | 15 | ## Describe the changes expected to the model's outputs 16 | 17 | ## Suggest new tests that will need to be implemented 18 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/pull_request_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | --- 8 | 9 | **Please describe the bug this fixes or the feature this adds.** 10 | 11 | **Please describe how you tested this change. Include unit tests whenever possible.** 12 | 13 | **Did you create or modify any associated documentation with this change? If documentation is not included in PR, please link to related documentation.** 14 | 15 | **If you added or modified HTML, did you check that it was 508 compliant?** 16 | 17 | **Please tag any specific reviewers you would like to review this PR** 18 | 19 | **Please include the following checks for open source contributing?** 20 | 21 | * [ ] Did you check for sensitive data, and remove any? 22 | * [ ] Are additional approvals needed for this change? 23 | * [ ] Are there potential vulnerabilities or licensing issues with any new dependencies introduced? 24 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/vulnerability.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Vulnerability Maintenance 3 | about: Routine updates to address vulnerabilities. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What vulnerabilities does this PR remove or update?** 11 | 12 | **Have you tested to make sure these updates do not cause unintended consequences?** 13 | 14 | **Are these patch updates? minor? 
major?** 15 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "github-actions" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | ignore: 13 | - dependency-name: "*" 14 | update-types: ["version-update:semver-patch"] 15 | -------------------------------------------------------------------------------- /.github/scripts/cleanup-on-pr-close.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Delete Batch Pools and associated jobs 4 | 5 | if [ "${#}" -ne 3 ]; then 6 | echo "Usage: $0 " 7 | exit 1 8 | fi 9 | 10 | ACCOUNT_NAME="$1" 11 | RESOURCE_GROUP="$2" 12 | POOL_ID="$3" 13 | 14 | echo "Logging into Batch account" 15 | az batch account login \ 16 | --name "${ACCOUNT_NAME}" \ 17 | --resource-group "${RESOURCE_GROUP}" 18 | 19 | ########################## 20 | # Fetch & delete jobs 21 | 22 | echo "Fetching jobs in pool ${POOL_ID}" 23 | 24 | JOB_IDS=$(az batch job list --query "[?poolInfo.poolId=='$POOL_ID'].id" --output tsv) 25 | 26 | if [ -z "${JOB_IDS}" ]; then 27 | echo "No jobs found in pool: ${POOL_ID}" 28 | else 29 | # Iterate line-by-line over the tsv list 30 | echo "${JOB_IDS}" | while IFS= read -r JOB_ID; do 31 | echo "Deleting job ${JOB_ID}" 32 | az batch job delete --job-id "${JOB_ID}" --yes 33 | done 34 | fi 35 | 36 | ########################## 37 | # Delete pool 38 | 39 | az batch pool delete --pool-id "${POOL_ID}" --yes 2>/dev/null || { 40 | echo "Pool ${POOL_ID} does not exist or has already been deleted" 41 | } 42 | -------------------------------------------------------------------------------- /.github/scripts/create_pool.py: -------------------------------------------------------------------------------- 1 | # /// script 2 | # requires-python = ">=3.13" 3 | # dependencies = [ 4 | # "azure-batch==14.2", 5 | # "azure-identity==1.21", 6 | # "azure-mgmt-batch==18.0", 7 | # "msrest==0.7", 8 | # ] 9 | # /// 10 | """ 11 | If running locally, use: 12 | uv run --env-file .env .github/scripts/create_pool.py 13 | Requires a `.env` file with at least the following: 14 | BATCH_ACCOUNT="" 15 | SUBSCRIPTION_ID="" 16 | BATCH_USER_ASSIGNED_IDENTITY="" 17 | AZURE_BATCH_ACCOUNT_CLIENT_ID="" 18 | PRINCIPAL_ID="" 19 | CONTAINER_REGISTRY_SERVER="" 20 | CONTAINER_IMAGE_NAME="https://full-cr-server/:tag" 21 | POOL_ID="" 22 | SUBNET_ID="" 23 | RESOURCE_GROUP="" 24 | 25 | If running in CI, all of the above environment variables should be set in the repo 26 | secrets. 27 | """ 28 | 29 | import os 30 | 31 | from azure.identity import DefaultAzureCredential 32 | from azure.mgmt.batch import BatchManagementClient 33 | 34 | AUTO_SCALE_FORMULA = """ 35 | // In this example, the pool size 36 | // is adjusted based on the number of tasks in the queue. 37 | // Note that both comments and line breaks are acceptable in formula strings. 38 | 39 | // Get pending tasks for the past 5 minutes. 
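// Illustrative walk-through (added note, not part of the original formula): with full
// sample coverage and 20 pending tasks, $tasks evaluates to 20, so the pool scales to
// min(20, 100) = 20 dedicated nodes; once the queue is empty, the target halves at each
// 5-minute evaluation until it reaches 0.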
40 | $samples = $ActiveTasks.GetSamplePercent(TimeInterval_Minute * 5); 41 | // If we have fewer than 70 percent data points, we use the last sample point, otherwise we use the maximum of last sample point and the history average. 42 | $tasks = $samples < 70 ? max(0, $ActiveTasks.GetSample(1)) : 43 | max( $ActiveTasks.GetSample(1), avg($ActiveTasks.GetSample(TimeInterval_Minute * 5))); 44 | // If number of pending tasks is not 0, set targetVM to pending tasks, otherwise half of current dedicated. 45 | $targetVMs = $tasks > 0 ? $tasks : max(0, $TargetDedicatedNodes / 2); 46 | // The pool size is capped at 100, if target VM value is more than that, set it to 100. 47 | cappedPoolSize = 100; 48 | $TargetDedicatedNodes = max(0, min($targetVMs, cappedPoolSize)); 49 | // Set node deallocation mode - keep nodes active only until tasks finish 50 | $NodeDeallocationOption = taskcompletion; 51 | """ 52 | 53 | 54 | def main() -> None: 55 | # Create the BatchManagementClient 56 | batch_mgmt_client = BatchManagementClient( 57 | credential=DefaultAzureCredential(), 58 | subscription_id=os.environ["SUBSCRIPTION_ID"], 59 | ) 60 | 61 | # Assemble the pool parameters 62 | pool_parameters = { 63 | "identity": { 64 | "type": "UserAssigned", 65 | "userAssignedIdentities": { 66 | os.environ["BATCH_USER_ASSIGNED_IDENTITY"]: { 67 | "clientId": os.environ["AZURE_BATCH_ACCOUNT_CLIENT_ID"], 68 | "principalId": os.environ["PRINCIPAL_ID"], 69 | } 70 | }, 71 | }, 72 | "properties": { 73 | "vmSize": "STANDARD_d4d_v5", 74 | "interNodeCommunication": "Disabled", 75 | "taskSlotsPerNode": 1, 76 | "taskSchedulingPolicy": {"nodeFillType": "Spread"}, 77 | "deploymentConfiguration": { 78 | "virtualMachineConfiguration": { 79 | "imageReference": { 80 | "publisher": "microsoft-dsvm", 81 | "offer": "ubuntu-hpc", 82 | "sku": "2204", 83 | "version": "latest", 84 | }, 85 | "nodeAgentSkuId": "batch.node.ubuntu 22.04", 86 | "containerConfiguration": { 87 | "type": "dockercompatible", 88 | "containerImageNames": [os.environ["CONTAINER_IMAGE_NAME"]], 89 | "containerRegistries": [ 90 | { 91 | "identityReference": { 92 | "resourceId": os.environ[ 93 | "BATCH_USER_ASSIGNED_IDENTITY" 94 | ] 95 | }, 96 | "registryServer": os.environ[ 97 | "CONTAINER_REGISTRY_SERVER" 98 | ], 99 | } 100 | ], 101 | }, 102 | } 103 | }, 104 | "networkConfiguration": { 105 | "subnetId": os.environ["SUBNET_ID"], 106 | "publicIPAddressConfiguration": {"provision": "NoPublicIPAddresses"}, 107 | "dynamicVnetAssignmentScope": "None", 108 | }, 109 | "scaleSettings": { 110 | "autoScale": { 111 | "evaluationInterval": "PT5M", 112 | "formula": AUTO_SCALE_FORMULA, 113 | } 114 | }, 115 | "resizeOperationStatus": { 116 | "targetDedicatedNodes": 1, 117 | "nodeDeallocationOption": "Requeue", 118 | "resizeTimeout": "PT15M", 119 | "startTime": "2023-07-05T13:18:25.7572321Z", 120 | }, 121 | "currentDedicatedNodes": 0, 122 | "currentLowPriorityNodes": 0, 123 | "targetNodeCommunicationMode": "Simplified", 124 | "currentNodeCommunicationMode": "Simplified", 125 | }, 126 | } 127 | 128 | batch_mgmt_client.pool.create( 129 | resource_group_name=os.environ["RESOURCE_GROUP"], 130 | account_name=os.environ["BATCH_ACCOUNT"], 131 | pool_name=os.environ["POOL_ID"], 132 | parameters=pool_parameters, 133 | ) 134 | 135 | 136 | if __name__ == "__main__": 137 | main() 138 | -------------------------------------------------------------------------------- /.github/scripts/delete-container-tag.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 
| # 3 | # Delete container tags from Azure CR 4 | 5 | if [ "${#}" -ne 3 ]; then 6 | echo "Usage: $0 " 7 | exit 1 8 | fi 9 | 10 | REGISTRY="$1" 11 | IMAGE="$2" 12 | TAG="$3" 13 | 14 | ########################## 15 | # Delete container tags 16 | 17 | # Remove the image from the registry 18 | az acr repository delete \ 19 | --yes \ 20 | --name "${REGISTRY}" \ 21 | --image "${IMAGE}:${TAG}" 22 | -------------------------------------------------------------------------------- /.github/scripts/docker_build_and_push.sh: -------------------------------------------------------------------------------- 1 | IMAGE="ghcr.io/cdcgov/$1" 2 | TAG=$2 3 | BUILDER=docker-container-driver-builder 4 | 5 | # create a builder with the docker-container driver to allow cache-export 6 | docker buildx create --name "$BUILDER" --driver=docker-container || true 7 | 8 | # use the registry cache for prior images of the same tag, or the 'latest' tag 9 | time docker buildx build --push -t "$IMAGE:$TAG" \ 10 | --builder "$BUILDER" \ 11 | --cache-from "type=registry,ref=$IMAGE:$TAG-cache" \ 12 | --cache-from "type=registry,ref=$IMAGE:latest-cache" \ 13 | --cache-to "type=registry,ref=$IMAGE:$TAG-cache,mode=max" \ 14 | -f Dockerfile . 15 | -------------------------------------------------------------------------------- /.github/workflows/block-fixup.yaml: -------------------------------------------------------------------------------- 1 | name: Block Fix-up (Git Check) 2 | 3 | on: [pull_request] 4 | 5 | jobs: 6 | block-fixup: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v4 11 | - name: Block Fixup Commit Merge 12 | uses: 13rac1/block-fixup-merge-action@v2.0.0 13 | -------------------------------------------------------------------------------- /.github/workflows/check-news-md.yaml: -------------------------------------------------------------------------------- 1 | # All PRs into main MUST be deliberately labelled in the NEWS.md with a succint but informative entry 2 | # describing the changes made - this workflow checks to make sure that this has been done. 3 | 4 | name: Check NEWS.md Update 5 | 6 | on: 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | check-news-md-modification: 13 | 14 | name: Check NEWS.md modification 15 | 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - name: Checkout code 20 | uses: actions/checkout@v4 21 | with: 22 | fetch-depth: 0 # Fetch all history for all tags and branches 23 | 24 | - name: Check for NEWS.md changes 25 | run: | 26 | echo "Current SHA: $GITHUB_SHA" 27 | echo "Base SHA: ${{ github.event.pull_request.base.sha }}" 28 | 29 | git fetch origin ${{ github.event.pull_request.base.ref }} 30 | 31 | CHANGED_FILES=$(git diff --name-only $GITHUB_SHA $(git merge-base $GITHUB_SHA origin/${{ github.event.pull_request.base.ref }})) 32 | echo "Changed files:" 33 | echo "$CHANGED_FILES" 34 | 35 | if echo "$CHANGED_FILES" | grep -q "NEWS.md"; then 36 | echo "NEWS.md has been modified." 37 | else 38 | echo "::error file=NEWS.md,line=1,col=5::NEWS.md must be updated with each PR." >&2 39 | exit 1 40 | fi 41 | shell: /usr/bin/bash -e {0} 42 | -------------------------------------------------------------------------------- /.github/workflows/cleanup-on-pr-close.yaml: -------------------------------------------------------------------------------- 1 | name: Tear down Batch pool 2 | 3 | on: 4 | pull_request: 5 | types: 6 | - closed 7 | workflow_dispatch: 8 | inputs: 9 | tag: 10 | description: The name of the tag to delete. Usually the branch name. 
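# Example (illustrative): this teardown can also be run by hand for a branch with
#   gh workflow run cleanup-on-pr-close.yaml -f tag=<branch-name>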
11 | type: string 12 | 13 | env: 14 | IMAGE_TAG: ${{ inputs.tag || github.head_ref || github.ref_name }} 15 | # getting tag from input or branch name https://stackoverflow.com/a/71158878 16 | 17 | jobs: 18 | 19 | delete-pool: 20 | environment: production 21 | permissions: 22 | id-token: write 23 | runs-on: ubuntu-latest 24 | name: Delete Batch pool 25 | 26 | steps: 27 | - name: Protect 'latest' 28 | run: | 29 | if [ "${{ env.IMAGE_TAG }}" = "latest" ]; then 30 | echo "Cannot delete pool for 'latest'" 31 | exit 1 32 | fi 33 | 34 | # From: https://docs.github.com/en/actions/security-for-github-actions/security-hardening-your-deployments/configuring-openid-connect-in-cloud-providers#requesting-the-jwt-using-the-actions-core-toolkit 35 | - name: Install OIDC Client from Core Package 36 | run: npm install @actions/core@1.6.0 @actions/http-client 37 | - name: Get Id Token 38 | uses: actions/github-script@v7 39 | id: idtoken 40 | with: 41 | script: | 42 | const coredemo = require('@actions/core') 43 | const id_token = await coredemo.getIDToken('api://AzureADTokenExchange') 44 | coredemo.setOutput('id_token', id_token) 45 | 46 | - name: Delete pool 47 | uses: CDCgov/cfa-actions/runner-action@v1.4.0 48 | with: 49 | github_app_id: ${{ secrets.CDCENT_ACTOR_APP_ID }} 50 | github_app_pem: ${{ secrets.CDCENT_ACTOR_APP_PEM }} 51 | wait_for_completion: true 52 | print_logs: true 53 | script: | 54 | CURRENT_BRANCH='${{ github.event.pull_request.head.sha || github.ref_name }}' 55 | echo "Cloning repo at commit '$CURRENT_BRANCH'" 56 | git clone https://github.com/${{ github.repository }}.git 57 | cd ${{ github.event.repository.name }} 58 | git checkout $CURRENT_BRANCH 59 | 60 | echo "Logging into Azure CLI" 61 | az login --service-principal \ 62 | --username ${{ secrets.AZURE_NNHT_SP_CLIENT_ID }} \ 63 | --tenant ${{ secrets.TENANT_ID }} \ 64 | --federated-token ${{ steps.idtoken.outputs.id_token }} \ 65 | --output none 66 | 67 | echo "Running cleanup pool script" 68 | bash .github/scripts/cleanup-on-pr-close.sh \ 69 | "${{ secrets.BATCH_ACCOUNT }}" \ 70 | "${{ secrets.PRD_RESOURCE_GROUP }}" \ 71 | "cfa-epinow2-${{ env.IMAGE_TAG }}" 72 | -------------------------------------------------------------------------------- /.github/workflows/delete-container-tag.yaml: -------------------------------------------------------------------------------- 1 | name: Delete tag from container registries 2 | 3 | on: 4 | pull_request: 5 | types: [closed] 6 | workflow_dispatch: 7 | inputs: 8 | tag: 9 | description: The name of the tag to delete. Usually the branch name. 
10 | type: string 11 | 12 | env: 13 | IMAGE_NAME: cfa-epinow2-pipeline 14 | IMAGE_TAG: ${{ inputs.tag || github.head_ref || github.ref_name }} 15 | # getting tag from input or branch name https://stackoverflow.com/a/71158878 16 | 17 | jobs: 18 | delete-tag-ghcr: 19 | continue-on-error: true # allow other tag deletion to happen even if one fails 20 | permissions: 21 | packages: write 22 | runs-on: ubuntu-latest 23 | name: Delete image tag from GHCR 24 | 25 | steps: 26 | # Deleting a package from GHCR by tag name is surprising complex 27 | # This action has been approved for use on cdcent/cdcgov by the CDC Github Team 28 | # https://github.com/snok/container-retention-policy 29 | - name: Delete image tag 30 | uses: snok/container-retention-policy@v3.0.0 31 | with: 32 | account: ${{ github.repository_owner }} 33 | token: ${{ secrets.GITHUB_TOKEN }} 34 | image-names: ${{ env.IMAGE_NAME }} 35 | image-tags: ${{ env.IMAGE_TAG }},${{ env.IMAGE_TAG }}-cache 36 | cut-off: 1s # required, minimum package age to be a candidate for deletion 37 | 38 | delete-tag-acr: 39 | environment: production 40 | continue-on-error: true # allow other tag deletion to happen even if one fails 41 | permissions: 42 | id-token: write 43 | runs-on: ubuntu-latest 44 | name: Delete image tag from ACR 45 | 46 | steps: 47 | - name: Protect 'latest' 48 | run: | 49 | if [ "${{ env.IMAGE_TAG }}" = "latest" ]; then 50 | echo "Cannot delete pool for 'latest'" 51 | exit 1 52 | fi 53 | 54 | # From: https://docs.github.com/en/actions/security-for-github-actions/security-hardening-your-deployments/configuring-openid-connect-in-cloud-providers#requesting-the-jwt-using-the-actions-core-toolkit 55 | - name: Install OIDC Client from Core Package 56 | run: npm install @actions/core@1.6.0 @actions/http-client 57 | - name: Get Id Token 58 | uses: actions/github-script@v7 59 | id: idtoken 60 | with: 61 | script: | 62 | const coredemo = require('@actions/core') 63 | const id_token = await coredemo.getIDToken('api://AzureADTokenExchange') 64 | coredemo.setOutput('id_token', id_token) 65 | 66 | - name: Delete ACR tag 67 | uses: CDCgov/cfa-actions/runner-action@v1.4.0 68 | with: 69 | github_app_id: ${{ secrets.CDCENT_ACTOR_APP_ID }} 70 | github_app_pem: ${{ secrets.CDCENT_ACTOR_APP_PEM }} 71 | wait_for_completion: true 72 | print_logs: true 73 | script: | 74 | CURRENT_BRANCH='${{ github.event.pull_request.head.sha || github.ref_name }}' 75 | echo "Cloning repo at commit '$CURRENT_BRANCH'" 76 | git clone https://github.com/${{ github.repository }}.git 77 | cd ${{ github.event.repository.name }} 78 | git checkout $CURRENT_BRANCH 79 | 80 | echo "Logging into Azure CLI" 81 | az login --service-principal \ 82 | --username ${{ secrets.AZURE_NNHT_SP_CLIENT_ID }} \ 83 | --tenant ${{ secrets.TENANT_ID }} \ 84 | --federated-token ${{ steps.idtoken.outputs.id_token }} \ 85 | --output none 86 | 87 | echo "Running delete tag script" 88 | bash .github/scripts/delete-container-tag.sh \ 89 | ${{ secrets.CONTAINER_REGISTRY_URL }} \ 90 | ${{ env.IMAGE_NAME }} \ 91 | ${{ env.IMAGE_TAG }} 92 | -------------------------------------------------------------------------------- /.github/workflows/format-suggest.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/posit-dev/setup-air/tree/main/examples 2 | on: 3 | pull_request: 4 | 5 | name: format-suggest.yaml 6 | 7 | permissions: read-all 8 | 9 | jobs: 10 | format-suggest: 11 | name: format-suggest 12 | runs-on: ubuntu-latest 13 | 
permissions: 14 | pull-requests: write 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - name: Install 19 | uses: posit-dev/setup-air@v1 20 | 21 | - name: Format 22 | run: air format . 23 | 24 | - name: Suggest 25 | uses: reviewdog/action-suggester@v1 26 | with: 27 | level: error 28 | fail_level: error 29 | tool_name: air 30 | -------------------------------------------------------------------------------- /.github/workflows/gh-act/2-dry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Requires that you have first run 'gh extension install nektos/gh-act' 4 | # as well as having installed the docker engine and added your user to the docker group 5 | 6 | # This checks syntax before you push to Github Actions, helping with debug hell 7 | # To run the entire pipeline locally, see 2-full.sh 8 | 9 | gh act -P cfa-cdcgov=... -n -W '.github/workflows/2-Run-Epinow2-Pipeline.yaml' 10 | -------------------------------------------------------------------------------- /.github/workflows/gh-act/2-full.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Requires that you have first run 'gh extension install nektos/gh-act' 4 | # as well as having installed the docker engine and added your user to the docker group 5 | 6 | # This runs the github actions workflow locally 7 | 8 | gh act -P cfa-cdcgov=catthehacker/ubuntu:full-20.04 -W '.github/workflows/2-Run-Epinow2-Pipeline.yaml' 9 | -------------------------------------------------------------------------------- /.github/workflows/manual-docker-prune.yml: -------------------------------------------------------------------------------- 1 | name: Manual Docker System Prune (CDCgov) 2 | on: 3 | workflow_dispatch: 4 | 5 | jobs: 6 | docker-system-prune: 7 | runs-on: cfa-cdcgov 8 | 9 | steps: 10 | - run: docker system prune --all --force --volumes 11 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown website 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | # Only restrict concurrency for non-PR jobs 18 | concurrency: 19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | permissions: 23 | contents: write 24 | pull-requests: write 25 | steps: 26 | - uses: actions/checkout@v4 27 | 28 | - uses: r-lib/actions/setup-pandoc@v2 29 | 30 | - uses: r-lib/actions/setup-r@v2 31 | with: 32 | use-public-rspm: true 33 | r-version: "4.4.3" 34 | 35 | - uses: r-lib/actions/setup-r-dependencies@v2 36 | with: 37 | extra-packages: any::pkgdown, local::. 
38 | needs: website 39 | 40 | - name: Build site 41 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 42 | shell: Rscript {0} 43 | 44 | - name: Save artifact 45 | if: ${{ github.event_name == 'pull_request' }} 46 | uses: actions/upload-artifact@v4 47 | with: 48 | name: pkgdown-site 49 | path: ./docs 50 | retention-days: 7 51 | 52 | - name: Post to PR 53 | uses: CDCgov/cfa-actions/post-artifact@v1.4.0 54 | if: ${{ github.event_name == 'pull_request' }} 55 | with: 56 | artifact-name: pkgdown-site 57 | gh-token: ${{ secrets.GITHUB_TOKEN }} 58 | 59 | - name: Deploy to GitHub pages 🚀 60 | if: github.event_name != 'pull_request' 61 | uses: JamesIves/github-pages-deploy-action@v4.7.3 62 | with: 63 | clean: false 64 | branch: gh-pages 65 | folder: docs 66 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | issue_comment: 5 | types: [created] 6 | workflow_dispatch: 7 | 8 | name: PR Commands 9 | 10 | jobs: 11 | document: 12 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER' || github.event.comment.author_association == 'CONTRIBUTOR' || github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'USER') && startsWith(github.event.comment.body, '/document') }} 13 | name: document 14 | runs-on: ubuntu-latest 15 | env: 16 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | persist-credentials: false 21 | 22 | - uses: r-lib/actions/pr-fetch@v2 23 | with: 24 | repo-token: ${{ secrets.GITHUB_TOKEN }} 25 | 26 | - uses: r-lib/actions/setup-r@v2 27 | with: 28 | use-public-rspm: true 29 | 30 | - uses: r-lib/actions/setup-r-dependencies@v2 31 | with: 32 | extra-packages: any::roxygen2 33 | needs: pr-document 34 | 35 | - name: Document 36 | run: roxygen2::roxygenise() 37 | shell: Rscript {0} 38 | 39 | - name: commit 40 | run: | 41 | git config --local user.name "$GITHUB_ACTOR" 42 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 43 | git add man/\* NAMESPACE 44 | git commit -m 'Document' 45 | 46 | - uses: r-lib/actions/pr-push@v2 47 | with: 48 | repo-token: ${{ secrets.ZS_PAT }} 49 | 50 | style: 51 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER' || github.event.comment.author_association == 'CONTRIBUTOR' || github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'USER') && startsWith(github.event.comment.body, '/style') }} 52 | name: style 53 | runs-on: ubuntu-latest 54 | env: 55 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 56 | steps: 57 | - uses: actions/checkout@v4 58 | with: 59 | persist-credentials: false 60 | 61 | - uses: r-lib/actions/pr-fetch@v2 62 | with: 63 | repo-token: ${{ secrets.GITHUB_TOKEN }} 64 | 65 | - uses: r-lib/actions/setup-r@v2 66 | 67 | - name: Install dependencies 68 | run: install.packages("styler") 69 | shell: Rscript {0} 70 | 71 | - name: Style 72 | run: styler::style_pkg() 73 | shell: Rscript {0} 74 | 75 | - name: commit 76 | run: | 77 | git config --local 
user.name "$GITHUB_ACTOR" 78 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 79 | git add \*.R 80 | git commit -m 'Style' 81 | 82 | - uses: r-lib/actions/pr-push@v2 83 | with: 84 | repo-token: ${{ secrets.ZS_PAT }} 85 | -------------------------------------------------------------------------------- /.github/workflows/r-cmd-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | 4 | name: R CMD check 5 | 6 | on: 7 | pull_request: 8 | branches: [main] 9 | push: 10 | branches: 11 | - main 12 | 13 | concurrency: 14 | group: ${{ github.workflow }}-${{ github.ref }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | R-CMD-check: 19 | runs-on: ${{ matrix.config.os }} 20 | 21 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 22 | 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | config: 27 | - {os: ubuntu-latest, r: 'release'} 28 | # Dropping these for now because we deploy only on Ubuntu. 29 | # Ucomment as needed if supporting additional environments. 30 | #- {os: macos-latest, r: 'release'} 31 | #- {os: windows-latest, r: 'release'} 32 | 33 | env: 34 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 35 | R_KEEP_PKG_SOURCE: yes 36 | 37 | steps: 38 | - uses: actions/checkout@v4 39 | 40 | - uses: r-lib/actions/setup-pandoc@v2 41 | 42 | - uses: r-lib/actions/setup-r@v2 43 | with: 44 | r-version: "4.4.3" 45 | http-user-agent: ${{ matrix.config.http-user-agent }} 46 | use-public-rspm: true 47 | 48 | - uses: r-lib/actions/setup-r-dependencies@v2 49 | with: 50 | extra-packages: any::rcmdcheck, any::roxygen2 51 | needs: check 52 | 53 | - name: Install cmdstan 54 | uses: epinowcast/actions/install-cmdstan@v1 55 | with: 56 | cmdstan-version: '2.36.0' 57 | num-cores: 2 58 | 59 | - name: Check that roxygen documentation is up to date 60 | run: | 61 | Rscript -e "roxygen2::roxygenize()" 62 | git diff --exit-code man || (echo "::error::Documentation is not up to date. Run 'roxygen2::roxygenize()' locally to re-render." && exit 1) 63 | 64 | - uses: r-lib/actions/check-r-package@v2 65 | with: 66 | upload-snapshots: true 67 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 68 | -------------------------------------------------------------------------------- /.github/workflows/start-app-job.yaml: -------------------------------------------------------------------------------- 1 | name: Start Container App Job 2 | 3 | # This GitHub Actions workflow executes a Container App Job for the cfa-epinow2-pipeline. 4 | # It requires a config file be provided as input. 5 | # Steps are to pull a template of the configured job from Azure, update the template with 6 | # the config file provided by the user, and start the job. 7 | # Operations are done through Azure CLI. 8 | 9 | on: 10 | workflow_dispatch: 11 | inputs: 12 | config_file: 13 | description: "Config File" 14 | required: true 15 | default: 16 | 17 | env: 18 | RESOURCE_GROUP: ext-edav-cfa-prd 19 | JOB_NAME: cfa-epinow2-test-caj 20 | 21 | jobs: 22 | start-caj: 23 | environment: production 24 | permissions: 25 | id-token: 'write' 26 | packages: 'read' 27 | contents: 'write' 28 | runs-on: ubuntu-latest 29 | name: start caj 30 | steps: 31 | - name: Azure login with OIDC 32 | id: azure_login_2 33 | uses: azure/login@v2 34 | with: 35 | # managed by EDAV. Contact Amit Mantri or Jon Kislin if you have issues. 
36 | client-id: ${{ secrets.AZURE_NNHT_SP_CLIENT_ID }} 37 | tenant-id: ${{ secrets.TENANT_ID }} 38 | subscription-id: ${{ secrets.SUBSCRIPTION_ID }} 39 | 40 | - name: Get container app job template 41 | run: | 42 | az containerapp job show \ 43 | --resource-group "${{ env.RESOURCE_GROUP }}" \ 44 | --name "${{ env.JOB_NAME }}" \ 45 | --query "properties.template" \ 46 | --output yaml > job-template.yaml 47 | 48 | - name: Update template with input value 49 | run: | 50 | sed -i 's|<>|${{ github.event.inputs.config_file }}|' job-template.yaml 51 | 52 | - name: Run container app job 53 | run: | 54 | az containerapp job start \ 55 | --resource-group "${{ env.RESOURCE_GROUP }}" \ 56 | --name "${{ env.JOB_NAME }}" \ 57 | --yaml job-template.yaml 58 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | name: Code coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | r-version: "4.4.3" 24 | 25 | - uses: r-lib/actions/setup-r-dependencies@v2 26 | with: 27 | extra-packages: any::covr 28 | needs: coverage 29 | 30 | - name: Install cmdstan 31 | uses: epinowcast/actions/install-cmdstan@v1 32 | with: 33 | cmdstan-version: '2.36.0' 34 | num-cores: 2 35 | 36 | - name: Test coverage 37 | shell: Rscript {0} 38 | run: | 39 | covr::codecov( 40 | quiet = FALSE, 41 | clean = FALSE, 42 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package"), 43 | token = "${{ secrets.CODECOV_TOKEN }}" 44 | ) 45 | 46 | - name: Show testthat output 47 | if: always() 48 | run: | 49 | ## -------------------------------------------------------------------- 50 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 51 | shell: bash 52 | 53 | - name: Upload test results 54 | if: failure() 55 | uses: actions/upload-artifact@v4 56 | with: 57 | name: coverage-test-failures 58 | path: ${{ runner.temp }}/package 59 | -------------------------------------------------------------------------------- /.lintr: -------------------------------------------------------------------------------- 1 | linters: linters_with_defaults(object_name_linter = NULL, object_usage_linter = NULL) 2 | encoding: "UTF-8" 3 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # All available hooks: https://pre-commit.com/hooks.html 2 | # R specific hooks: https://github.com/lorenzwalthert/precommit 3 | repos: 4 | # R 5 | - repo: https://github.com/lorenzwalthert/precommit 6 | rev: v0.4.3.9003 7 | hooks: 8 | - id: use-tidy-description 9 | - id: lintr 10 | - id: readme-rmd-rendered 11 | - id: parsable-R 12 | - id: no-browser-statement 13 | - id: no-print-statement 14 | exclude: '^tests/testthat/test-print\.R$' 15 | - id: no-debug-statement 16 | - id: deps-in-desc 17 | - repo: https://github.com/pre-commit/pre-commit-hooks 18 | rev: v5.0.0 19 | hooks: 
20 | - id: check-added-large-files 21 | args: ['--maxkb=200'] 22 | - id: file-contents-sorter 23 | files: '^\.Rbuildignore$' 24 | - id: end-of-file-fixer 25 | exclude: '(\.Rd)|(tests/testthat/_snaps/)' 26 | - id: check-yaml 27 | - id: check-toml 28 | - id: mixed-line-ending 29 | args: ['--fix=lf'] 30 | - id: trailing-whitespace 31 | exclude: '(tests/testthat/_snaps/)|(\.Rd)' 32 | - repo: https://github.com/pre-commit-ci/pre-commit-ci-config 33 | rev: v1.6.1 34 | hooks: 35 | # Only required when https://pre-commit.ci is used for config validation 36 | - id: check-pre-commit-ci-config 37 | - repo: local 38 | hooks: 39 | - id: forbid-to-commit 40 | name: Don't commit common R artifacts 41 | entry: Cannot commit .Rhistory, .RData, .Rds or .rds. 42 | language: fail 43 | files: '\.(Rhistory|RData|Rds|rds)$' 44 | exclude: '^tests/testthat/data/.*\.rds$' 45 | # `exclude: ` to allow committing specific files 46 | # Secrets 47 | - repo: https://github.com/Yelp/detect-secrets 48 | rev: v1.5.0 49 | hooks: 50 | - id: detect-secrets 51 | args: ['--baseline', '.secrets.baseline'] 52 | exclude: package.lock.json 53 | ci: 54 | autofix_commit_msg: | 55 | [pre-commit.ci] auto fixes from pre-commit.com hooks 56 | 57 | for more information, see https://pre-commit.ci 58 | autofix_prs: true 59 | autoupdate_branch: '' 60 | autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' 61 | autoupdate_schedule: weekly 62 | submodules: false 63 | -------------------------------------------------------------------------------- /.secrets.baseline: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.5.0", 3 | "plugins_used": [ 4 | { 5 | "name": "ArtifactoryDetector" 6 | }, 7 | { 8 | "name": "AWSKeyDetector" 9 | }, 10 | { 11 | "name": "AzureStorageKeyDetector" 12 | }, 13 | { 14 | "name": "Base64HighEntropyString", 15 | "limit": 4.5 16 | }, 17 | { 18 | "name": "BasicAuthDetector" 19 | }, 20 | { 21 | "name": "CloudantDetector" 22 | }, 23 | { 24 | "name": "DiscordBotTokenDetector" 25 | }, 26 | { 27 | "name": "GitHubTokenDetector" 28 | }, 29 | { 30 | "name": "GitLabTokenDetector" 31 | }, 32 | { 33 | "name": "HexHighEntropyString", 34 | "limit": 3.0 35 | }, 36 | { 37 | "name": "IbmCloudIamDetector" 38 | }, 39 | { 40 | "name": "IbmCosHmacDetector" 41 | }, 42 | { 43 | "name": "IPPublicDetector" 44 | }, 45 | { 46 | "name": "JwtTokenDetector" 47 | }, 48 | { 49 | "name": "KeywordDetector", 50 | "keyword_exclude": "" 51 | }, 52 | { 53 | "name": "MailchimpDetector" 54 | }, 55 | { 56 | "name": "NpmDetector" 57 | }, 58 | { 59 | "name": "OpenAIDetector" 60 | }, 61 | { 62 | "name": "PrivateKeyDetector" 63 | }, 64 | { 65 | "name": "PypiTokenDetector" 66 | }, 67 | { 68 | "name": "SendGridDetector" 69 | }, 70 | { 71 | "name": "SlackDetector" 72 | }, 73 | { 74 | "name": "SoftlayerDetector" 75 | }, 76 | { 77 | "name": "SquareOAuthDetector" 78 | }, 79 | { 80 | "name": "StripeDetector" 81 | }, 82 | { 83 | "name": "TelegramBotTokenDetector" 84 | }, 85 | { 86 | "name": "TwilioKeyDetector" 87 | } 88 | ], 89 | "filters_used": [ 90 | { 91 | "path": "detect_secrets.filters.allowlist.is_line_allowlisted" 92 | }, 93 | { 94 | "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", 95 | "min_level": 2 96 | }, 97 | { 98 | "path": "detect_secrets.filters.heuristic.is_indirect_reference" 99 | }, 100 | { 101 | "path": "detect_secrets.filters.heuristic.is_likely_id_string" 102 | }, 103 | { 104 | "path": "detect_secrets.filters.heuristic.is_lock_file" 105 | }, 106 | { 107 | 
"path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string" 108 | }, 109 | { 110 | "path": "detect_secrets.filters.heuristic.is_potential_uuid" 111 | }, 112 | { 113 | "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" 114 | }, 115 | { 116 | "path": "detect_secrets.filters.heuristic.is_sequential_string" 117 | }, 118 | { 119 | "path": "detect_secrets.filters.heuristic.is_swagger_file" 120 | }, 121 | { 122 | "path": "detect_secrets.filters.heuristic.is_templated_secret" 123 | } 124 | ], 125 | "results": {}, 126 | "generated_at": "2024-09-20T17:50:20Z" 127 | } 128 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Welcome! 2 | Thank you for contributing to CDC's Open Source projects! If you have any 3 | questions or doubts, don't be afraid to send them our way. We appreciate all 4 | contributions, and we are looking forward to fostering an open, transparent, and 5 | collaborative environment. 6 | 7 | ## Package authorship 8 | These guidelines build on the recommendations in [R packages](https://r-pkgs.org/). 9 | 10 | * Maintainer: The CFA team member who is actively in charge of maintaining the 11 | package. 12 | * Authors: Any CFA team member who has taken a lead role in code development, 13 | review, testing, etc. 14 | * Contributors: Team members who have made smaller but substantial contributions 15 | to the codebase (40+ hours, including coding, code review, testing, etc.) 16 | * Smaller contributions: Smaller contributiosn of code to this repo or 17 | to its predecessor, cdcent/cfa-nnh-pipelines, including small bug fixes, 18 | issuees, or code review will not be granted package authorship in the 19 | description file, but may be acknowledged in NEWS.md or release notes if 20 | appropriate 21 | 22 | Before contributing, we encourage you to also read our [LICENSE](LICENSE), 23 | [README](README.md), and 24 | [code-of-conduct](code-of-conduct.md) 25 | files, also found in this repository. If you have any inquiries or questions not 26 | answered by the content of this repository, feel free to [contact us](mailto:surveillanceplatform@cdc.gov). 27 | 28 | ## Public Domain 29 | This project is in the public domain within the United States, and copyright and 30 | related rights in the work worldwide are waived through the [CC0 1.0 Universal public domain dedication](https://creativecommons.org/publicdomain/zero/1.0/). 31 | All contributions to this project will be released under the CC0 dedication. By 32 | submitting a pull request you are agreeing to comply with this waiver of 33 | copyright interest. 34 | 35 | ## Requesting Changes 36 | Our pull request/merging process is designed to give the CDC Surveillance Team 37 | and other in our space an opportunity to consider and discuss any suggested 38 | changes. This policy affects all CDC spaces, both on-line and off, and all users 39 | are expected to abide by it. 40 | 41 | ### Open an issue in the repository 42 | If you don't have specific language to submit but would like to suggest a change 43 | or have something addressed, you can open an issue in this repository. Team 44 | members will respond to the issue as soon as possible. 45 | 46 | ### Submit a pull request 47 | If you would like to contribute, please submit a pull request. In order for us 48 | to merge a pull request, it must: 49 | * Be at least seven days old. 
Pull requests may be held longer if necessary 50 | to give people the opportunity to assess it. 51 | * Receive a +1 from a majority of team members associated with the request. 52 | If there is significant dissent between the team, a meeting will be held to 53 | discuss a plan of action for the pull request. 54 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: CFAEpiNow2Pipeline 2 | Title: EpiNow2 wrapper for deployment to Azure Batch 3 | Version: 0.2.1.9000 4 | Authors@R: c( 5 | person("Zachary", "Susswein", , "utb2@cdc.gov", role = "aut", 6 | comment = c(ORCID = "0000-0002-4329-4833")), 7 | person("Katelyn", "Gostic", , "uep6@cdc.gov", role = "aut", 8 | comment = c(ORCID = "0000-0002-9369-6371")), 9 | person("Nathan", "McIntosh", , "ute2@cdc.gov", role = "aut"), 10 | person("Patrick", "Corbett", , "pyv3@cdc.gov", role = "aut"), 11 | person("Adam", "Howes", , "xwg3@cdc.gov", role = "aut", 12 | comment = c(ORCID = "0000-0003-2386-4031")), 13 | person("Micah", "Wiesner", , "zqm6@cdc.gov", role = c("aut", "cre")) 14 | ) 15 | Description: Add logging, metadata handling, and data handling 16 | functions to use EpiNow2 in a pipeline. This pipeline is optimized 17 | for the Center for Forecasting and Outbreak Analytics' use-case, 18 | fitting hundreds of models in parallel. 19 | License: Apache License (>= 2) 20 | Encoding: UTF-8 21 | Remotes: 22 | github::epiforecasts/EpiNow2@bcf297cf36a93cc56123bc3c9e8cebfb1421a962, 23 | github::stan-dev/cmdstanr 24 | Roxygen: list(markdown = TRUE) 25 | RoxygenNote: 7.3.2 26 | Suggests: 27 | primarycensored, 28 | testthat (>= 3.0.0), 29 | usethis, 30 | withr 31 | Config/testthat/edition: 3 32 | Imports: 33 | AzureRMR, 34 | AzureStor, 35 | cmdstanr, 36 | cli, 37 | data.table, 38 | DBI, 39 | dplyr, 40 | duckdb, 41 | EpiNow2 (>= 1.4.0), 42 | jsonlite, 43 | rlang, 44 | rstan, 45 | S7, 46 | lubridate, 47 | readxl, 48 | tidyr, 49 | tidybayes, 50 | optparse, 51 | Microsoft365R 52 | Additional_repositories: 53 | https://stan-dev.r-universe.dev 54 | URL: https://cdcgov.github.io/cfa-epinow2-pipeline/ 55 | Depends: 56 | R (>= 3.50) 57 | LazyData: true 58 | -------------------------------------------------------------------------------- /DISCLAIMER.md: -------------------------------------------------------------------------------- 1 | # DISCLAIMER 2 | Use of this service is limited only to **non-sensitive and publicly available 3 | data**. Users must not use, share, or store any kind of sensitive data like 4 | health status, provision or payment of healthcare, Personally Identifiable 5 | Information (PII) and/or Protected Health Information (PHI), etc. under **ANY** 6 | circumstance. 7 | 8 | Administrators for this service reserve the right to moderate all information 9 | used, shared, or stored with this service at any time. Any user that cannot 10 | abide by this disclaimer and Code of Conduct may be subject to action, up to 11 | and including revoking access to services. 12 | 13 | The material embodied in this software is provided to you "as-is" and without 14 | warranty of any kind, express, implied or otherwise, including without 15 | limitation, any warranty of fitness for a particular purpose. In no event shall 16 | the Centers for Disease Control and Prevention (CDC) or the United States (U.S.) 
17 | government be liable to you or anyone else for any direct, special, incidental, 18 | indirect or consequential damages of any kind, or any damages whatsoever, 19 | including without limitation, loss of profit, loss of use, savings or revenue, 20 | or the claims of third parties, whether or not CDC or the U.S. government has 21 | been advised of the possibility of such loss, however caused and on any theory 22 | of liability, arising out of or in connection with the possession, use or 23 | performance of this software. 24 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM docker.io/rocker/geospatial:4.4.1 2 | 3 | # Will copy the package to the container preserving the directory structure 4 | RUN mkdir -p pkg 5 | 6 | COPY ./DESCRIPTION pkg/ 7 | 8 | # Installing missing dependencies 9 | RUN apt-get update && apt-get install -y --no-install-recommends pandoc-citeproc 10 | RUN install2.r pak 11 | # dependencies = TRUE means we install `suggests` too 12 | RUN Rscript -e 'pak::local_install_deps("pkg", upgrade = FALSE, dependencies = TRUE)' 13 | # The cmdstan version will need to be incrementally updated 14 | # Must also manually bump cmdstan version `.github/workflows` when updating 15 | RUN Rscript -e 'cmdstanr::install_cmdstan(version="2.36.0")' 16 | # This requires access to the Azure Container Registry 17 | # FROM ghcr.io/cdcgov/cfa-epinow2-pipeline:${TAG} 18 | 19 | # Will copy the package to the container preserving the directory structure 20 | COPY . pkg/ 21 | 22 | # Install the full package while leaving the tar.gz file in the 23 | # container for later use. 24 | RUN R CMD build --no-build-vignettes --no-manual pkg && \ 25 | R CMD INSTALL CFAEpiNow2Pipeline_*.tar.gz 26 | 27 | # Ensure the package is working properly 28 | RUN R CMD check --no-build-vignettes --no-manual CFAEpiNow2Pipeline_*.tar.gz 29 | 30 | CMD ["bash"] 31 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | REGISTRY=cfaprdbatchcr.azurecr.io/ 2 | IMAGE_NAME=cfa-epinow2-pipeline 3 | BRANCH=$(shell git branch --show-current) 4 | CONFIG_CONTAINER=rt-epinow2-config 5 | CNTR_MGR=docker 6 | ifeq ($(BRANCH), main) 7 | TAG=latest 8 | else 9 | TAG=$(BRANCH) 10 | endif 11 | 12 | CONFIG=test.json 13 | POOL="cfa-epinow2-$(TAG)" 14 | TIMESTAMP:=$(shell date -u +"%Y%m%d_%H%M%S") 15 | JOB:=Rt-estimation-$(TIMESTAMP) 16 | 17 | # The report date to use, in ISO format (YYYY-MM-DD). Default is today 18 | REPORT_DATE?=$(shell date -u +%F) 19 | 20 | .DEFAULT_GOAL := help 21 | 22 | help: 23 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 24 | 25 | pull: ## Login to Azure Container Registry and pull the latest container image 26 | az acr login --name 'cfaprdbatchcr' 27 | $(CNTR_MGR) pull $(REGISTRY)$(IMAGE_NAME):$(TAG) 28 | 29 | build: ## Build the Docker image with given tag 30 | $(CNTR_MGR) build -t $(REGISTRY)$(IMAGE_NAME):$(TAG) \ 31 | --build-arg TAG=$(TAG) -f Dockerfile . 
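# Example (illustrative): a typical local workflow on a feature branch is
#   make build && make push
# and then `make run CONFIG=<config>.json` (with a populated .env file) to exercise the
# pipeline inside the freshly built container.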
32 | 33 | tag: ## Tags the local image for pushing to the container registry 34 | $(CNTR_MGR) tag $(IMAGE_NAME):$(TAG) $(REGISTRY)$(IMAGE_NAME):$(TAG) 35 | 36 | config: ## Generates a configuration file for running the model 37 | uv run azure/generate_configs.py \ 38 | --disease="COVID-19,Influenza" \ 39 | --state=all \ 40 | --output-container=nssp-rt-v2 \ 41 | --job-id=$(JOB) \ 42 | --report-date-str=$(REPORT_DATE) 43 | 44 | rerun-config: ## Generate a configuration file to rerun a previous model 45 | uv run azure/generate_rerun_configs.py \ 46 | --output-container=nssp-rt-v2 \ 47 | --job-id=$(JOB) \ 48 | --report-date-str=$(REPORT_DATE) 49 | 50 | run-batch: ## Runs job.py on Azure Batch 51 | uv run --env-file .env \ 52 | azure/job.py \ 53 | --image_name="$(REGISTRY)$(IMAGE_NAME):$(TAG)" \ 54 | --config_container="$(CONFIG_CONTAINER)" \ 55 | --pool_id="$(POOL)" \ 56 | --job_id="$(JOB)" 57 | 58 | run-prod: config run-batch ## Calls config and run-batch 59 | 60 | rerun-prod: rerun-config run-batch ## Calls rerun-config and run-batch 61 | 62 | run: ## Run pipeline from R interactively in the container 63 | $(CNTR_MGR) run --mount type=bind,source=$(PWD),target=/mnt -it \ 64 | --env-file .env \ 65 | --rm $(REGISTRY)$(IMAGE_NAME):$(TAG) \ 66 | Rscript -e "CFAEpiNow2Pipeline::orchestrate_pipeline('$(CONFIG)', config_container = 'rt-epinow2-config', input_dir = '/mnt/input', output_dir = '/mnt')" 67 | 68 | up: ## Start an interactive bash shell in the container with project directory mounted 69 | $(CNTR_MGR) run --mount type=bind,source=$(PWD),target=/cfa-epinow2-pipeline -it \ 70 | --env-file .env \ 71 | --rm $(REGISTRY)$(IMAGE_NAME):$(TAG) /bin/bash 72 | 73 | push: ## Push the tagged image to the container registry 74 | $(CNTR_MGR) push $(REGISTRY)$(IMAGE_NAME):$(TAG) 75 | 76 | test-batch: ## Run GitHub Actions workflow and then job.py for testing on Azure Batch 77 | uv run azure/generate_configs.py \ 78 | --disease="COVID-19,Influenza" \ 79 | --state=NY \ 80 | --output-container=nssp-rt-testing \ 81 | --job-id=$(JOB) \ 82 | --report-date-str=$(REPORT_DATE) 83 | uv run --env-file .env \ 84 | azure/job.py \ 85 | --image_name="$(REGISTRY)$(IMAGE_NAME):$(TAG)" \ 86 | --config_container="$(CONFIG_CONTAINER)" \ 87 | --pool_id="$(POOL)" \ 88 | --job_id="$(JOB)" 89 | 90 | test: ## Run unit tests for the CFAEpiNow2Pipeline R package 91 | Rscript -e "testthat::test_local()" 92 | 93 | document: ## Generate roxygen2 documentation for the CFAEpiNow2Pipeline R package 94 | Rscript -e "roxygen2::roxygenize()" 95 | 96 | check: ## Perform R CMD check for the CFAEpiNow2Pipeline R package 97 | Rscript -e "rcmdcheck::rcmdcheck()" 98 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(Config) 4 | export(Data) 5 | export(DelayInterval) 6 | export(Exclusions) 7 | export(GenerationInterval) 8 | export(Parameters) 9 | export(RightTruncation) 10 | export(apply_exclusions) 11 | export(download_file_from_container) 12 | export(download_if_specified) 13 | export(execute_model_logic) 14 | export(extract_diagnostics) 15 | export(fetch_blob_container) 16 | export(fetch_credential_from_env_var) 17 | export(fit_model) 18 | export(format_delay_interval) 19 | export(format_generation_interval) 20 | export(format_right_truncation) 21 | export(format_stan_opts) 22 | export(low_case_count_diagnostic) 23 | export(orchestrate_pipeline) 24 | 
export(process_quantiles) 25 | export(process_samples) 26 | export(read_data) 27 | export(read_disease_parameters) 28 | export(read_exclusions) 29 | export(read_interval_pmf) 30 | export(read_json_into_config) 31 | export(write_model_outputs) 32 | export(write_output_dir_structure) 33 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # CFAEpiNow2Pipeline v0.2.0 2 | 3 | ## Features 4 | * Adding dependencies to install cmdstanr backend and using GH action 5 | * Convert drop cols value to character for point/state exclusions 6 | * Run `make test-batch` target locally 7 | * Update runner action version 8 | * Remove duplicate batch autoscale text file 9 | * Improve consistency in docs 10 | * Update version of deploy action 11 | * Update github checkout action from V2 to V4 12 | * Setting up dependabot yaml file 13 | * Remove out-of-date demo folder 14 | * Add automated check that docs are up to date 15 | * Rewrite README for simplification and clarity 16 | * Switch to the `air` code formatter 17 | * Replace remaining self-hosted runner workflows with ubuntu-latest 18 | * Fix mismatch between R code and documentation 19 | * Change code owner and include authors in R package 20 | * Change code owner 21 | * Add documentation to the Makefile 22 | * Fix mismatch between R code and documentation 23 | * Fix production diseases 24 | * Add RSV specifications 25 | * Create the config files locally to speed things up 26 | * Lock dependencies for creating the pool 27 | * Saving state exclusions to nssp-rt/state_exclusions 28 | * Automate tag deletion from ghcr.io 29 | * Editing of `SOP.md` 30 | * Pin r-version at 4.4.3 for CI/CD 31 | * Fix minor typos in `SOP.md`. 32 | * Swap from `Dockerfile-batch` to using an inline-metadata script, managed by `uv`. 33 | * Adding dynamic logic to re-query for configs in blob 34 | * Automate creation of outlier csv for nssp-elt-2/outliers 35 | * Fix 'latest' tag for CI 36 | * Updated path for read/write of data outliers 37 | * Updating makefile to represent unified Dockerfile approach (not two-step build) 38 | * Make sure we change "COVID-19/Omicron" to "COVID-19" when reading NSSP data. 39 | * Unified Dockerfile 40 | * Add instructions for data outliers reruns to the SOP. 41 | * Add ability to call `make rerun-prod` to rerun just the tasks that needed a data change. 42 | * Add output container as a new field in the config file. 43 | * Building with ubuntu-latest and using Container App runner for all else, remove azure-cli action 44 | * Adding exclusions documentation and Makefile support 45 | * Add the blob storage container, if provided 46 | * Adding make command to test Azure batch 47 | * Updating subnet ID and pool VM to 22.04 from 20.04 48 | * Write model diagnostics to an output file, correcting an oversight 49 | * Refactored GH Actions container build to cfa-actions 2-step build 50 | * Creating SOP.md to document weekly run procedures, including diagram 51 | * Allows unique job_ids for runs.
52 | * Makefile supports either docker or podman as arguments to setup & manage containers 53 | * Streamlined configurable container execution provided by included start.sh script 54 | * Container App Job execution tools added including job-template.yaml file for single task and Python script for bulk tasks 55 | * GitHub Actions workflow added to start Azure Container App Job 56 | * Minor changes in removing unused container tags from Azure CR 57 | * Reactivated DEBUG level logs from EpiNow2 so that sampler progress is visible 58 | * Added new test data and unit tests for point exclusions 59 | 60 | # CFAEpiNow2Pipeline v0.1.0 61 | 62 | This initial release establishes minimal feature parity with the internal EpiNow2 Rt modeling pipeline. It adds wrappers to integrate with internal data schemas and ingest pre-estimated model parameters (i.e., generation intervals, right-truncation). It defines an output schema and adds comprehensive logging. The repository also has functionality to set up and deploy to Azure Batch. 63 | 64 | ## Features 65 | 66 | * GitHub Actions to build Docker images on PR and merge to main, deploy Azure Batch environments off the built images, and tear down the environment (including images) on PR close. 67 | * Comprehensive documentation of pipeline code and validation of input data, parameters, and model run configs 68 | * Set up comprehensive logging of model runs and handle pipeline failures to preserve logs where possible 69 | * Automatically download and upload inputs and outputs from Azure Blob Storage 70 | * A new script for building the pool. Runnable from CLI or GHA. Requires `uv` be installed, and then `uv` handles the python and dependency management based on the inline script metadata. 71 | -------------------------------------------------------------------------------- /R/azure.R: -------------------------------------------------------------------------------- 1 | #' Download if specified 2 | #' 3 | #' @param blob_path The name of the blob to download 4 | #' @param blob_storage_container The name of the container to download from 5 | #' @param dir The directory to which to write the downloaded file 6 | #' @return The path of the file 7 | #' @family azure 8 | #' @export 9 | download_if_specified <- function( 10 | blob_path, 11 | blob_storage_container, 12 | dir 13 | ) { 14 | # Guard against null input erroring out file.exists() 15 | if (rlang::is_null(blob_path)) { 16 | local_path <- NULL 17 | } else { 18 | file_exists <- file.exists(file.path(dir, blob_path)) 19 | if (!rlang::is_null(blob_storage_container) && !file_exists) { 20 | container <- fetch_blob_container(blob_storage_container) 21 | local_path <- download_file_from_container( 22 | blob_storage_path = blob_path, 23 | local_file_path = file.path(dir, blob_path), 24 | storage_container = container 25 | ) 26 | } else { 27 | local_path <- file.path(dir, blob_path) 28 | } 29 | } 30 | local_path 31 | } 32 | 33 | #' Download specified blobs from Blob Storage and save them in a local dir 34 | #' 35 | #' @param blob_storage_path A character of a blob in `storage_container` 36 | #' @param local_file_path The local path to save the blob 37 | #' @param storage_container The blob storage container with `blob_storage_path` 38 | # 39 | #' @return Invisibly, `local_file_path` 40 | #' @family azure 41 | #' @export 42 | download_file_from_container <- function( 43 | blob_storage_path, 44 | local_file_path, 45 | storage_container 46 | ) { 47 | cli::cli_alert_info( 48 | "Downloading blob {.path {blob_storage_path}} to 
{.path {local_file_path}}" 49 |   ) 50 | 51 |   rlang::try_fetch( 52 |     { 53 |       dirs <- dirname(local_file_path) 54 | 55 |       if (!dir.exists(dirs)) { 56 |         cli::cli_alert("Creating directory {.path {dirs}}") 57 |         dir.create(dirs, recursive = TRUE) 58 |       } 59 | 60 |       AzureStor::download_blob( 61 |         container = storage_container, 62 |         src = blob_storage_path, 63 |         dest = local_file_path, 64 |         overwrite = TRUE 65 |       ) 66 |     }, 67 |     error = function(cnd) { 68 |       cli::cli_abort(c( 69 |         "Failed to download {.path {blob_storage_path}}", 70 |         ">" = "Does the blob exist in the container?" 71 |       )) 72 |     } 73 |   ) 74 | 75 |   cli::cli_alert_success( 76 |     "Blob {.path {blob_storage_path}} downloaded successfully" 77 |   ) 78 | 79 |   invisible(local_file_path) 80 | } 81 | 82 | #' Load Azure Blob container using credentials in environment variables 83 | #' 84 | #' This function depends on the following Azure credentials stored in 85 | #' environment variables: 86 | #' 87 | #' * `az_tenant_id`: an Azure Active Directory (AAD) tenant ID 88 | #' * `az_client_id`: the application (client) ID of the service principal 89 | #' * `az_service_principal`: the service principal's client secret (also 90 | #' sometimes called the `client_secret`) 91 | #' 92 | #' As a result it is an impure function, and should be used bearing that 93 | #' warning in mind. Each variable is obtained using 94 | #' [fetch_credential_from_env_var()] (which will return an error if the 95 | #' credential is not specified or empty). 96 | #' 97 | #' @param container_name The Azure Blob Storage container associated with the 98 | #' credentials 99 | #' @return A Blob endpoint 100 | #' @family azure 101 | #' @export 102 | fetch_blob_container <- function(container_name) { 103 |   cli::cli_alert_info( 104 |     "Attempting to connect to container {.var {container_name}}" 105 |   ) 106 |   cli::cli_alert_info("Loading Azure credentials from env vars") 107 |   # nolint start: object_name_linter 108 |   az_tenant_id <- fetch_credential_from_env_var("az_tenant_id") 109 |   az_client_id <- fetch_credential_from_env_var("az_client_id") 110 |   az_service_principal <- fetch_credential_from_env_var("az_service_principal") 111 |   # nolint end: object_name_linter 112 |   cli::cli_alert_success("Credentials loaded successfully") 113 | 114 |   cli::cli_alert_info("Authenticating with loaded credentials") 115 |   rlang::try_fetch( 116 |     { 117 |       # First, get a general-purpose token using SP flow 118 |       # Analogous to: 119 |       # az login --service-principal \ 120 |       # --username $az_client_id \ 121 |       # --password $az_service_principal \ 122 |       # --tenant $az_tenant_id 123 |       # NOTE: the SP is also sometimes called the `client_secret` 124 |       token <- AzureRMR::get_azure_token( 125 |         resource = "https://storage.azure.com", 126 |         tenant = az_tenant_id, 127 |         app = az_client_id, 128 |         password = az_service_principal 129 |       ) 130 |       # Then fetch a storage endpoint using the token. Follows flow from 131 |       # https://github.com/Azure/AzureStor. 132 |       # Note that we're using the ABS endpoint (the first example line) 133 |       # but following the AAD token flow from the AAD alternative at the 134 |       # end of the box. If we didn't replace the endpoint and used the 135 |       # example flow then it allows authentication to blob but throws 136 |       # a 409 when trying to download.
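      # Illustrative sketch of how the objects built below are consumed
      # downstream by this package's own helpers (container name and blob
      # paths are examples only):
      #   cont <- fetch_blob_container("rt-epinow2-config")
      #   download_file_from_container(
      #     blob_storage_path = "some/blob.json",
      #     local_file_path = "input/some/blob.json",
      #     storage_container = cont
      #   )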
137 |       endpoint <- AzureStor::storage_endpoint( 138 |         "https://cfaazurebatchprd.blob.core.windows.net", 139 |         token = token 140 |       ) 141 | 142 |       # Finally, set up instantiation of storage container generic 143 |       container <- AzureStor::storage_container(endpoint, container_name) 144 |     }, 145 |     error = function(cnd) { 146 |       cli::cli_abort( 147 |         "Failure authenticating connection to {.var {container_name}}", 148 |         parent = cnd 149 |       ) 150 |     } 151 |   ) 152 | 153 |   cli::cli_alert_success("Authenticated connection to {.var {container_name}}") 154 | 155 |   return(container) 156 | } 157 | 158 | #' Fetch Azure credential from environment variable 159 | #' 160 | #' And throw an informative error if credential is not found 161 | #' 162 | #' @param env_var A character, the credential to fetch 163 | #' 164 | #' @return The associated value 165 | #' @family azure 166 | #' @export 167 | fetch_credential_from_env_var <- function(env_var) { 168 |   credential <- Sys.getenv(env_var) 169 | 170 |   if (credential == "") { 171 |     cli::cli_abort( 172 |       c( 173 |         "Error loading Azure credentials from environment variables", 174 |         "!" = "Environment variable {.envvar {env_var}} not specified or empty" 175 |       ), 176 |       class = "CFA_Rt" 177 |     ) 178 |   } 179 | 180 |   return(credential) 181 | } 182 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Synthetic dataset of stochastic SIR system with known Rt 2 | #' 3 | #' A dataset from Gostic, Katelyn M., et al. "Practical considerations for 4 | #' measuring the effective reproductive number, Rt." PLoS Computational Biology 5 | #' 16.12 (2020): e1008409. The data are simulated from a stochastic SEIR 6 | #' compartmental model. 7 | #' 8 | #' This synthetic dataset has a number of desirable properties: 9 | #' 10 | #' 1. The force of infection changes depending on the Rt, allowing for sudden 11 | #' changes in the Rt. This allows for modeling of sudden changes in infection 12 | #' dynamics, which might otherwise be difficult to capture in an Rt estimation 13 | #' framework. 14 | #' 15 | #' 2. The realized Rt is known at each timepoint 16 | #' 17 | #' 3. The dataset incorporates a simple generation interval and a reporting 18 | #' delay. 19 | #' 20 | #' Gostic et al. benchmark the performance of a number of Rt estimation 21 | #' frameworks, providing practical guidance on how to use this dataset to 22 | #' evaluate Rt estimates. 23 | #' 24 | #' In practice, we've found that the amount of observation noise in the 25 | #' incidence and/or observed cases is often undesirably low for testing. Many 26 | #' empirical datasets are much noisier. As a result, models built with these 27 | #' settings in mind can perform poorly on this dataset or fail to converge. To 28 | #' the original dataset, we add a new column with the original incidence counts 29 | #' with additional observation noise: `obs_incidence`. We manually add 30 | #' observation noise with `rnbinom(299, mu = gostic_toy_rt[["obs_cases"]], size 31 | #' = 10)` and the random seed 123456 and store it in the `obs_incidence` column. 32 | #' 33 | #' @name gostic_toy_rt 34 | #' @format `gostic_toy_rt` A data frame with 301 rows and 12 columns: 35 | #' \describe{ 36 | #' \item{time}{Timestep of the discrete-time stochastic SEIR simulation} 37 | #' \item{date}{Added from the original Gostic, 2020 dataset. A date 38 | #' corresponding to the assigned `time`.
Arbitrarily starts on January 1st, 39 | #' 2023.} 40 | #' \item{S, E, I, R}{The realized state of the stochastic SEIR system} 41 | #' \item{dS, dEI, DIR}{The stochastic transition between compartments} 42 | #' \item{incidence}{The true incidence in the `I` compartment at time t} 43 | #' \item{obs_cases}{The observed number of cases at time t from 44 | #' forward-convolved incidence.} 45 | #' \item{obs_incidence}{Added from the original Gostic, 2020 dataset. The 46 | #' `incidence` column with added negative-binomial observation noise. 47 | #' Created with `set.seed(123456)` and the call 48 | #' `rnbinom(299, mu = gostic_toy_rt[["incidence"]], size = 10)` Useful for 49 | #' testing.} 50 | #' \item{true_r0}{The initial R0 of the system (i.e., 2)} 51 | #' \item{true_rt}{The known, true Rt of the epidemic system} 52 | #' } 53 | #' @source 54 | #' # nolint 55 | #' @family data 56 | "gostic_toy_rt" 57 | 58 | #' Generation interval corresponding to the sample `gostic_toy_rt` dataset 59 | #' 60 | #' Gostic et al., 2020 simulates data from a stochastic SEIR model. Residence 61 | #' time in both the E and the I compartments is exponentially distributed, with 62 | #' a mean of 4 days (or a rate/inverse-scale of 1/4). These residence times 63 | #' imply a gamma-distributed generation time distribution with a shape of 2 and 64 | #' a rate of 1/4. We convert the continuous gamma distribution into a PMF to use 65 | #' with `{RtGam}`. 66 | #' 67 | #' From this parametric specification, we produce a double-censored, 68 | #' left-truncated probability mass function of the generation interval 69 | #' distribution. We produce the PMF using `{epinowcast}`'s 70 | #' `simulate_double_censored_pmf()` with version 0.3.0. See 71 | #' https://doi.org/10.1101/2024.01.12.24301247 for more information on 72 | #' double-censoring biases and corrections. 73 | #' 74 | #' We correct the output from `simulate_double_censored_pmf()` to make it 75 | #' appropriate to use with `{EpiNow2}`. The function returns a numeric vector, 76 | #' with the position of the element corresponding to one day more than the 77 | #' length of the delay and value corresponding to the amount of discretized 78 | #' probability density in the bin. The vector does not necessarily sum to one. 79 | #' We drop the first element of the vector, which corresponds to a zero-day 80 | #' delay. The renewal framework, which underpins our model does not account for 81 | #' zero-day delays. We renormalize the left-truncated vector to sum to one so 82 | #' that it's a proper PMF. 83 | #' 84 | #' @name sir_gt_pmf 85 | #' @format `sir_gt_pmf` A numeric vector of length 26 that sums to one within 86 | #' numerical tolerance 87 | #' @family data 88 | "sir_gt_pmf" 89 | -------------------------------------------------------------------------------- /R/exclusions.R: -------------------------------------------------------------------------------- 1 | #' Convert case counts in matching rows to NA 2 | #' 3 | #' Mark selected points to be ignored in model fitting. This manual selection 4 | #' occurs externally to the pipeline and is passed to the pipeline in an 5 | #' exclusions file read with [read_exclusions()]. Mechanically, the exclusions 6 | #' are applied by converting specified points to NAs in the dataset. NAs are 7 | #' skipped in model fitting by EpiNow2, so matched rows are excluded from the 8 | #' model likelihood. 
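#'
#' As an illustration (hypothetical values), marking a single matched row:
#'
#' ```r
#' cases <- data.frame(
#'   report_date = "2023-01-02", reference_date = "2023-01-01",
#'   disease = "COVID-19", geo_value = "CA", confirm = 10
#' )
#' exclusions <- cases[, c("reference_date", "report_date", "geo_value", "disease")]
#' apply_exclusions(cases, exclusions)$confirm
#' #> [1] NA
#' ```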
9 | #' 10 | #' @param cases A dataframe returned by [read_data()] 11 | #' @param exclusions A dataframe returned by [read_exclusions()] 12 | #' 13 | #' @return A dataframe with the same rows and schema as `cases` where the value 14 | #' in the column `confirm` converted to NA in any rows that match a row in 15 | #' `exclusions` 16 | #' @family exclusions 17 | #' @export 18 | apply_exclusions <- function(cases, exclusions) { 19 | cli::cli_alert_info("Applying exclusions to case data") 20 | 21 | con <- DBI::dbConnect(duckdb::duckdb()) 22 | on.exit(DBI::dbDisconnect(con)) 23 | 24 | duckdb::duckdb_register(con, "cases", cases) 25 | duckdb::duckdb_register(con, "exclusions", exclusions) 26 | 27 | df <- DBI::dbGetQuery( 28 | con, 29 | " 30 | SELECT 31 | cases.report_date, 32 | cases.reference_date, 33 | cases.disease, 34 | cases.geo_value, 35 | CASE 36 | WHEN exclusions.reference_date IS NOT NULL THEN NULL 37 | ELSE cases.confirm 38 | END AS confirm 39 | FROM cases 40 | LEFT JOIN exclusions 41 | ON cases.reference_date = exclusions.reference_date 42 | AND cases.report_date = exclusions.report_date 43 | AND cases.geo_value = exclusions.geo_value 44 | AND cases.disease = exclusions.disease 45 | ORDER BY cases.reference_date 46 | " 47 | ) 48 | 49 | cli::cli_alert_info("{.val {sum(is.na(df[['confirm']]))}} exclusions applied") 50 | 51 | return(df) 52 | } 53 | 54 | #' Read exclusions from an external file 55 | #' 56 | #' Expects to read a CSV with required columns: 57 | #' * `reference_date` 58 | #' * `report_date` 59 | #' * `state` 60 | #' * `disease` 61 | #' 62 | #' These columns have the same meaning as in [read_data()]. Additional columns 63 | #' are allowed and will be ignored by the reader. 64 | #' 65 | #' @param path The path to the exclusions file in `.csv` format 66 | #' 67 | #' @return A dataframe with columns `reference_date`, `report_date`, 68 | #' `geo_value`, `disease` 69 | #' @family exclusions 70 | #' @export 71 | read_exclusions <- function(path) { 72 | check_file_exists(path) 73 | 74 | con <- DBI::dbConnect(duckdb::duckdb()) 75 | on.exit(DBI::dbDisconnect(con)) 76 | df <- rlang::try_fetch( 77 | DBI::dbGetQuery( 78 | con, 79 | " 80 | SELECT 81 | reference_date, 82 | report_date, 83 | state AS geo_value, 84 | disease 85 | FROM read_csv(?) 86 | ", 87 | params = list(path) 88 | ), 89 | error = function(con) { 90 | cli::cli_abort( 91 | c( 92 | "Error fetching exclusions from {.path {path}}", 93 | "Original error: {con}" 94 | ), 95 | class = "wrapped_invalid_query" 96 | ) 97 | } 98 | ) 99 | 100 | if (nrow(df) == 0) { 101 | cli::cli_abort( 102 | "No data matching returned from {.path {path}}", 103 | class = "empty_return" 104 | ) 105 | } 106 | 107 | cli::cli_alert_success("Exclusions file read") 108 | 109 | return(df) 110 | } 111 | -------------------------------------------------------------------------------- /R/fit_model.R: -------------------------------------------------------------------------------- 1 | #' Fit an `EpiNow2` model 2 | #' 3 | #' @param data, in the format returned by [read_data()] 4 | #' @param parameters As returned from [read_disease_parameters()] 5 | #' @param seed The random seed, used for both initialization by `EpiNow2` in R 6 | #' and sampling in Stan 7 | #' @param horizon The number of days, as an integer, to forecast 8 | #' @param priors A list of lists. The first level should contain the key `rt` 9 | #' with elements `mean` and `sd` and the key `gp` with element `alpha_sd`. 10 | #' @param sampler_opts A list. 
The Stan sampler options to be passed through 11 | #' EpiNow2. It has required keys: `cores`, `chains`, `iter_warmup`, 12 | #' `iter_sampling`, `max_treedepth`, and `adapt_delta`. 13 | #' 14 | #' @return A fitted model object of class `epinow` or, if model fitting fails, 15 | #' an NA is returned with a warning 16 | #' @family pipeline 17 | #' @export 18 | fit_model <- function( 19 | data, 20 | parameters, 21 | seed, 22 | horizon, 23 | priors, 24 | sampler_opts 25 | ) { 26 | # Priors ------------------------------------------------------------------ 27 | rt <- EpiNow2::rt_opts( 28 | list( 29 | mean = priors[["rt"]][["mean"]], 30 | sd = priors[["rt"]][["sd"]] 31 | ) 32 | ) 33 | gp <- EpiNow2::gp_opts( 34 | alpha_sd = priors[["gp"]][["alpha_sd"]] 35 | ) 36 | 37 | # Distributions ----------------------------------------------------------- 38 | generation_time <- format_generation_interval( 39 | parameters[["generation_interval"]] 40 | ) 41 | delays <- format_delay_interval( 42 | parameters[["delay_interval"]] 43 | ) 44 | truncation <- format_right_truncation( 45 | parameters[["right_truncation"]], 46 | data 47 | ) 48 | stan <- format_stan_opts( 49 | sampler_opts, 50 | seed 51 | ) 52 | df <- data.frame( 53 | confirm = data[["confirm"]], 54 | date = as.Date(data[["reference_date"]]) 55 | ) 56 | rlang::try_fetch( 57 | withr::with_seed(seed, { 58 | EpiNow2::epinow( 59 | df, 60 | generation_time = generation_time, 61 | delays = delays, 62 | truncation = truncation, 63 | horizon = horizon, 64 | rt = rt, 65 | gp = gp, 66 | stan = stan, 67 | verbose = TRUE, 68 | # Dump logs to console to be caught by pipeline's logging instead of 69 | # EpiNow2's default through futile.logger 70 | logs = EpiNow2::setup_logging( 71 | threshold = "INFO", 72 | file = NULL, 73 | mirror_to_console = TRUE, 74 | name = "EpiNow2" 75 | ), 76 | filter_leading_zeros = FALSE, 77 | ) 78 | }), 79 | error = function(cnd) { 80 | cli::cli_abort( 81 | "Call to EpiNow2::epinow() failed with an error", 82 | parent = cnd, 83 | class = "failing_fit" 84 | ) 85 | } 86 | ) 87 | } 88 | 89 | #' Format Stan options for input to EpiNow2 90 | #' 91 | #' Format configuration `sampler_opts` for input to `EpiNow2` via a call to 92 | #' [EpiNow2::stan_opts()]. 
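#'
#' For illustration only (these values are not recommended defaults), a
#' `sampler_opts` list with the expected shape is:
#' `list(cores = 4, chains = 4, iter_warmup = 250, iter_sampling = 500,
#' adapt_delta = 0.99, max_treedepth = 12)`.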
93 | #' 94 | #' @inheritParams fit_model 95 | #' @param seed A stochastic seed passed here to the Stan sampler and as the R 96 | #' PRNG seed for `EpiNow2` initialization 97 | #' 98 | #' @return A `stan_opts` object of arguments 99 | #' 100 | #' @family pipeline 101 | #' @export 102 | format_stan_opts <- function(sampler_opts, seed) { 103 |   expected_stan_args <- c( 104 |     "cores", 105 |     "chains", 106 |     "iter_warmup", 107 |     "iter_sampling", 108 |     "adapt_delta", 109 |     "max_treedepth" 110 |   ) 111 |   missing_keys <- !(expected_stan_args %in% names(sampler_opts)) 112 |   missing_elements <- vapply(sampler_opts[expected_stan_args], rlang::is_null, logical(1)) 113 |   if (any(missing_keys) || any(missing_elements)) { 114 |     cli::cli_abort(c( 115 |       "Missing expected keys/values in {.val sampler_opts}", 116 |       "Missing keys: {.val {expected_stan_args[missing_keys]}}", 117 |       "Missing values: {.val {expected_stan_args[missing_elements]}}" 118 |     )) 119 |   } 120 |   EpiNow2::stan_opts( 121 |     cores = sampler_opts[["cores"]], 122 |     chains = sampler_opts[["chains"]], 123 |     seed = seed, 124 |     warmup = sampler_opts[["iter_warmup"]], 125 |     samples = sampler_opts[["iter_sampling"]], 126 |     control = list( 127 |       adapt_delta = sampler_opts[["adapt_delta"]], 128 |       max_treedepth = sampler_opts[["max_treedepth"]] 129 |     ) 130 |   ) 131 | } 132 | -------------------------------------------------------------------------------- /R/read_data.R: -------------------------------------------------------------------------------- 1 | #' Read in the dataset of incident case counts 2 | #' 3 | #' Each row of the table corresponds to a single facility's cases for a 4 | #' reference-date/report-date/disease tuple. We want to aggregate these counts 5 | #' to the level of geographic aggregate/report-date/reference-date/disease. 6 | #' 7 | #' We handle two distinct cases for geographic aggregates: 8 | #' 9 | #' 1. A single state: Subset to facilities **in that state only** and aggregate 10 | #' up to the state level 11 | #' 2. The US overall: Aggregate over all facilities without any subsetting 12 | #' 13 | #' Note that we do _not_ apply exclusions here. The exclusions are applied 14 | #' later, after the aggregations. That means that for the US overall, we 15 | #' aggregate over points that might potentially be excluded at the state level. 16 | #' Our recourse in this case is to exclude the US overall aggregate point. 17 | #' 18 | #' @param data_path The path to the local file. This could contain a glob and 19 | #' must be in parquet format. 20 | #' @inheritParams Config 21 | #' 22 | #' @return A dataframe with one or more rows and columns `report_date`, 23 | #' `reference_date`, `geo_value`, `confirm` 24 | #' @family read_data 25 | #' @export 26 | read_data <- function( 27 |   data_path, 28 |   disease = c("COVID-19", "Influenza", "RSV", "test"), 29 |   geo_value, 30 |   report_date, 31 |   max_reference_date, 32 |   min_reference_date 33 | ) { 34 |   rlang::arg_match(disease) 35 |   # NOTE: this is a temporary workaround until we switch to the new API. I'm not 36 |   # sure if there's a better way to do this without a whole bunch of special 37 |   # casing -- which is its own code smell. I think this should really be handled 38 |   # upstream in the ETL job and standardize on "COVID-19", but that's beyond 39 |   # scope here and we need to do _something_ in the meantime so this runs.
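  # In other words: a config that requests "COVID-19" is looked up in the raw
  # parquet data under the current convention, "COVID-19/Omicron", and the SQL
  # below maps the value back to "COVID-19" in the returned data frame.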
40 |   disease_map <- c( 41 |     "COVID-19" = "COVID-19/Omicron", 42 |     "Influenza" = "Influenza", 43 |     "RSV" = "RSV", 44 |     "test" = "test" 45 |   ) 46 |   mapped_disease <- disease_map[[disease]] 47 | 48 |   check_file_exists(data_path) 49 | 50 |   parameters <- list( 51 |     data_path = data_path, 52 |     disease = mapped_disease, 53 |     min_ref_date = stringify_date(min_reference_date), 54 |     max_ref_date = stringify_date(max_reference_date), 55 |     report_date = stringify_date(report_date) 56 |   ) 57 | 58 |   # We need different queries for the states and the US overall. For US overall 59 |   # we need to aggregate over all the facilities in all the states. For the 60 |   # states, we need to aggregate over all the facilities in that one state 61 |   if (geo_value == "US") { 62 |     query <- " 63 |     SELECT 64 |       report_date, 65 |       reference_date, 66 |       CASE 67 |         WHEN disease = 'COVID-19/Omicron' THEN 'COVID-19' 68 |         ELSE disease 69 |       END AS disease, 70 |       -- We want to inject 'US' as our abbreviation here because the data is not agg'd 71 |       'US' AS geo_value, 72 |       sum(value) AS confirm 73 |     FROM read_parquet(?) 74 |     WHERE 1=1 75 |       AND disease = ? 76 |       AND metric = 'count_ed_visits' 77 |       AND reference_date >= ? :: DATE 78 |       AND reference_date <= ? :: DATE 79 |       AND report_date = ? :: DATE 80 |     GROUP BY reference_date, report_date, disease 81 |     ORDER BY reference_date 82 |     " 83 |   } else { 84 |     # We want just one state, so aggregate over facilities in that one state only 85 |     query <- " 86 |     SELECT 87 |       report_date, 88 |       reference_date, 89 |       CASE 90 |         WHEN disease = 'COVID-19/Omicron' THEN 'COVID-19' 91 |         ELSE disease 92 |       END AS disease, 93 |       geo_value AS geo_value, 94 |       sum(value) AS confirm, 95 |     FROM read_parquet(?) 96 |     WHERE 1=1 97 |       AND disease = ? 98 |       AND metric = 'count_ed_visits' 99 |       AND reference_date >= ? :: DATE 100 |       AND reference_date <= ? :: DATE 101 |       AND report_date = ? :: DATE 102 |       AND geo_value = ?
103 | GROUP BY geo_value, reference_date, report_date, disease 104 | ORDER BY reference_date 105 | " 106 | # Append `geo_value` to the query 107 | parameters <- c(parameters, list(geo_value = geo_value)) 108 | } 109 | 110 | con <- DBI::dbConnect(duckdb::duckdb()) 111 | on.exit(expr = DBI::dbDisconnect(con)) 112 | df <- rlang::try_fetch( 113 | DBI::dbGetQuery( 114 | con, 115 | statement = query, 116 | params = unname(parameters) 117 | ), 118 | error = function(con) { 119 | cli::cli_abort( 120 | c( 121 | "Error fetching data from {.path {data_path}}", 122 | "Using parameters:", 123 | "*" = "data_path: {.path {parameters[['data_path']]}}", 124 | "*" = "mapped_disease: {.val {parameters[['disease']]}}", 125 | "*" = "min_reference_date: {.val {parameters[['min_ref_date']]}}", 126 | "*" = "max_reference_date: {.val {parameters[['max_ref_date']]}}", 127 | "*" = "report_date: {.val {parameters[['report_date']]}}", 128 | "Original error: {con}" 129 | ), 130 | class = "wrapped_invalid_query" 131 | ) 132 | } 133 | ) 134 | 135 | # Guard against empty return 136 | if (nrow(df) == 0) { 137 | cli::cli_abort( 138 | c( 139 | "No data matching returned from {.path {data_path}}", 140 | "Using parameters {parameters}" 141 | ), 142 | class = "empty_return" 143 | ) 144 | } 145 | # Warn for incomplete return 146 | n_rows_expected <- as.Date(max_reference_date) - 147 | as.Date(min_reference_date) + 148 | 1 149 | if (nrow(df) != n_rows_expected) { 150 | expected_dates <- seq.Date( 151 | from = as.Date(min_reference_date), 152 | to = as.Date(max_reference_date), 153 | by = "day" 154 | ) 155 | missing_dates <- stringify_date( 156 | # Setdiff strips the date attribute from the objects; re-add it so that we 157 | # can pretty-format the date for printing 158 | as.Date( 159 | setdiff(expected_dates, df[["reference_date"]]) 160 | ) 161 | ) 162 | cli::cli_warn( 163 | c( 164 | "Incomplete number of rows returned", 165 | "Expected {.val {n_rows_expected}} rows", 166 | "Observed {.val {nrow(df)}} rows", 167 | "Missing reference date(s): {missing_dates}" 168 | ), 169 | class = "incomplete_return" 170 | ) 171 | } 172 | 173 | cli::cli_alert_success("Read {nrow(df)} rows from {.path {data_path}}") 174 | return(df) 175 | } 176 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | #' DuckDB date comparison fails if the dates are not in string format 2 | #' @noRd 3 | stringify_date <- function(date) { 4 | if (inherits(date, "Date")) { 5 | format(date, "%Y-%m-%d") 6 | } else { 7 | date 8 | } 9 | } 10 | 11 | check_file_exists <- function(data_path) { 12 | # Guard against file does not exist 13 | cli::cli_alert("Reading data from {.path {data_path}}") 14 | if (!file.exists(data_path)) { 15 | cli::cli_abort( 16 | "Cannot read data. 
File {.path {data_path}} doesn't exist", 17 | class = "file_not_found" 18 | ) 19 | } 20 | invisible(data_path) 21 | } 22 | 23 | #' If `x` is null or empty, return an empty string, otherwise `x` 24 | #' @noRd 25 | empty_str_if_non_existent <- function(x) { 26 | ifelse(rlang::is_empty(x), "", x) 27 | } 28 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://cdcgov.github.io/cfa-epinow2-pipeline/ 2 | template: 3 | bootstrap: 5 4 | 5 | reference: 6 | - title: Azure 7 | desc: Functions which manage interaction with Azure blob 8 | contents: 9 | - has_concept("azure") 10 | - title: Data 11 | desc: Example data included in the package 12 | contents: 13 | - has_concept("data") 14 | - title: Configuration 15 | desc: Manages the input of all configuration settings into the `EpiNow2` model 16 | contents: 17 | - has_concept("config") 18 | - title: Exclusions 19 | desc: Functions to handle exclusion of data from models 20 | contents: 21 | - has_concept("exclusions") 22 | - title: Diagnostics 23 | desc: Functions to calculate diagnostics from fitted `EpiNow2` model 24 | contents: 25 | - has_concept("diagnostics") 26 | - title: Parameter 27 | desc: Functions for parameter values that are input into the `EpiNow2` model 28 | contents: 29 | - has_concept("parameters") 30 | - title: Pipeline 31 | desc: Functions to orchestrate running of the pipeline including fitting the 32 | `EpiNow2` model 33 | contents: 34 | - has_concept("pipeline") 35 | - title: Read data 36 | desc: Functions for data that are input into the `EpiNow2` model 37 | contents: 38 | - has_concept("read_data") 39 | - title: Write output 40 | desc: Functions for post-processing and writing `EpiNow2` model output 41 | contents: 42 | - has_concept("write_output") 43 | -------------------------------------------------------------------------------- /air.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/cfa-epinow2-pipeline/349cbea090a01498bb32a810ae357875359ff8b2/air.toml -------------------------------------------------------------------------------- /azure/generate_configs.py: -------------------------------------------------------------------------------- 1 | # /// script 2 | # requires-python = ">=3.13" 3 | # dependencies = [ 4 | # "cfa-config-generator", 5 | # "typer", 6 | # ] 7 | # 8 | # [tool.uv.sources] 9 | # cfa-config-generator = { git = "https://github.com/CDCgov/cfa-config-generator" } 10 | # /// 11 | 12 | 13 | from datetime import date, datetime, timedelta, timezone 14 | from typing import Annotated 15 | 16 | import typer 17 | from cfa_config_generator.utils.epinow2.driver_functions import generate_config 18 | 19 | 20 | def main( 21 | state: Annotated[ 22 | str, typer.Option(help="State to generate config for", show_default=False) 23 | ], 24 | disease: Annotated[ 25 | str, typer.Option(help="Disease to generate config for", show_default=False) 26 | ], 27 | job_id: Annotated[str, typer.Option(help="Job ID to use", show_default=False)], 28 | report_date_str: Annotated[ 29 | str, 30 | typer.Option( 31 | help="Report date in ISO format to generate config for", show_default=False 32 | ), 33 | ], 34 | output_container: Annotated[ 35 | str, 36 | typer.Option(help="Output container to upload config to", show_default=False), 37 | ], 38 | input_container: Annotated[ 39 | str, 40 | typer.Option(help="Input container to download 
config from"), 41 | ] = "nssp-etl", 42 | production_date_str: Annotated[ 43 | str, 44 | typer.Option( 45 | help="Production date in ISO format. Default is today", show_default=False 46 | ), 47 | ] = date.today().isoformat(), 48 | ): 49 | """ 50 | Generate and upload config files for the epinow2 pipeline. 51 | """ 52 | report_date: date = date.fromisoformat(report_date_str) 53 | production_date: date = date.fromisoformat(production_date_str) 54 | now: datetime = datetime.now(timezone.utc) 55 | 56 | # Make sure the job ID is not empty. 57 | if not job_id: 58 | raise ValueError("Job ID cannot be empty") 59 | 60 | # Generate and upload to blob for all states and diseases. 61 | generate_config( 62 | state=state, 63 | disease=disease, 64 | report_date=report_date, 65 | reference_dates=[ 66 | report_date - timedelta(days=1), 67 | report_date - timedelta(weeks=8), 68 | ], 69 | data_path=f"gold/{report_date.isoformat()}.parquet", 70 | data_container=input_container, 71 | production_date=production_date, 72 | job_id=job_id, 73 | as_of_date=now.isoformat(), 74 | output_container=output_container, 75 | ) 76 | 77 | 78 | if __name__ == "__main__": 79 | typer.run(main) 80 | -------------------------------------------------------------------------------- /azure/generate_rerun_configs.py: -------------------------------------------------------------------------------- 1 | # /// script 2 | # requires-python = ">=3.13" 3 | # dependencies = [ 4 | # "cfa-config-generator", 5 | # "typer", 6 | # ] 7 | # 8 | # [tool.uv.sources] 9 | # cfa-config-generator = { git = "https://github.com/CDCgov/cfa-config-generator" } 10 | # /// 11 | 12 | 13 | from datetime import date, datetime, timedelta, timezone 14 | from typing import Annotated 15 | 16 | import typer 17 | from cfa_config_generator.utils.epinow2.driver_functions import generate_rerun_config 18 | 19 | 20 | def main( 21 | job_id: Annotated[str, typer.Option(help="Job ID to use", show_default=False)], 22 | report_date_str: Annotated[ 23 | str, 24 | typer.Option( 25 | help="Report date in ISO format to generate config for", show_default=False 26 | ), 27 | ], 28 | output_container: Annotated[ 29 | str, 30 | typer.Option(help="Output container to upload config to", show_default=False), 31 | ], 32 | input_container: Annotated[ 33 | str, 34 | typer.Option(help="Input container to download config from"), 35 | ] = "nssp-etl", 36 | production_date_str: Annotated[ 37 | str, 38 | typer.Option( 39 | help="Production date in ISO format. Default is today", show_default=False 40 | ), 41 | ] = date.today().isoformat(), 42 | data_exclusions_path: Annotated[ 43 | str | None, 44 | typer.Option( 45 | help=( 46 | "Path to data exclusions file." 47 | " Default is to use the report date to generate the path." 48 | " You almost certainly do not want to change from this default." 49 | ), 50 | show_default=False, 51 | ), 52 | ] = None, 53 | ): 54 | """ 55 | Generate and upload config files for rerunning the epinow2 pipeline. 56 | """ 57 | report_date: date = date.fromisoformat(report_date_str) 58 | production_date: date = date.fromisoformat(production_date_str) 59 | now: datetime = datetime.now(timezone.utc) 60 | 61 | # Make sure the job ID is not empty. 62 | if not job_id: 63 | raise ValueError("Job ID cannot be empty") 64 | 65 | # Generate and upload to blob for all states and diseases. 
66 | generate_rerun_config( 67 | state="all", 68 | disease="all", 69 | report_date=report_date, 70 | reference_dates=[ 71 | report_date - timedelta(days=1), 72 | report_date - timedelta(weeks=8), 73 | ], 74 | data_path=f"gold/{report_date.isoformat()}.parquet", 75 | data_container=input_container, 76 | production_date=production_date, 77 | job_id=job_id, 78 | as_of_date=now.isoformat(), 79 | output_container=output_container, 80 | data_exclusions_path=data_exclusions_path, 81 | ) 82 | 83 | 84 | if __name__ == "__main__": 85 | typer.run(main) 86 | -------------------------------------------------------------------------------- /azure/job.py: -------------------------------------------------------------------------------- 1 | # /// script 2 | # requires-python = ">=3.13" 3 | # dependencies = [ 4 | # "azure-batch==14.2.0", 5 | # "azure-identity==1.21.0", 6 | # "azure-storage-blob==12.25.1", 7 | # "msrest==0.7.1", 8 | # ] 9 | # /// 10 | import datetime 11 | import os 12 | import time 13 | import uuid 14 | 15 | from msrest.authentication import BasicTokenAuthentication 16 | 17 | import azure.batch.models as batchmodels 18 | from azure.batch import BatchServiceClient 19 | from azure.identity import DefaultAzureCredential 20 | from azure.storage.blob import BlobServiceClient 21 | 22 | 23 | def main(image_name: str, config_container: str, pool_id: str, job_id: str): 24 | """ 25 | Submit a job 26 | 27 | Arguments 28 | ---------- 29 | image_name: str 30 | The name of the container image (and tag) to use for the job 31 | config_container: str 32 | The name of the storage container for the job to output to 33 | pool_id: str 34 | The name of the pool to use for the job 35 | job_id: str 36 | The name of the job to use for the job. 37 | """ 38 | blob_account = os.environ["BLOB_ACCOUNT"] 39 | blob_url = f"https://{blob_account}.blob.core.windows.net" 40 | batch_account = os.environ["BATCH_ACCOUNT"] 41 | batch_url = f"https://{batch_account}.eastus.batch.azure.com" 42 | 43 | # Authenticate with workaround because Batch is the one remaining 44 | # service that doesn't yet support Azure auth flow v2 :) :) 45 | # https://github.com/Azure/azure-sdk-for-python/issues/30468 46 | credential_v2 = DefaultAzureCredential() 47 | token = { 48 | "access_token": credential_v2.get_token( 49 | "https://batch.core.windows.net/.default" 50 | ).token 51 | } 52 | credential_v1 = BasicTokenAuthentication(token) 53 | 54 | batch_client = BatchServiceClient(credentials=credential_v1, batch_url=batch_url) 55 | 56 | ############# 57 | # Set up job 58 | batch_job_id = pool_id 59 | job = batchmodels.JobAddParameter( 60 | id=batch_job_id, pool_info=batchmodels.PoolInformation(pool_id=pool_id) 61 | ) 62 | 63 | try: 64 | batch_client.job.add(job) 65 | except batchmodels.BatchErrorException as err: 66 | if err.error.code != "JobExists": 67 | raise 68 | else: 69 | print("Job already exists. 
Using job object") 70 | 71 | ########## 72 | # Get tasks 73 | blob_service_client = BlobServiceClient(blob_url, credential_v2) 74 | container_client = blob_service_client.get_container_client( 75 | container=config_container 76 | ) 77 | 78 | task_configs: list[str] = [ 79 | b.name for b in container_client.list_blobs() if job_id in b.name 80 | ] 81 | if len(task_configs) > 0: 82 | print(f"Creating {len(task_configs)} tasks in job {job_id} on pool {pool_id}") 83 | elif len(task_configs) == 0: 84 | raise ValueError("No tasks found") 85 | 86 | ########### 87 | # Set up tasks on job 88 | task_container_settings = batchmodels.TaskContainerSettings( 89 | image_name=image_name, container_run_options="--rm --workdir /" 90 | ) 91 | task_env_settings = [ 92 | batchmodels.EnvironmentSetting( 93 | name="az_tenant_id", value=os.environ["AZURE_TENANT_ID"] 94 | ), 95 | batchmodels.EnvironmentSetting( 96 | name="az_client_id", value=os.environ["AZURE_CLIENT_ID"] 97 | ), 98 | batchmodels.EnvironmentSetting( 99 | name="az_service_principal", value=os.environ["AZURE_CLIENT_SECRET"] 100 | ), 101 | ] 102 | 103 | # Run task at the admin level to be able to read/write to mounted drives 104 | user_identity = batchmodels.UserIdentity( 105 | auto_user=batchmodels.AutoUserSpecification( 106 | scope=batchmodels.AutoUserScope.pool, 107 | elevation_level=batchmodels.ElevationLevel.admin, 108 | ) 109 | ) 110 | 111 | for config_path in task_configs: 112 | command = f"Rscript -e \"CFAEpiNow2Pipeline::orchestrate_pipeline('{config_path}', config_container = '{config_container}', input_dir = '/mnt/input', output_dir = '/mnt/output')\"" 113 | task = batchmodels.TaskAddParameter( 114 | id=str(uuid.uuid4()), 115 | command_line=command, 116 | container_settings=task_container_settings, 117 | environment_settings=task_env_settings, 118 | user_identity=user_identity, 119 | ) 120 | 121 | batch_client.task.add(batch_job_id, task) 122 | 123 | 124 | if __name__ == "__main__": 125 | from argparse import ArgumentParser 126 | 127 | parser = ArgumentParser( 128 | description="Submit a job to Azure Batch with the specified image and config container" 129 | ) 130 | parser.add_argument( 131 | "--image_name", 132 | type=str, 133 | help="The name of the container image (and tag) to use for the job", 134 | required=True, 135 | ) 136 | parser.add_argument( 137 | "--config_container", 138 | type=str, 139 | help="The name of the storage container for the job to output to", 140 | required=True, 141 | ) 142 | parser.add_argument( 143 | "--pool_id", 144 | type=str, 145 | help="The name of the pool to use for the job", 146 | required=True, 147 | ) 148 | parser.add_argument( 149 | "--job_id", 150 | type=str, 151 | help="The name of the job to use for the job. 
Defaults to pool_id", 152 | default=None, 153 | ) 154 | 155 | # Parse the args 156 | args = parser.parse_args() 157 | image_name: str = args.image_name 158 | config_container: str = args.config_container 159 | pool_id: str = args.pool_id 160 | # Use pool_id as job_id if not specified 161 | job_id: str = args.job_id or pool_id 162 | 163 | main( 164 | image_name=image_name, 165 | config_container=config_container, 166 | pool_id=pool_id, 167 | job_id=job_id, 168 | ) 169 | -------------------------------------------------------------------------------- /azure/requirements.txt: -------------------------------------------------------------------------------- 1 | adal==1.2.7 2 | azure-batch==14.2.0 3 | azure-common==1.1.28 4 | azure-core==1.32.0 5 | azure-identity==1.19.0 6 | azure-keyvault==4.2.0 7 | azure-keyvault-certificates==4.9.0 8 | azure-keyvault-keys==4.10.0 9 | azure-keyvault-secrets==4.9.0 10 | azure-mgmt-batch==18.0.0 11 | azure-mgmt-core==1.5.0 12 | azure-storage-blob==12.24.0 13 | certifi==2024.8.30 14 | cffi==1.17.1 15 | charset-normalizer==3.4.0 16 | cryptography==44.0.1 17 | idna==3.10 18 | isodate==0.7.2 19 | msal==1.31.1 20 | msal-extensions==1.2.0 21 | msrest==0.7.1 22 | msrestazure==0.6.4.post1 23 | oauthlib==3.2.2 24 | portalocker==2.10.1 25 | pycparser==2.22 26 | PyJWT==2.10.1 27 | python-dateutil==2.9.0.post0 28 | requests==2.32.3 29 | requests-oauthlib==2.0.0 30 | six==1.17.0 31 | toml==0.10.2 32 | typing_extensions==4.12.2 33 | urllib3==2.2.3 34 | -------------------------------------------------------------------------------- /code-of-conduct.md: -------------------------------------------------------------------------------- 1 | # Creating a Culture of Innovation 2 | We aspire to create a culture where people work joyfully, communicate openly 3 | about things that matter, and provide great services globally. We would like our 4 | team and communities (both government and private sector) to reflect on 5 | diversity of all kinds, not just the classes protected in law. Diversity fosters 6 | innovation. Diverse teams are creative teams. We need a diversity of perspective 7 | to create solutions for the challenges we face. 8 | 9 | This is our code of conduct (adapted from [18F's Code of Conduct](https://github.com/18F/code-of-conduct)). 10 | We follow all Equal Employment Opportunity laws and we expect everyone we work 11 | with to adhere to the [GSA Anti-harassment Policy](http://www.gsa.gov/portal/directive/d0/content/512516), 12 | even if they do not work for the Centers for Disease Control and Prevention or 13 | GSA. We expect every user to follow this code of conduct and the laws and 14 | policies mentioned above. 15 | 16 | ## Be Empowering 17 | Consider what you can do to encourage and support others. Make room for quieter 18 | voices to contribute. Offer support and enthusiasm for great ideas. Leverage the 19 | low cost of experimentation to support your colleagues' ideas, and take care to 20 | acknowledge the original source. Look for ways to contribute and collaborate, 21 | even in situations where you normally wouldn't. Share your knowledge and skills. 22 | Prioritize access for and input from those who are traditionally excluded from 23 | the civic process. 24 | 25 | ## Rules of Behavior 26 | * I understand that I must complete security awareness and records management 27 | training annually in order to comply with the latest security and records 28 | management policies. 
29 | * I understand that I must also follow the [Rules of Behavior for use of HHS Information Resources](http://www.hhs.gov/ocio/policy/hhs-rob.html) 30 | * I understand that I must not use, share, or store any kind of sensitive data 31 |   (health status, provision or payment of healthcare, PII, etc.) under ANY 32 |   circumstance. 33 | * I will not knowingly conceal, falsify, or remove information. 34 | * I understand that I can only use non-sensitive and/or publicly available 35 |   data. 36 | * I understand that all passwords I create to set up accounts need to comply 37 |   with CDC's password policy. 38 | * I understand that the stewards reserve the right to moderate all data at any 39 |   time. 40 | 41 | ## Boundaries 42 | Create boundaries to your own behavior and consider how you can create a safe 43 | space that helps prevent unacceptable behavior by others. We can't list all 44 | instances of unacceptable behavior, but we can provide examples to help guide 45 | our community in thinking through how to respond when we experience these types 46 | of behavior, whether directed at ourselves or others. 47 | 48 | If you are unsure if something is appropriate behavior, it probably is not. Each 49 | person we interact with can define where the line is for them. Impact matters 50 | more than intent. Ensuring that your behavior does not have a negative impact is 51 | your responsibility. Problems usually arise when we assume that our way of 52 | thinking or behavior is the norm for everyone. 53 | 54 | ### Here are some examples of unacceptable behavior 55 | * Negative or offensive remarks based on the protected classes as listed in the 56 |   GSA Anti-harassment Policy of race, religion, color, sex, national origin, 57 |   age, disability, genetic information, sexual orientation, gender identity, 58 |   parental status, marital status, and political affiliation as well as gender 59 |   expression, mental illness, socioeconomic status or backgrounds, 60 |   neuro(a)typicality, physical appearance, body size, or clothing. Consider 61 |   that calling attention to differences can feel alienating. 62 | * Sustained disruption of meetings, talks, or discussions, including chatrooms. 63 | * Patronizing language or behavior. 64 | * Aggressive behavior, such as unconstructive criticism, providing corrections 65 |   that do not improve the conversation (sometimes referred to as "well 66 |   actually's"), repeatedly interrupting or talking over someone else, feigning 67 |   surprise at someone's lack of knowledge or awareness about a topic, or subtle 68 |   prejudice. 69 | * Referring to people in a way that misidentifies their gender and/or rejects 70 |   the validity of their gender identity; for instance by using incorrect 71 |   pronouns or forms of address (misgendering). 72 | * Retaliating against anyone who files a formal complaint that someone has 73 |   violated these codes or laws. 74 | 75 | ## Background 76 | CDC Scientific Clearance is the process of obtaining approvals by appropriate 77 | CDC officials before a CDC information product is released to the public or 78 | CDC's external public health partners. Information products that require formal 79 | clearance include print, electronic, or oral materials that CDC employees 80 | author or co-author, whether published by CDC or outside CDC. CDC contractors 81 | developing content on behalf of CDC for the public or CDC's external public 82 | health partners are also required to put their content through the formal 83 | clearance process.
The collaborative functions related to the projects include 84 | blogs, wikis, forums, bug tracking sites, source control and 85 | others deemed as necessary. 86 | 87 | For those individuals within the CDC, adherence to the following policies are 88 | required: 89 | * CDC ["Clearance of Information Products Disseminated Outside CDC for Public Use"](http://www.cdc.gov/maso/Policy/PublicUse.pdf) 90 | * HHS ["Ensuring the Quality of Information Disseminated by HHS agencies"](http://aspe.hhs.gov/infoquality) 91 | 92 | All collaborative materials will be controlled by the rules contained within 93 | this document. This will allow for the real-time collaboration opportunities 94 | among CDC employees, CDC contractors and CDC public health partners. 95 | 96 | ## Credit 97 | This code of conduct was mainly adapted from [18F's Code of Conduct](https://github.com/18F/code-of-conduct) 98 | and the [CDC's Informatics Innovation Unit R&D Lab's code of conduct.](https://www.philab.cdc.gov/index.php/code-of-conduct/) 99 | 100 | ## Relevant Legal Considerations 101 | * [Laws enforced by the Equal Employment Opportunity Commission](http://www.eeoc.gov/laws/statutes/index.cfm) 102 | * [Types of discrimination prohibited by law](http://www.eeoc.gov/laws/types) 103 | * [New and proposed regulations](http://www.eeoc.gov/laws/regulations/index.cfm) 104 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: 2 | layout: "condensed_header, condensed_files, condensed_footer" # add "condensed_" to "header", "files" and "footer" 3 | hide_project_coverage: TRUE # set to true 4 | 5 | coverage: 6 | status: 7 | project: 8 | default: 9 | target: auto 10 | threshold: 1% 11 | informational: true 12 | patch: 13 | default: 14 | target: auto 15 | threshold: 1% 16 | informational: true 17 | -------------------------------------------------------------------------------- /container-app-jobs/README.md: -------------------------------------------------------------------------------- 1 | # Container App Job Tools 2 | 3 | This directory contains tools related to executing this pipeline in Azure as a Container App Job. 4 | 5 | ## job-template.yaml 6 | 7 | The *job-template-yaml* file can be passed to the Azure CLI to start a Container App Job from the command line. This allows a user to quickly kick off specific jobs from a WSL console. 8 | 9 | If not previously installed, refer to the documentation [here](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli-linux?pivots=apt) for installation instructions on the CLI itself. The command in Option 1 is the best way to accomplish this: 10 | 11 | ```bash 12 | curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash 13 | ``` 14 | 15 | Update the *job-template.yaml* file with the Azure tenant and client ids, as well as the config file to execute the job on. The job can then be started from the CLI with the following command: 16 | 17 | ```bash 18 | az containerapp job start --name 'cfa-epinow2-test-caj' --resource-group 'EXT-EDAV-CFA-PRD' --yaml job-template.yaml 19 | ``` 20 | 21 | This command will start the job and return metadata including the newly created job's id. Refer to the Azure portal in a browser to track status and results. 22 | 23 | ## blob-config-runner 24 | 25 | The *blob-config-runner* directory contains a Python tool that can start multiple jobs at once. 
It looks for files within a specified Azure Blob Storage container, presents them to the user for interactive selection, and runs a job on each once confirmed. 26 | 27 | This tool requires Python 3, which is already installed within WSL. A virtual environment using *venv* is recommended for execution, which can be installed with *apt*. To initialize the environment and necessary libraries, run the following command from the directory: 28 | 29 | ```bash 30 | python3 -m venv .venv 31 | .venv/bin/python3 -m pip install -r requirements.txt 32 | ``` 33 | 34 | Enter the *config.ini* file's client, tenant, and subscription id values within the Azure section. Update the container name and prefix as needed for this specific run. The env_vars section should not be updated, as these are used by the script to replace the values. The tool can now be run as follows: 35 | 36 | ```bash 37 | .venv/bin/python3 start-jobs.py 38 | ``` 39 | 40 | **Note:** This tool identifies config files by looking for a suffix of *-config.json*. This logic could be updated to instead look for tags or metadata, if files were appropriately identified as such within Azure. 41 | -------------------------------------------------------------------------------- /container-app-jobs/blob-config-runner/config.ini: -------------------------------------------------------------------------------- 1 | [env_vars] 2 | config_file_key = <<_config_file_>> 3 | tenant_id_key = <<_tenant_id_>> 4 | tenant_id_label = az_tenant_id 5 | client_id_key = <<_client_id_>> 6 | client_id_label = az_client_id 7 | sp_label = az_service_principal 8 | sp_ref = az-service-principal 9 | 10 | [azure] 11 | account_url = https://cfaazurebatchprd.blob.core.windows.net 12 | container_name = rt-epinow2-config 13 | prefix = Rt-estimation-20250124_172623/configs/ 14 | tenant_id_value = 15 | client_id_value = 16 | subscription_id = 17 | 18 | [caj] 19 | name = cfa-epinow2-test-caj 20 | resource_group = EXT-EDAV-CFA-PRD 21 | command = /pkg/start.sh 22 | image = cfaprdbatchcr.azurecr.io/cfa-epinow2-pipeline:latest 23 | cpu = 4.0 24 | memory = 8Gi 25 | -------------------------------------------------------------------------------- /container-app-jobs/blob-config-runner/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-storage-blob 2 | azure-identity 3 | azure-mgmt-appcontainers 4 | -------------------------------------------------------------------------------- /container-app-jobs/job-template.yaml: -------------------------------------------------------------------------------- 1 | # Template file for starting an Azure Container App job running this workflow. 2 | # A Container App Job must be created and defined in Azure, and its name and 3 | # resource group passed to the command with this template to start the job execution. 4 | # Config file, tenant id, and client id need to be set before running. 
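# The <<_config_file_>>, <<_tenant_id_>>, and <<_client_id_>> placeholders can
# be filled in by hand or replaced automatically by the blob-config-runner
# script, which reads the same placeholder keys from its config.ini.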
5 | # Usage: 6 | # az containerapp job start --name <<_job_name_>> --resource-group <<_rg_>> --yaml job-template.yaml 7 | 8 | containers: 9 | - args: ["<<_config_file_>>"] 10 | command: 11 | - /pkg/start.sh 12 | env: 13 | - name: az_tenant_id 14 | value: <<_tenant_id_>> 15 | - name: az_client_id 16 | value: <<_client_id_>> 17 | - name: az_service_principal 18 | secretRef: az-service-principal # pragma: allowlist secret 19 | image: cfaprdbatchcr.azurecr.io/cfa-epinow2-pipeline:latest 20 | name: cfa-epinow2-test-caj 21 | resources: 22 | cpu: 4 23 | memory: 8Gi 24 | -------------------------------------------------------------------------------- /data-raw/convert_gostic_toy_rt_to_test_dataset.R: -------------------------------------------------------------------------------- 1 | load("data/gostic_toy_rt.rda") 2 | gostic_toy_rt[["reference_date"]] <- as.Date("2023-01-01") + 3 | gostic_toy_rt[["time"]] 4 | gostic_toy_rt[["report_date"]] <- max(gostic_toy_rt[["reference_date"]]) + 1 5 | 6 | con <- DBI::dbConnect(duckdb::duckdb()) 7 | 8 | duckdb::duckdb_register(con, "gostic_toy_rt", gostic_toy_rt) 9 | dbExecute( 10 | con, 11 | " 12 | COPY ( 13 | SELECT 14 | obs_incidence AS value, 15 | 'test' AS geo_value, 16 | 'test' AS disease, 17 | 'count_ed_visits' AS metric, 18 | reference_date, 19 | report_date 20 | FROM gostic_toy_rt 21 | ORDER BY reference_date 22 | LIMIT 150 23 | ) TO 24 | 'tests/testthat/data/test_data.parquet' (FORMAT PARQUET) 25 | ; 26 | " 27 | ) 28 | 29 | # Repeat for US overall 30 | dbExecute( 31 | con, 32 | " 33 | COPY ( 34 | SELECT 35 | obs_incidence AS value, 36 | 'US' AS geo_value, 37 | 'test' AS disease, 38 | 'count_ed_visits' AS metric, 39 | reference_date, 40 | report_date 41 | FROM gostic_toy_rt 42 | ORDER BY reference_date 43 | LIMIT 150 44 | ) TO 45 | 'tests/testthat/data/us_overall_test_data.parquet' (FORMAT PARQUET) 46 | ; 47 | " 48 | ) 49 | dbDisconnect(con) 50 | -------------------------------------------------------------------------------- /data-raw/sir_gt_pmf.R: -------------------------------------------------------------------------------- 1 | # E and I compartments both with exponentially distributed residence times 2 | # with a mean of 4 days. 
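# Their sum (the generation interval) is therefore gamma-distributed with
# shape = 2 and rate = 1/4, i.e. a mean of 8 days, which is what the
# parameters below encode.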
3 | shape <- 2 4 | rate <- 1 / 4 5 | 6 | sir_gt_pmf <- primarycensored::dpcens( 7 | 0:26, 8 | pgamma, 9 | shape = shape, 10 | rate = rate, 11 | D = 27 12 | ) # v0.4.0 13 | 14 | # Drop first element because GI can't have same-day transmission 15 | # and replace with a zero 16 | sir_gt_pmf <- c(0, sir_gt_pmf[2:27]) 17 | 18 | # Renormalize to a proper PMF 19 | while (abs(sum(sir_gt_pmf) - 1) > 1e-10) { 20 | sir_gt_pmf <- sir_gt_pmf / sum(sir_gt_pmf) 21 | } 22 | 23 | usethis::use_data(sir_gt_pmf, overwrite = TRUE) 24 | -------------------------------------------------------------------------------- /data/gostic_toy_rt.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/cfa-epinow2-pipeline/349cbea090a01498bb32a810ae357875359ff8b2/data/gostic_toy_rt.rda -------------------------------------------------------------------------------- /data/sir_gt_pmf.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/cfa-epinow2-pipeline/349cbea090a01498bb32a810ae357875359ff8b2/data/sir_gt_pmf.rda -------------------------------------------------------------------------------- /image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/cfa-epinow2-pipeline/349cbea090a01498bb32a810ae357875359ff8b2/image.png -------------------------------------------------------------------------------- /man/Config.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/config.R 3 | \name{Config} 4 | \alias{Config} 5 | \title{Config Class} 6 | \usage{ 7 | Config( 8 | job_id = character(0), 9 | task_id = character(0), 10 | min_reference_date = character(0), 11 | max_reference_date = character(0), 12 | report_date = character(0), 13 | production_date = character(0), 14 | disease = character(0), 15 | geo_value = character(0), 16 | geo_type = character(0), 17 | seed = integer(0), 18 | horizon = integer(0), 19 | model = "EpiNow2", 20 | config_version = character(0), 21 | quantile_width = c(0.5, 0.95), 22 | data = Data(), 23 | priors = list(), 24 | parameters = Parameters(), 25 | sampler_opts = list(), 26 | exclusions = Exclusions(), 27 | output_container = character(0) 28 | ) 29 | } 30 | \arguments{ 31 | \item{job_id}{A string specifying the job.} 32 | 33 | \item{task_id}{A string specifying the task.} 34 | 35 | \item{min_reference_date}{A string representing the minimum reference 36 | date. Formatted as "YYYY-MM-DD".} 37 | 38 | \item{max_reference_date}{A string representing the maximum reference 39 | date. Formatted as "YYYY-MM-DD".} 40 | 41 | \item{report_date}{A string representing the report date. Formatted as 42 | "YYYY-MM-DD".} 43 | 44 | \item{production_date}{A string representing the production date. 45 | Formatted as "YYYY-MM-DD".} 46 | 47 | \item{disease}{A string specifying the disease being modeled. 
One of 48 | \code{"COVID-19"} or \code{"Influenza"} or \code{"RSV"}.} 49 | 50 | \item{geo_value}{An uppercase, two-character string specifying the geographic 51 | value, usually a state or \code{"US"} for national data.} 52 | 53 | \item{geo_type}{A string specifying the geographic type, usually "state".} 54 | 55 | \item{seed}{An integer for setting the random seed.} 56 | 57 | \item{horizon}{An integer specifying the forecasting horizon.} 58 | 59 | \item{model}{A string specifying the model to be used.} 60 | 61 | \item{config_version}{A numeric value specifying the configuration version.} 62 | 63 | \item{quantile_width}{A vector of numeric values representing the desired 64 | quantiles. Passed to \code{\link[tidybayes:reexports]{tidybayes::median_qi()}}.} 65 | 66 | \item{data}{An instance of \code{Data} class containing data configurations.} 67 | 68 | \item{priors}{A list of lists. The first level should contain the key \code{rt} 69 | with elements \code{mean} and \code{sd} and the key \code{gp} with element \code{alpha_sd}.} 70 | 71 | \item{parameters}{An instance of \code{Parameters} class containing parameter 72 | configurations.} 73 | 74 | \item{sampler_opts}{A list. The Stan sampler options to be passed through 75 | EpiNow2. It has required keys: \code{cores}, \code{chains}, \code{iter_warmup}, 76 | \code{iter_sampling}, \code{max_treedepth}, and \code{adapt_delta}.} 77 | 78 | \item{exclusions}{An instance of \code{Exclusions} class containing exclusion 79 | criteria.} 80 | 81 | \item{output_container}{An optional string specifying the output blob storage 82 | container.} 83 | } 84 | \description{ 85 | Represents the complete configuration for the pipeline. 86 | } 87 | \seealso{ 88 | Other config: 89 | \code{\link{Data}()}, 90 | \code{\link{Exclusions}()}, 91 | \code{\link{Interval}}, 92 | \code{\link{Parameters}()}, 93 | \code{\link{read_json_into_config}()} 94 | } 95 | \concept{config} 96 | -------------------------------------------------------------------------------- /man/Data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/config.R 3 | \name{Data} 4 | \alias{Data} 5 | \title{Data Class} 6 | \usage{ 7 | Data( 8 | path = character(0), 9 | blob_storage_container = character(0), 10 | report_date = character(0), 11 | reference_date = character(0) 12 | ) 13 | } 14 | \arguments{ 15 | \item{path}{A string specifying the path to the data Parquet file.} 16 | 17 | \item{blob_storage_container}{Optional. The name of the blob storage 18 | container to which the data file will be uploaded. If NULL, no upload will 19 | occur.} 20 | 21 | \item{report_date}{A list of strings representing report dates.} 22 | 23 | \item{reference_date}{A list of strings representing reference dates.} 24 | } 25 | \description{ 26 | Represents the data-related configurations. 
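As a minimal sketch of constructing this piece of the configuration (the path and dates below are hypothetical placeholders, not real inputs):

```r
# Hypothetical values for illustration only.
data_config <- CFAEpiNow2Pipeline::Data(
  path = "gold/2024-11-20.parquet",
  blob_storage_container = NULL,
  report_date = list("2024-11-20"),
  reference_date = list("2024-11-18", "2024-11-19")
)
```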
27 | } 28 | \seealso{ 29 | Other config: 30 | \code{\link{Config}()}, 31 | \code{\link{Exclusions}()}, 32 | \code{\link{Interval}}, 33 | \code{\link{Parameters}()}, 34 | \code{\link{read_json_into_config}()} 35 | } 36 | \concept{config} 37 | -------------------------------------------------------------------------------- /man/Exclusions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/config.R 3 | \name{Exclusions} 4 | \alias{Exclusions} 5 | \title{Exclusions Class} 6 | \usage{ 7 | Exclusions(path = character(0), blob_storage_container = character(0)) 8 | } 9 | \arguments{ 10 | \item{path}{A string specifying the path to a CSV file containing exclusion 11 | data. It should include at least the columns: \code{reference_date}, 12 | \code{report_date}, \code{state}, \code{disease}.} 13 | 14 | \item{blob_storage_container}{Optional. The name of the blob storage 15 | container to get it from. If NULL, will look locally.} 16 | } 17 | \description{ 18 | Represents exclusion criteria for the pipeline. 19 | } 20 | \seealso{ 21 | Other config: 22 | \code{\link{Config}()}, 23 | \code{\link{Data}()}, 24 | \code{\link{Interval}}, 25 | \code{\link{Parameters}()}, 26 | \code{\link{read_json_into_config}()} 27 | } 28 | \concept{config} 29 | -------------------------------------------------------------------------------- /man/Interval.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/config.R 3 | \name{Interval} 4 | \alias{Interval} 5 | \alias{GenerationInterval} 6 | \alias{DelayInterval} 7 | \alias{RightTruncation} 8 | \title{Interval Class} 9 | \usage{ 10 | Interval(path = character(0), blob_storage_container = character(0)) 11 | 12 | GenerationInterval(path = character(0), blob_storage_container = character(0)) 13 | 14 | DelayInterval(path = character(0), blob_storage_container = character(0)) 15 | 16 | RightTruncation(path = character(0), blob_storage_container = character(0)) 17 | } 18 | \arguments{ 19 | \item{path}{A string specifying the path to the generation interval CSV file.} 20 | 21 | \item{blob_storage_container}{Optional. The name of the blob storage 22 | container to get it from. If NULL, will look locally.} 23 | } 24 | \description{ 25 | Represents a generic interval. Meant to be subclassed. 26 | 27 | Represents the generation interval parameters. 28 | 29 | Represents the delay interval parameters. 30 | 31 | Represents the right truncation parameters. 
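A minimal sketch of pointing these classes at a local parameter file (the file name is a hypothetical placeholder):

```r
# Hypothetical path for illustration only.
gi_config  <- CFAEpiNow2Pipeline::GenerationInterval(path = "parameters.parquet")
dly_config <- CFAEpiNow2Pipeline::DelayInterval(path = "parameters.parquet")
rt_config  <- CFAEpiNow2Pipeline::RightTruncation(path = "parameters.parquet")
```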
32 | } 33 | \seealso{ 34 | Other config: 35 | \code{\link{Config}()}, 36 | \code{\link{Data}()}, 37 | \code{\link{Exclusions}()}, 38 | \code{\link{Parameters}()}, 39 | \code{\link{read_json_into_config}()} 40 | 41 | Other config: 42 | \code{\link{Config}()}, 43 | \code{\link{Data}()}, 44 | \code{\link{Exclusions}()}, 45 | \code{\link{Parameters}()}, 46 | \code{\link{read_json_into_config}()} 47 | 48 | Other config: 49 | \code{\link{Config}()}, 50 | \code{\link{Data}()}, 51 | \code{\link{Exclusions}()}, 52 | \code{\link{Parameters}()}, 53 | \code{\link{read_json_into_config}()} 54 | 55 | Other config: 56 | \code{\link{Config}()}, 57 | \code{\link{Data}()}, 58 | \code{\link{Exclusions}()}, 59 | \code{\link{Parameters}()}, 60 | \code{\link{read_json_into_config}()} 61 | } 62 | \concept{config} 63 | -------------------------------------------------------------------------------- /man/Parameters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/config.R 3 | \name{Parameters} 4 | \alias{Parameters} 5 | \title{Parameters Class} 6 | \usage{ 7 | Parameters( 8 | as_of_date = character(0), 9 | generation_interval = GenerationInterval(), 10 | delay_interval = DelayInterval(), 11 | right_truncation = RightTruncation() 12 | ) 13 | } 14 | \arguments{ 15 | \item{as_of_date}{A string representing the as-of date. Formatted as 16 | "YYYY-MM-DD".} 17 | 18 | \item{generation_interval}{An instance of \code{GenerationInterval} class.} 19 | 20 | \item{delay_interval}{An instance of \code{DelayInterval} class.} 21 | 22 | \item{right_truncation}{An instance of \code{RightTruncation} class.} 23 | } 24 | \description{ 25 | Holds all parameter-related configurations for the pipeline. 26 | } 27 | \seealso{ 28 | Other config: 29 | \code{\link{Config}()}, 30 | \code{\link{Data}()}, 31 | \code{\link{Exclusions}()}, 32 | \code{\link{Interval}}, 33 | \code{\link{read_json_into_config}()} 34 | } 35 | \concept{config} 36 | -------------------------------------------------------------------------------- /man/apply_exclusions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exclusions.R 3 | \name{apply_exclusions} 4 | \alias{apply_exclusions} 5 | \title{Convert case counts in matching rows to NA} 6 | \usage{ 7 | apply_exclusions(cases, exclusions) 8 | } 9 | \arguments{ 10 | \item{cases}{A dataframe returned by \code{\link[=read_data]{read_data()}}} 11 | 12 | \item{exclusions}{A dataframe returned by \code{\link[=read_exclusions]{read_exclusions()}}} 13 | } 14 | \value{ 15 | A dataframe with the same rows and schema as \code{cases} where the value 16 | in the column \code{confirm} converted to NA in any rows that match a row in 17 | \code{exclusions} 18 | } 19 | \description{ 20 | Mark selected points to be ignored in model fitting. This manual selection 21 | occurs externally to the pipeline and is passed to the pipeline in an 22 | exclusions file read with \code{\link[=read_exclusions]{read_exclusions()}}. Mechanically, the exclusions 23 | are applied by converting specified points to NAs in the dataset. NAs are 24 | skipped in model fitting by EpiNow2, so matched rows are excluded from the 25 | model likelihood. 
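A minimal sketch of this matching step on made-up data (not the package implementation, which operates on the dataframes returned by read_data() and read_exclusions()):

```r
# Toy inputs for illustration only.
cases <- data.frame(
  reference_date = as.Date("2023-01-01") + 0:2,
  report_date = as.Date("2023-01-04"),
  geo_value = "CA",
  disease = "COVID-19",
  confirm = c(12L, 340L, 15L)
)
exclusions <- data.frame(
  reference_date = as.Date("2023-01-02"),
  report_date = as.Date("2023-01-04"),
  geo_value = "CA",
  disease = "COVID-19"
)
keys <- c("reference_date", "report_date", "geo_value", "disease")
is_excluded <- do.call(paste, cases[keys]) %in% do.call(paste, exclusions[keys])
cases$confirm[is_excluded] <- NA  # NA rows are skipped by EpiNow2 in model fitting
```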
26 | } 27 | \seealso{ 28 | Other exclusions: 29 | \code{\link{read_exclusions}()} 30 | } 31 | \concept{exclusions} 32 | -------------------------------------------------------------------------------- /man/download_file_from_container.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/azure.R 3 | \name{download_file_from_container} 4 | \alias{download_file_from_container} 5 | \title{Download specified blobs from Blob Storage and save them in a local dir} 6 | \usage{ 7 | download_file_from_container( 8 | blob_storage_path, 9 | local_file_path, 10 | storage_container 11 | ) 12 | } 13 | \arguments{ 14 | \item{blob_storage_path}{A character of a blob in \code{storage_container}} 15 | 16 | \item{local_file_path}{The local path to save the blob} 17 | 18 | \item{storage_container}{The blob storage container with \code{blob_storage_path}} 19 | } 20 | \value{ 21 | Invisibly, \code{local_file_path} 22 | } 23 | \description{ 24 | Download specified blobs from Blob Storage and save them in a local dir 25 | } 26 | \seealso{ 27 | Other azure: 28 | \code{\link{download_if_specified}()}, 29 | \code{\link{fetch_blob_container}()}, 30 | \code{\link{fetch_credential_from_env_var}()} 31 | } 32 | \concept{azure} 33 | -------------------------------------------------------------------------------- /man/download_if_specified.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/azure.R 3 | \name{download_if_specified} 4 | \alias{download_if_specified} 5 | \title{Download if specified} 6 | \usage{ 7 | download_if_specified(blob_path, blob_storage_container, dir) 8 | } 9 | \arguments{ 10 | \item{blob_path}{The name of the blob to download} 11 | 12 | \item{blob_storage_container}{The name of the container to download from} 13 | 14 | \item{dir}{The directory to which to write the downloaded file} 15 | } 16 | \value{ 17 | The path of the file 18 | } 19 | \description{ 20 | Download if specified 21 | } 22 | \seealso{ 23 | Other azure: 24 | \code{\link{download_file_from_container}()}, 25 | \code{\link{fetch_blob_container}()}, 26 | \code{\link{fetch_credential_from_env_var}()} 27 | } 28 | \concept{azure} 29 | -------------------------------------------------------------------------------- /man/extract_diagnostics.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/diagnostics.R 3 | \name{extract_diagnostics} 4 | \alias{extract_diagnostics} 5 | \title{Extract diagnostic metrics from model fit and data} 6 | \usage{ 7 | extract_diagnostics(fit, data, job_id, task_id, disease, geo_value, model) 8 | } 9 | \arguments{ 10 | \item{fit}{The model fit object from \code{EpiNow2}} 11 | 12 | \item{data}{A data frame containing the input data used in the model fit.} 13 | 14 | \item{job_id}{A string specifying the job.} 15 | 16 | \item{task_id}{A string specifying the task.} 17 | 18 | \item{disease}{A string specifying the disease being modeled. 
One of 19 | \code{"COVID-19"} or \code{"Influenza"} or \code{"RSV"}.} 20 | 21 | \item{geo_value}{An uppercase, two-character string specifying the geographic 22 | value, usually a state or \code{"US"} for national data.} 23 | 24 | \item{model}{A string specifying the model to be used.} 25 | } 26 | \value{ 27 | A \code{data.frame} containing the extracted diagnostic metrics. The 28 | data frame includes the following columns: 29 | \itemize{ 30 | \item \code{diagnostic}: The name of the diagnostic metric. 31 | \item \code{value}: The value of the diagnostic metric. 32 | \item \code{job_id}: The unique identifier for the job. 33 | \item \code{task_id}: The unique identifier for the task. 34 | \item \code{disease,geo_value,model}: Metadata for downstream processing. 35 | } 36 | } 37 | \description{ 38 | This function extracts various diagnostic metrics from a fitted \code{EpiNow2} 39 | model and provided data. It checks for low case counts and computes 40 | diagnostics from the fitted model, including the mean acceptance 41 | statistic, divergent transitions, maximum tree depth, and Rhat values. 42 | Additionally, a combined flag is computed indicating if any diagnostics 43 | are outside an acceptable range. The results are returned as a data frame. 44 | } 45 | \details{ 46 | The following diagnostics are calculated: 47 | \itemize{ 48 | \item \code{mean_accept_stat}: The average acceptance statistic across 49 | all chains. 50 | \item \code{p_divergent}: The \emph{proportion} of divergent transitions across 51 | all samples. 52 | \item \code{n_divergent}: The \emph{number} of divergent transitions across 53 | all samples. 54 | \item \code{p_max_treedepth}: The proportion of samples that hit the 55 | maximum tree depth. 56 | \item \code{p_high_rhat}: The \emph{proportion} of parameters with Rhat values 57 | greater than 1.05, indicating potential convergence issues. 58 | \item \code{n_high_rhat}: The \emph{number} of parameters with Rhat values 59 | greater than 1.05, indicating potential convergence issues. 60 | \item \code{low_case_count_flag}: A flag indicating if there are low case 61 | counts in the data. See \code{low_case_count_diagnostic()} for more 62 | information on this diagnostic. 63 | \item \code{epinow2_diagnostic_flag}: A combined flag that indicates if 64 | any diagnostic metrics are outside an accepted range, as determined 65 | by the thresholds: (1) mean_accept_stat < 0.1, (2) p_divergent > 66 | 0.0075, (3) p_max_treedepth > 0.05, and (4) p_high_rhat > 0.0075. 
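As a rough sketch of how the combined flag follows from those thresholds (the diagnostic values below are made up; this is not the package's internal code):

```r
# Made-up diagnostic values for illustration.
diag_values <- list(
  mean_accept_stat = 0.87,
  p_divergent = 0.001,
  p_max_treedepth = 0.0,
  p_high_rhat = 0.0
)
epinow2_diagnostic_flag <- with(
  diag_values,
  mean_accept_stat < 0.1 ||
    p_divergent > 0.0075 ||
    p_max_treedepth > 0.05 ||
    p_high_rhat > 0.0075
)
epinow2_diagnostic_flag  # FALSE: all diagnostics are within the accepted range
```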
67 | } 68 | } 69 | \seealso{ 70 | Other diagnostics: 71 | \code{\link{low_case_count_diagnostic}()} 72 | } 73 | \concept{diagnostics} 74 | -------------------------------------------------------------------------------- /man/fetch_blob_container.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/azure.R 3 | \name{fetch_blob_container} 4 | \alias{fetch_blob_container} 5 | \title{Load Azure Blob container using credentials in environment variables} 6 | \usage{ 7 | fetch_blob_container(container_name) 8 | } 9 | \arguments{ 10 | \item{container_name}{The Azure Blob Storage container associated with the 11 | credentials} 12 | } 13 | \value{ 14 | A Blob endpoint 15 | } 16 | \description{ 17 | This function depends on the following Azure credentials stored in 18 | environment variables: 19 | } 20 | \details{ 21 | \itemize{ 22 | \item \code{az_tenant_id}: an Azure Active Directory (AAD) tenant ID 23 | \item \code{az_subscription_id}: an Azure subscription ID 24 | \item \code{az_resource_group}: The name of the Azure resource group 25 | \item \code{az_storage_account}: The name of the Azure storage account 26 | } 27 | 28 | As a result it is an impure function, and should be used bearing that 29 | warning in mind. Each variable is obtained using 30 | \code{\link[=fetch_credential_from_env_var]{fetch_credential_from_env_var()}} (which will return an error if the 31 | credential is not specified or empty). 32 | } 33 | \seealso{ 34 | Other azure: 35 | \code{\link{download_file_from_container}()}, 36 | \code{\link{download_if_specified}()}, 37 | \code{\link{fetch_credential_from_env_var}()} 38 | } 39 | \concept{azure} 40 | -------------------------------------------------------------------------------- /man/fetch_credential_from_env_var.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/azure.R 3 | \name{fetch_credential_from_env_var} 4 | \alias{fetch_credential_from_env_var} 5 | \title{Fetch Azure credential from environment variable} 6 | \usage{ 7 | fetch_credential_from_env_var(env_var) 8 | } 9 | \arguments{ 10 | \item{env_var}{A character, the credential to fetch} 11 | } 12 | \value{ 13 | The associated value 14 | } 15 | \description{ 16 | And throw an informative error if credential is not found 17 | } 18 | \seealso{ 19 | Other azure: 20 | \code{\link{download_file_from_container}()}, 21 | \code{\link{download_if_specified}()}, 22 | \code{\link{fetch_blob_container}()} 23 | } 24 | \concept{azure} 25 | -------------------------------------------------------------------------------- /man/fit_model.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fit_model.R 3 | \name{fit_model} 4 | \alias{fit_model} 5 | \title{Fit an \code{EpiNow2} model} 6 | \usage{ 7 | fit_model(data, parameters, seed, horizon, priors, sampler_opts) 8 | } 9 | \arguments{ 10 | \item{data, }{in the format returned by \code{\link[=read_data]{read_data()}}} 11 | 12 | \item{parameters}{As returned from \code{\link[=read_disease_parameters]{read_disease_parameters()}}} 13 | 14 | \item{seed}{The random seed, used for both initialization by \code{EpiNow2} in R 15 | and sampling in Stan} 16 | 17 | \item{horizon}{The number of days, as an integer, to forecast} 18 | 19 | \item{priors}{A 
list of lists. The first level should contain the key \code{rt} 20 | with elements \code{mean} and \code{sd} and the key \code{gp} with element \code{alpha_sd}.} 21 | 22 | \item{sampler_opts}{A list. The Stan sampler options to be passed through 23 | EpiNow2. It has required keys: \code{cores}, \code{chains}, \code{iter_warmup}, 24 | \code{iter_sampling}, \code{max_treedepth}, and \code{adapt_delta}.} 25 | } 26 | \value{ 27 | A fitted model object of class \code{epinow} or, if model fitting fails, 28 | an NA is returned with a warning 29 | } 30 | \description{ 31 | Fit an \code{EpiNow2} model 32 | } 33 | \seealso{ 34 | Other pipeline: 35 | \code{\link{format_stan_opts}()}, 36 | \code{\link{orchestrate_pipeline}()} 37 | } 38 | \concept{pipeline} 39 | -------------------------------------------------------------------------------- /man/format_stan_opts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fit_model.R 3 | \name{format_stan_opts} 4 | \alias{format_stan_opts} 5 | \title{Format Stan options for input to EpiNow2} 6 | \usage{ 7 | format_stan_opts(sampler_opts, seed) 8 | } 9 | \arguments{ 10 | \item{sampler_opts}{A list. The Stan sampler options to be passed through 11 | EpiNow2. It has required keys: \code{cores}, \code{chains}, \code{iter_warmup}, 12 | \code{iter_sampling}, \code{max_treedepth}, and \code{adapt_delta}.} 13 | 14 | \item{seed}{A stochastic seed passed here to the Stan sampler and as the R 15 | PRNG seed for \code{EpiNow2} initialization} 16 | } 17 | \value{ 18 | A \code{stan_opts} object of arguments 19 | } 20 | \description{ 21 | Format configuration \code{sampler_opts} for input to \code{EpiNow2} via a call to 22 | \code{\link[EpiNow2:stan_opts]{EpiNow2::stan_opts()}}. 23 | } 24 | \seealso{ 25 | Other pipeline: 26 | \code{\link{fit_model}()}, 27 | \code{\link{orchestrate_pipeline}()} 28 | } 29 | \concept{pipeline} 30 | -------------------------------------------------------------------------------- /man/gostic_toy_rt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{gostic_toy_rt} 5 | \alias{gostic_toy_rt} 6 | \title{Synthetic dataset of stochastic SIR system with known Rt} 7 | \format{ 8 | \code{gostic_toy_rt} A data frame with 301 rows and 12 columns: 9 | \describe{ 10 | \item{time}{Timestep of the discrete-time stochastic SEIR simulation} 11 | \item{date}{Added from the original Gostic, 2020 dataset. A date 12 | corresponding to the assigned \code{time}. Arbitrarily starts on January 1st, 13 | 2023.} 14 | \item{S, E, I, R}{The realized state of the stochastic SEIR system} 15 | \item{dS, dEI, DIR}{The stochastic transition between compartments} 16 | \item{incidence}{The true incidence in the \code{I} compartment at time t} 17 | \item{obs_cases}{The observed number of cases at time t from 18 | forward-convolved incidence.} 19 | \item{obs_incidence}{Added from the original Gostic, 2020 dataset. The 20 | \code{incidence} column with added negative-binomial observation noise. 
21 | Created with \code{set.seed(123456)} and the call 22 | \code{rnbinom(299, mu = gostic_toy_rt[["incidence"]], size = 10)} Useful for 23 | testing.} 24 | \item{true_r0}{The initial R0 of the system (i.e., 2)} 25 | \item{true_rt}{The known, true Rt of the epidemic system} 26 | } 27 | } 28 | \source{ 29 | \url{https://github.com/cobeylab/Rt_estimation/tree/d9d8977ba8492ac1a3b8287d2f470b313bfb9f1d} # nolint 30 | } 31 | \usage{ 32 | gostic_toy_rt 33 | } 34 | \description{ 35 | A dataset from Gostic, Katelyn M., et al. "Practical considerations for 36 | measuring the effective reproductive number, Rt." PLoS Computational Biology 37 | 16.12 (2020): e1008409. The data are simulated from a stochastic SEIR 38 | compartmental model. 39 | } 40 | \details{ 41 | This synthetic dataset has a number of desirable properties: 42 | \enumerate{ 43 | \item The force of infection changes depending on the Rt, allowing for sudden 44 | changes in the Rt. This allows for modeling of sudden changes in infection 45 | dynamics, which might otherwise be difficult to capture. Rt estimation 46 | framework 47 | \item The realized Rt is known at each timepoint 48 | \item The dataset incorporates a simple generation interval and a reporting 49 | delay. 50 | } 51 | 52 | Gostic et al. benchmark the performance of a number of Rt estimation 53 | frameworks, providing practical guidance on how to use this dataset to 54 | evaluate Rt estimates. 55 | 56 | In practice, we've found that the amount of observation noise in the 57 | incidence and/or observed cases is often undesirably low for testing. Many 58 | empirical datasets are much noisier. As a result, models built with these 59 | settings in mind can perform poorly on this dataset or fail to converge. To 60 | the original dataset, we add a new column with the original incidence counts 61 | with additional observation noise: \code{obs_incidence}. We manually add 62 | observation noise with \code{rnbinom(299, mu = gostic_toy_rt[["obs_cases"]], size = 10)} and the random seed 123456 and store it in the \code{obs_incidence} column. 63 | } 64 | \seealso{ 65 | Other data: 66 | \code{\link{sir_gt_pmf}} 67 | } 68 | \concept{data} 69 | \keyword{datasets} 70 | -------------------------------------------------------------------------------- /man/low_case_count_diagnostic.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/diagnostics.R 3 | \name{low_case_count_diagnostic} 4 | \alias{low_case_count_diagnostic} 5 | \title{Calculate low case count diagnostic flag} 6 | \usage{ 7 | low_case_count_diagnostic(df) 8 | } 9 | \arguments{ 10 | \item{df}{A dataframe as returned by \code{\link[=read_data]{read_data()}}. The dataframe must 11 | include columns such as \code{reference_date} (a date vector) and \code{confirm} 12 | (the number of confirmed cases per day).} 13 | } 14 | \value{ 15 | A logical value (TRUE or FALSE) indicating whether either of the last 16 | two weeks in the dataset had fewer than 10 cases per week. 17 | } 18 | \description{ 19 | The diagnostic flag is TRUE if either of the \emph{last} two weeks of the dataset 20 | have fewer than an aggregate 10 cases per week. This aggregation excludes the 21 | count from confirmed outliers, which have been set to NA in the data. 22 | } 23 | \details{ 24 | This function assumes that the \code{df} input dataset has been 25 | "completed": that any implicit missingness has been made explicit. 
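A small sketch of the weekly check described above, on made-up data (not the package implementation; it assumes confirmed outliers have already been set to NA):

```r
# Two weeks of toy data: a normal week followed by a very low week.
df <- data.frame(
  reference_date = as.Date("2023-03-01") + 0:13,
  confirm = c(rep(20L, 7), 1L, 0L, NA, 2L, 1L, 0L, 3L)
)
df <- df[order(df$reference_date), ]
last_two_weeks <- tail(df, 14)
week <- rep(c("second_to_last", "last"), each = 7)
weekly_totals <- tapply(last_two_weeks$confirm, week, sum, na.rm = TRUE)
any(weekly_totals < 10)  # TRUE: the final week has only 7 aggregate cases
```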
26 | } 27 | \seealso{ 28 | Other diagnostics: 29 | \code{\link{extract_diagnostics}()} 30 | } 31 | \concept{diagnostics} 32 | -------------------------------------------------------------------------------- /man/opts_formatter.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parameters.R 3 | \name{opts_formatter} 4 | \alias{opts_formatter} 5 | \alias{format_generation_interval} 6 | \alias{format_delay_interval} 7 | \alias{format_right_truncation} 8 | \title{Format PMFs for EpiNow2} 9 | \usage{ 10 | format_generation_interval(pmf) 11 | 12 | format_delay_interval(pmf) 13 | 14 | format_right_truncation(pmf, data) 15 | } 16 | \arguments{ 17 | \item{pmf}{As returned by \code{\link[=read_disease_parameters]{read_disease_parameters()}}. A PMF vector or an NA, 18 | if not applying the PMF to the model fit.} 19 | 20 | \item{data}{in the format returned by \code{\link[=read_data]{read_data()}}} 21 | } 22 | \value{ 23 | An \verb{EpiNow2::*_opts()} formatted object or NA with a message 24 | } 25 | \description{ 26 | Opinionated wrappers around \code{\link[EpiNow2:generation_time_opts]{EpiNow2::generation_time_opts()}}, 27 | \code{\link[EpiNow2:delay_opts]{EpiNow2::delay_opts()}}, or \code{\link[EpiNow2:dist_spec]{EpiNow2::dist_spec()}} which format the 28 | generation interval, delay, or right truncation parameters as an object ready 29 | for input to \code{EpiNow2}. 30 | } 31 | \details{ 32 | Delays or right truncation are optional and can be skipped by passing \code{pmf = NA}. 33 | } 34 | \seealso{ 35 | Other parameters: 36 | \code{\link{read_disease_parameters}()}, 37 | \code{\link{read_interval_pmf}()} 38 | } 39 | \concept{parameters} 40 | -------------------------------------------------------------------------------- /man/pipeline.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pipeline.R 3 | \name{orchestrate_pipeline} 4 | \alias{orchestrate_pipeline} 5 | \alias{execute_model_logic} 6 | \title{Run an Rt Estimation Model Pipeline} 7 | \usage{ 8 | orchestrate_pipeline( 9 | config_path, 10 | config_container = NULL, 11 | input_dir = "/input", 12 | output_dir = "/output" 13 | ) 14 | 15 | execute_model_logic(config, input_dir, output_dir) 16 | } 17 | \arguments{ 18 | \item{config_path}{A string specifying the file path to the JSON 19 | configuration file.} 20 | 21 | \item{config_container}{Optional. The name of the blob storage container 22 | from which the config file will be downloaded.} 23 | 24 | \item{input_dir}{A string specifying the directory to read inputs from. If 25 | passing storage containers, this is where the files will be downloaded to.} 26 | 27 | \item{output_dir}{A string specifying the directory where output, logs, and 28 | other pipeline artifacts will be saved. Defaults to the root directory ("/").} 29 | 30 | \item{config}{A Config object containing configuration settings for the 31 | pipeline, including paths to data, exclusions, disease parameters, model 32 | settings, and other necessary inputs.} 33 | } 34 | \value{ 35 | The function returns a boolean, TRUE For pipeline success and FALSE 36 | otherwise. 
On success, the output 37 | directory will contain the following files: 38 | \itemize{ 39 | \item Model RDS file (\code{model.rds}) 40 | \item Sample output in Parquet format (\verb{<task_id>.parquet} in the \verb{samples/} 41 | directory) 42 | \item Summary output in Parquet format (\verb{<task_id>.parquet} in the \verb{summaries/} 43 | directory) 44 | \item Log file (\code{logs.txt}) in the task directory 45 | } 46 | 47 | Returns \code{TRUE} on success. Errors are caught by the outer pipeline 48 | logic and logged accordingly. 49 | } 50 | \description{ 51 | This function runs a complete pipeline for fitting an Rt estimation model, 52 | using the \code{EpiNow2} model, based on a configuration file. The pipeline 53 | processes the model, logs its progress, and handles errors by logging 54 | warnings and setting the pipeline status. Output and logs are written to 55 | the specified directories. Additionally, support for uploading logs and 56 | outputs to a blob storage container is planned. 57 | } 58 | \details{ 59 | The function reads the configuration from a JSON file and uses this to set 60 | up the job and task identifiers. It creates an output directory structure 61 | based on these IDs and starts logging the process in a file. The main 62 | pipeline process is handled by \code{execute_model_logic()}, with errors 63 | caught and logged as warnings. The function will log the success or 64 | failure of the run. 65 | 66 | Logs are written to a file in the output directory, and console output is 67 | also mirrored in this log file. Error handling is in place to capture any 68 | issues during the pipeline execution and ensure they are logged 69 | appropriately. 70 | 71 | During the execution of the pipeline, the following output files are 72 | expected to be generated: 73 | \itemize{ 74 | \item \strong{Model Output}: An RDS file of the fitted model is saved in the 75 | task-specific directory (\code{model.rds}). 76 | \item \strong{Samples}: Parquet files containing the model's sample outputs are saved 77 | in a \code{samples} subdirectory, named using the \code{task_id} (e.g., 78 | \code{task_id.parquet}). 79 | \item \strong{Summaries}: Parquet files summarizing the model's results are saved in 80 | a \code{summaries} subdirectory, also named using the \code{task_id} (e.g., 81 | \code{task_id.parquet}). 82 | \item \strong{Logs}: A \code{logs.txt} file is generated in the task directory, capturing 83 | both console and error messages. 84 | } 85 | 86 | The output directory structure will follow this format: 87 | 88 | \if{html}{\out{
}}\preformatted{<output_dir>/ 89 | └── <job_id>/ 90 | ├── samples/ 91 | │   └── <task_id>.parquet 92 | ├── summaries/ 93 | │   └── <task_id>.parquet 94 | └── tasks/ 95 | └── <task_id>/ 96 | ├── model.rds 97 | └── logs.txt 98 | }\if{html}{\out{
}} 99 | 100 | This function performs the core model fitting process within the Rt 101 | estimation pipeline, including reading data, applying exclusions, fitting 102 | the model, and writing outputs such as model samples, summaries, and logs. 103 | } 104 | \seealso{ 105 | Other pipeline: 106 | \code{\link{fit_model}()}, 107 | \code{\link{format_stan_opts}()} 108 | 109 | Other pipeline: 110 | \code{\link{fit_model}()}, 111 | \code{\link{format_stan_opts}()} 112 | } 113 | \concept{pipeline} 114 | -------------------------------------------------------------------------------- /man/read_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_data.R 3 | \name{read_data} 4 | \alias{read_data} 5 | \title{Read in the dataset of incident case counts} 6 | \usage{ 7 | read_data( 8 | data_path, 9 | disease = c("COVID-19", "Influenza", "RSV", "test"), 10 | geo_value, 11 | report_date, 12 | max_reference_date, 13 | min_reference_date 14 | ) 15 | } 16 | \arguments{ 17 | \item{data_path}{The path to the local file. This could contain a glob and 18 | must be in parquet format.} 19 | 20 | \item{disease}{A string specifying the disease being modeled. One of 21 | \code{"COVID-19"} or \code{"Influenza"} or \code{"RSV"}.} 22 | 23 | \item{geo_value}{An uppercase, two-character string specifying the geographic 24 | value, usually a state or \code{"US"} for national data.} 25 | 26 | \item{report_date}{A string representing the report date. Formatted as 27 | "YYYY-MM-DD".} 28 | 29 | \item{max_reference_date}{A string representing the maximum reference 30 | date. Formatted as "YYYY-MM-DD".} 31 | 32 | \item{min_reference_date}{A string representing the minimum reference 33 | date. Formatted as "YYYY-MM-DD".} 34 | } 35 | \value{ 36 | A dataframe with one or more rows and columns \code{report_date}, 37 | \code{reference_date}, \code{geo_value}, \code{confirm} 38 | } 39 | \description{ 40 | Each row of the table corresponds to a single facilities' cases for a 41 | reference-date/report-date/disease tuple. We want to aggregate these counts 42 | to the level of geographic aggregate/report-date/reference-date/disease. 43 | } 44 | \details{ 45 | We handle two distinct cases for geographic aggregates: 46 | \enumerate{ 47 | \item A single state: Subset to facilities \strong{in that state only} and aggregate 48 | up to the state level 2. The US overall: Aggregate over all facilities 49 | without any subsetting 50 | } 51 | 52 | Note that we do \emph{not} apply exclusions here. The exclusions are applied 53 | later, after the aggregations. That means that for the US overall, we 54 | aggregate over points that might potentially be excluded at the state level. 55 | Our recourse in this case is to exclude the US overall aggregate point. 
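An illustrative call against the test file written by data-raw/convert_gostic_toy_rt_to_test_dataset.R; the dates below are placeholders and would need to match what is actually in that file:

```r
# Placeholder dates; adjust to the range present in the Parquet file.
cases <- CFAEpiNow2Pipeline::read_data(
  data_path = "tests/testthat/data/test_data.parquet",
  disease = "test",
  geo_value = "test",
  report_date = "2023-06-01",
  max_reference_date = "2023-05-31",
  min_reference_date = "2023-01-02"
)
head(cases)  # columns: report_date, reference_date, geo_value, confirm
```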
56 | } 57 | \concept{read_data} 58 | -------------------------------------------------------------------------------- /man/read_disease_parameters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parameters.R 3 | \name{read_disease_parameters} 4 | \alias{read_disease_parameters} 5 | \title{Read in disease process parameters from an external file or files} 6 | \usage{ 7 | read_disease_parameters( 8 | generation_interval_path, 9 | delay_interval_path, 10 | right_truncation_path, 11 | disease, 12 | as_of_date, 13 | geo_value, 14 | report_date 15 | ) 16 | } 17 | \arguments{ 18 | \item{generation_interval_path, delay_interval_path, right_truncation_path}{Path to a local file with the parameter PMF. See \code{\link[=read_interval_pmf]{read_interval_pmf()}} for 19 | details on the file schema. The parameters can be in the same file or a 20 | different file.} 21 | 22 | \item{disease}{A string specifying the disease being modeled. One of 23 | \code{"COVID-19"} or \code{"Influenza"} or \code{"RSV"}.} 24 | 25 | \item{as_of_date}{Use the parameters that were used in production on this 26 | date. Set for the current date for the most up-to-to date version of the 27 | parameters and set to an earlier date to use parameters from an earlier 28 | time period.} 29 | 30 | \item{geo_value}{An uppercase, two-character string specifying the geographic 31 | value, usually a state or \code{"US"} for national data.} 32 | 33 | \item{report_date}{An optional parameter to subset the query to a parameter 34 | on or before a particular \code{report_date}. Right now, the only parameter with 35 | report date-specific estimates is \code{right_truncation}. Note that this 36 | is similar to, but different from \code{as_of_date}. The \code{report_date} is used 37 | to select the particular value of a time-varying estimate. This estimate 38 | may itself be regenerated over time (e.g., as new data becomes available or 39 | with a methodological update). We can pull the estimate for date 40 | \code{report_date} as generated on date \code{as_of_date}.} 41 | } 42 | \value{ 43 | A named list with three PMFs. The list elements are named 44 | \code{generation_interval}, \code{delay_interval}, and \code{right_truncation}. If a path 45 | to a local file is not provided (NA or NULL), the corresponding parameter 46 | estimate will be NA in the returned list. 47 | } 48 | \description{ 49 | Read in disease process parameters from an external file or files 50 | } 51 | \details{ 52 | \code{generation_interval_path} is required because the generation 53 | interval is a required parameter for $R_t$ estimation. 54 | \code{delay_interval_path} and \code{right_truncation_path} are optional (but 55 | strongly suggested). 
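A hedged sketch of a call, with placeholder paths and dates:

```r
# Placeholder path and dates for illustration only.
params <- CFAEpiNow2Pipeline::read_disease_parameters(
  generation_interval_path = "parameters.parquet",
  delay_interval_path = "parameters.parquet",
  right_truncation_path = NULL, # optional; the returned element will be NA
  disease = "COVID-19",
  as_of_date = "2024-11-20",
  geo_value = "CA",
  report_date = "2024-11-20"
)
names(params) # "generation_interval" "delay_interval" "right_truncation"
```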
56 | } 57 | \seealso{ 58 | Other parameters: 59 | \code{\link{opts_formatter}}, 60 | \code{\link{read_interval_pmf}()} 61 | } 62 | \concept{parameters} 63 | -------------------------------------------------------------------------------- /man/read_exclusions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exclusions.R 3 | \name{read_exclusions} 4 | \alias{read_exclusions} 5 | \title{Read exclusions from an external file} 6 | \usage{ 7 | read_exclusions(path) 8 | } 9 | \arguments{ 10 | \item{path}{The path to the exclusions file in \code{.csv} format} 11 | } 12 | \value{ 13 | A dataframe with columns \code{reference_date}, \code{report_date}, 14 | \code{geo_value}, \code{disease} 15 | } 16 | \description{ 17 | Expects to read a CSV with required columns: 18 | \itemize{ 19 | \item \code{reference_date} 20 | \item \code{report_date} 21 | \item \code{state} 22 | \item \code{disease} 23 | } 24 | } 25 | \details{ 26 | These columns have the same meaning as in \code{\link[=read_data]{read_data()}}. Additional columns 27 | are allowed and will be ignored by the reader. 28 | } 29 | \seealso{ 30 | Other exclusions: 31 | \code{\link{apply_exclusions}()} 32 | } 33 | \concept{exclusions} 34 | -------------------------------------------------------------------------------- /man/read_interval_pmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parameters.R 3 | \name{read_interval_pmf} 4 | \alias{read_interval_pmf} 5 | \title{Read parameter PMF into memory} 6 | \usage{ 7 | read_interval_pmf( 8 | path, 9 | disease = c("COVID-19", "Influenza", "RSV", "test"), 10 | as_of_date, 11 | parameter = c("generation_interval", "delay", "right_truncation"), 12 | geo_value = NA, 13 | report_date = NA 14 | ) 15 | } 16 | \arguments{ 17 | \item{path}{A path to a local file} 18 | 19 | \item{disease}{A string specifying the disease being modeled. One of 20 | \code{"COVID-19"} or \code{"Influenza"} or \code{"RSV"}.} 21 | 22 | \item{as_of_date}{Use the parameters that were used in production on this 23 | date. Set for the current date for the most up-to-to date version of the 24 | parameters and set to an earlier date to use parameters from an earlier 25 | time period.} 26 | 27 | \item{parameter}{One of "generation interval", "delay", or "right-truncation"} 28 | 29 | \item{geo_value}{An uppercase, two-character string specifying the geographic 30 | value, usually a state or \code{"US"} for national data.} 31 | 32 | \item{report_date}{An optional parameter to subset the query to a parameter 33 | on or before a particular \code{report_date}. Right now, the only parameter with 34 | report date-specific estimates is \code{right_truncation}. Note that this 35 | is similar to, but different from \code{as_of_date}. The \code{report_date} is used 36 | to select the particular value of a time-varying estimate. This estimate 37 | may itself be regenerated over time (e.g., as new data becomes available or 38 | with a methodological update). We can pull the estimate for date 39 | \code{report_date} as generated on date \code{as_of_date}.} 40 | } 41 | \value{ 42 | A PMF vector 43 | } 44 | \description{ 45 | Using DuckDB from a parquet file. 
The function expects the file to be in SCD2 46 | format with column names: 47 | \itemize{ 48 | \item parameter 49 | \item geo_value 50 | \item disease 51 | \item start_date 52 | \item end_date 53 | \item value 54 | } 55 | } 56 | \details{ 57 | start_date and end_date specify the date range for which the value was used. 58 | end_date may be NULL (e.g. for the current value used in production). value 59 | must contain a pmf vector whose values are all positive and sum to 1. all 60 | other fields must be consistent with the specifications of the function 61 | arguments described below, which are used to query from the .parquet file. 62 | 63 | SCD2 format is shorthand for slowly changing dimension type 2. This format is 64 | normalized to track change over time: 65 | https://en.wikipedia.org/wiki/Slowly_changing_dimension#Type_2:_add_new_row 66 | } 67 | \seealso{ 68 | Other parameters: 69 | \code{\link{opts_formatter}}, 70 | \code{\link{read_disease_parameters}()} 71 | } 72 | \concept{parameters} 73 | -------------------------------------------------------------------------------- /man/read_json_into_config.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/config.R 3 | \name{read_json_into_config} 4 | \alias{read_json_into_config} 5 | \title{Read JSON Configuration into Config Object} 6 | \usage{ 7 | read_json_into_config(config_path, optional_fields) 8 | } 9 | \arguments{ 10 | \item{config_path}{A string specifying the path to the JSON configuration 11 | file.} 12 | 13 | \item{optional_fields}{A list of strings specifying the optional fields in 14 | the JSON file. If a field is not present in the JSON file, and is marked as 15 | optional, it will be set to either the empty type (e.g. \code{chr(0)}), or NULL. 16 | If a field is not present in the JSON file, and is not marked as optional, an 17 | error will be thrown.} 18 | } 19 | \value{ 20 | An instance of the \code{Config} class populated with the data from the 21 | JSON file. 22 | } 23 | \description{ 24 | Reads a JSON file from the specified path and converts it into a \code{Config} 25 | object. 26 | } 27 | \seealso{ 28 | Other config: 29 | \code{\link{Config}()}, 30 | \code{\link{Data}()}, 31 | \code{\link{Exclusions}()}, 32 | \code{\link{Interval}}, 33 | \code{\link{Parameters}()} 34 | } 35 | \concept{config} 36 | -------------------------------------------------------------------------------- /man/sample_processing_functions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/write_output.R 3 | \name{sample_processing_functions} 4 | \alias{sample_processing_functions} 5 | \alias{process_samples} 6 | \alias{process_quantiles} 7 | \title{Process posterior samples from a Stan fit object (raw draws).} 8 | \usage{ 9 | process_samples(fit, geo_value, model, disease) 10 | 11 | process_quantiles(fit, geo_value, model, disease, quantile_width) 12 | } 13 | \arguments{ 14 | \item{fit}{An \code{EpiNow2} fit object with posterior estimates.} 15 | 16 | \item{geo_value}{An uppercase, two-character string specifying the geographic 17 | value, usually a state or \code{"US"} for national data.} 18 | 19 | \item{model}{A string specifying the model to be used.} 20 | 21 | \item{disease}{A string specifying the disease being modeled. 
One of 22 | \code{"COVID-19"} or \code{"Influenza"} or \code{"RSV"}.} 23 | 24 | \item{quantile_width}{A vector of numeric values representing the desired 25 | quantiles. Passed to \code{\link[tidybayes:reexports]{tidybayes::median_qi()}}.} 26 | } 27 | \value{ 28 | A data.table of posterior draws or quantiles, merged and processed. 29 | } 30 | \description{ 31 | Extracts raw posterior samples from a Stan fit object and post-processes 32 | them, including merging with a fact table and standardizing the parameter 33 | names. If calling \verb{[process_quantiles()]} the 50\% and 95\% intervals are 34 | returned in \code{tidybayes} format. 35 | } 36 | \seealso{ 37 | Other write_output: 38 | \code{\link{write_model_outputs}()}, 39 | \code{\link{write_output_dir_structure}()} 40 | } 41 | \concept{write_output} 42 | -------------------------------------------------------------------------------- /man/sir_gt_pmf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{sir_gt_pmf} 5 | \alias{sir_gt_pmf} 6 | \title{Generation interval corresponding to the sample \code{gostic_toy_rt} dataset} 7 | \format{ 8 | \code{sir_gt_pmf} A numeric vector of length 26 that sums to one within 9 | numerical tolerance 10 | } 11 | \usage{ 12 | sir_gt_pmf 13 | } 14 | \description{ 15 | Gostic et al., 2020 simulates data from a stochastic SEIR model. Residence 16 | time in both the E and the I compartments is exponentially distributed, with 17 | a mean of 4 days (or a rate/inverse-scale of 1/4). These residence times 18 | imply a gamma-distributed generation time distribution with a shape of 2 and 19 | a rate of 1/4. We convert the continuous gamma distribution into a PMF to use 20 | with \code{{RtGam}}. 21 | } 22 | \details{ 23 | From this parametric specification, we produce a double-censored, 24 | left-truncated probability mass function of the generation interval 25 | distribution. We produce the PMF using \code{{epinowcast}}'s 26 | \code{simulate_double_censored_pmf()} with version 0.3.0. See 27 | https://doi.org/10.1101/2024.01.12.24301247 for more information on 28 | double-censoring biases and corrections. 29 | 30 | We correct the output from \code{simulate_double_censored_pmf()} to make it 31 | appropriate to use with \code{{EpiNow2}}. The function returns a numeric vector, 32 | with the position of the element corresponding to one day more than the 33 | length of the delay and value corresponding to the amount of discretized 34 | probability density in the bin. The vector does not necessarily sum to one. 35 | We drop the first element of the vector, which corresponds to a zero-day 36 | delay. The renewal framework, which underpins our model does not account for 37 | zero-day delays. We renormalize the left-truncated vector to sum to one so 38 | that it's a proper PMF. 
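A quick check of the normalization property described above (illustrative):

```r
data(sir_gt_pmf, package = "CFAEpiNow2Pipeline")
isTRUE(all.equal(sum(sir_gt_pmf), 1))  # TRUE: a proper PMF within numerical tolerance
```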
39 | } 40 | \seealso{ 41 | Other data: 42 | \code{\link{gostic_toy_rt}} 43 | } 44 | \concept{data} 45 | \keyword{datasets} 46 | -------------------------------------------------------------------------------- /man/write_model_outputs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/write_output.R 3 | \name{write_model_outputs} 4 | \alias{write_model_outputs} 5 | \title{Write model outputs to specified directories} 6 | \usage{ 7 | write_model_outputs( 8 | fit, 9 | samples, 10 | summaries, 11 | output_dir, 12 | job_id, 13 | task_id, 14 | metadata = list(), 15 | diagnostics 16 | ) 17 | } 18 | \arguments{ 19 | \item{fit}{An \code{EpiNow2} fit object with posterior estimates.} 20 | 21 | \item{samples}{A data.table as returned by \code{\link[=process_samples]{process_samples()}}} 22 | 23 | \item{summaries}{A data.table as returned by \code{\link[=process_quantiles]{process_quantiles()}}} 24 | 25 | \item{output_dir}{A string specifying the directory where output, logs, and 26 | other pipeline artifacts will be saved. Defaults to the root directory ("/").} 27 | 28 | \item{job_id}{A string specifying the job.} 29 | 30 | \item{task_id}{A string specifying the task.} 31 | 32 | \item{metadata}{List. Additional metadata to be included in the output. The 33 | paths to the samples, summaries, and model output will be added to the 34 | metadata list.} 35 | 36 | \item{diagnostics}{A data.table as returned by \code{\link[=extract_diagnostics]{extract_diagnostics()}}} 37 | } 38 | \value{ 39 | Invisible NULL. The function is called for its side effects. 40 | } 41 | \description{ 42 | Processes the model fit, extracts samples and quantiles, 43 | and writes them to the appropriate directories. 44 | } 45 | \seealso{ 46 | Other write_output: 47 | \code{\link{sample_processing_functions}}, 48 | \code{\link{write_output_dir_structure}()} 49 | } 50 | \concept{write_output} 51 | -------------------------------------------------------------------------------- /man/write_output_dir_structure.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/write_output.R 3 | \name{write_output_dir_structure} 4 | \alias{write_output_dir_structure} 5 | \title{Create output directory structure for a given job and task.} 6 | \usage{ 7 | write_output_dir_structure(output_dir, job_id, task_id) 8 | } 9 | \arguments{ 10 | \item{output_dir}{A string specifying the directory where output, logs, and 11 | other pipeline artifacts will be saved. Defaults to the root directory ("/").} 12 | 13 | \item{job_id}{A string specifying the job.} 14 | 15 | \item{task_id}{A string specifying the task.} 16 | } 17 | \value{ 18 | The path to the base output directory (invisible). 19 | } 20 | \description{ 21 | This function generates the necessary directory structure for storing output 22 | files related to a job and its tasks, including directories for raw samples 23 | and summarized quantiles. 
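For instance, a sketch of creating the skeleton under a temporary directory (the job and task IDs are placeholders):

```r
# Placeholder job and task IDs for illustration only.
base <- CFAEpiNow2Pipeline::write_output_dir_structure(
  output_dir = tempdir(),
  job_id = "example-job",
  task_id = "example-task"
)
list.files(base, recursive = TRUE, include.dirs = TRUE)
```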
24 | } 25 | \seealso{ 26 | Other write_output: 27 | \code{\link{sample_processing_functions}}, 28 | \code{\link{write_model_outputs}()} 29 | } 30 | \concept{write_output} 31 | -------------------------------------------------------------------------------- /rules_of_behavior.md: -------------------------------------------------------------------------------- 1 | # Rules of Behavior and Posting Guidelines for the Use of GitHub as a Third-Party Web Application 2 | 3 | ## Purpose 4 | 5 | These rules of behavior establish the privacy and information security requirements for the use of Third Party Web Applications (TPWAs) in conjunction with the CDC GitHub.com organizations established for open source projects. These rules of behavior were developed to ensure that CDC and its confidential information and technologies are not compromised, as well as protecting general CDC interests and services from risks associated with the use of TPWAs while allowing for the increased efficiencies and cost savings that come with appropriate use of third party services. 6 | 7 | ## Scope 8 | 9 | These rules of behavior and its related guidance apply to federal employees, contractors, and all external collaborators who will access GitHub from CDC directly or use them with non-sensitive data obtained from CDC. All engagement with TPWAs related to the GitHub will be governed by these rules of behavior, as well as to the Rules of Behavior for the Use of HHS Information Services. 10 | 11 | ## Ownership 12 | 13 | CDC assigns three stewards in charge of rules and policy compliance: a Business Steward, a Security Steward, and a Technical Steward. The business and security stewards are responsible for establishing policy and providing approval, while the technical steward fulfills requests from users. Users requesting access to GitHub that have not been approved yet need to assign a main and a backup point of contact (POC) with the business steward, as well as provide a justification to the security steward. 14 | 15 | The security steward is responsible for the security of the GitHub usage as a TPWA and its impact on the CDC network and compliance with CDC security policies. All users, including POCs, are responsible for adherence to this policy and associated processes. Where there is not a rule of behavior that provides explicit guidance, users must do their best to safeguard CDC and its network and services from security risks. 16 | 17 | ## Rules of Behavior 18 | 19 | All new users of GitHub must read and acknowledge these rules before using any of the approved TPWAs. This acknowledgment must be completed annually, and establishes agreement from part of the user to adhere to these rules. 20 | 21 | * I understand that I must complete security awareness and records management training annually in order to comply with the latest security and records management policies. 22 | * I understand that I must also follow the Rules of Behavior for use of HHS Information Resources. 23 | * I understand that I must not use, share, or store any kind of sensitive data (health status, provision or payment of healthcare, pictures, PII, etc.) with TPWAs under ANY circumstance. 24 | * I will not knowingly conceal, falsify or remove information.This includes editing or removing the template language provided when a Github repository is created. 25 | * I understand that I can only use non-sensitive and/or publicly available data in GitHub. If you are unsure of what constitutes non-sensitive information, please see guidance below. 
26 | * I understand that all passwords I create to set up GitHub accounts need to comply with CDC’s password policy. 27 | * I understand that the steward reserves the right to moderate all data at any time. 28 | * I understand my responsibilities to protect systems and data as specified by CDC policies. 29 | 30 | ## Guidance Regarding Non-Sensitive and Publicly Available Information 31 | 32 | In support of program collaboration in the use of GitHub, portions of some GitHub projects are either currently open to the public or may become open to the public in the future. The following guidelines will inform and assist the user in determining that the information to be posted on GitHub is not sensitive. The bottom line is that if the content you are posting is not appropriate to post for public access, it should not be posted on GitHub. 33 | 34 | Before posting information that involves other CDC programs, employees, etc. to GitHub, it is important that the poster ensures they receive approval from the relevant CDC entity to post the information. 35 | 36 | Questions to consider before posting information include: 37 | | Question | Answer | Guidance | | --- | --- | --- | 38 | | Do I have reservations about anyone viewing this information? | Yes | Do not post. | 39 | | Were individuals informed that this information would be posted on GitHub? | No | Do not post. | 40 | | Does this information contain details or descriptions of CDC security systems or other sensitive infrastructures? | Yes | Do not post. | 41 | | Does this information reflect program efforts to engage and inform external partners and the public? | No | Do not post. | 42 | 43 | Examples of information which has been deemed not sensitive and may be posted on GitHub include the following. 44 | 45 | * Source Code 46 | * Use cases 47 | * User stories/requirements 48 | * Process flows 49 | * Program pain points 50 | * Software Service Descriptions 51 | 52 | Sensitive information, which should not be posted, includes (but is not limited to) the following. 53 | 54 | * Information directly attributed to an individual in a sensitive manner 55 | * The names or pictures of individuals 56 | * Protected health information 57 | * Project management material. This includes posting or discussing security documentation, implementation plans, communications regarding project specifics, etc. 58 | * Opinions related to programs or tools, specifically those that may have an adverse impact 59 | * Non-public Links to CDC SharePoint or other internal references 60 | * Non-public Details on CDC internal infrastructure 61 | 62 | If there’s any question on whether information may be sensitive (such as detailed interview notes or specific references provided during a program interview), further guidance should be sought from the security steward prior to posting the information on any GitHub. 63 | 64 | ## Enforcement 65 | 66 | Users looking to use GitHub who are unable to follow these rules of behavior will not have authorization to do so. Any users that violate these rules of behavior or CDC security policies may be subject to action, up to and including revoking access to GitHub. Technical and security stewards have the right to enforce these rules of behavior based on violations at any time.
67 | 68 | ## References 69 | 70 | * [Policy for Managing the Use of Third-Party Websites and Applications](https://www.hhs.gov/about/agencies/asa/ocio/cybersecurity/policy-social-media-technologies/index.html) 71 | * [Rules of Behavior for Use of HHS Information Resources](http://www.hhs.gov/ocio/policy/hhs-rob.html) 72 | * [Security and Awareness Training](http://sat.cdc.gov/) (requires login) 73 | -------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This is a wrapper script around the CFAEpiNow2Pipeline::orchestrate_pipeline command that checks 4 | # environment variables and executes the command. This provides a simple mechanism that can be specified 5 | # as a container startup command, allowing the same build to be executed with different configurations 6 | # and inputs. 7 | 8 | # Config file always differs and needs to be specified as a parameter. Azure tenant id, client id, 9 | # and service principal are required as environment variables. If any are not present, print a message 10 | # and exit. 11 | if [[ -z "$1" ]]; then 12 | echo "No config file specified - please provide as argument to this script." 13 | elif [[ -z "${az_tenant_id}" ]]; then 14 | echo "No Azure Tenant ID specified - please set az_tenant_id environment variable." 15 | elif [[ -z "${az_client_id}" ]]; then 16 | echo "No Azure Client ID specified - please set az_client_id environment variable." 17 | elif [[ -z "${az_service_principal}" ]]; then 18 | echo "No Azure Service Principal specified - please set az_service_principal environment variable." 19 | else 20 | # check for other environment variables, using defaults if not set 21 | CFG_CNTR="${CFG_CNTR:-rt-epinow2-config}" 22 | INPUT_DIR="${INPUT_DIR:-/mnt/input}" 23 | OUTPUT_DIR="${OUTPUT_DIR:-/mnt}" 24 | OUTPUT_CNTR="${OUTPUT_CNTR:-zs-test-pipeline-update}" 25 | 26 | # build the string 27 | EXEC_STR="CFAEpiNow2Pipeline::orchestrate_pipeline('$1', config_container='$CFG_CNTR', input_dir='$INPUT_DIR', output_dir='$OUTPUT_DIR', output_container='$OUTPUT_CNTR')" 28 | 29 | # print it, also visible and filterable in Azure logs 30 | echo "Executing pipeline: $EXEC_STR" 31 | 32 | # execute 33 | Rscript -e "$EXEC_STR" 34 | fi 35 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 
5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview 7 | # * https://testthat.r-lib.org/articles/special-files.html 8 | 9 | library(testthat) 10 | library(CFAEpiNow2Pipeline) 11 | 12 | test_check("CFAEpiNow2Pipeline") 13 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/fit_model.md: -------------------------------------------------------------------------------- 1 | # Right truncation longer than data throws error 2 | 3 | Removing right-truncation PMF elements after 2 4 | Right truncation PMF longer than the data 5 | PMF length: 3 6 | Data length: 2 7 | PMF can only be up to the length of the data 8 | 9 | # Missing keys throws error 10 | 11 | Code 12 | format_stan_opts(list(), random_seed) 13 | Condition 14 | Error in `format_stan_opts()`: 15 | ! Missing expected keys/values in "sampler_opts" 16 | Missing keys: "cores", "chains", "iter_warmup", "iter_sampling", "adapt_delta", and "max_treedepth" 17 | Missing values: 18 | 19 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/parameters.md: -------------------------------------------------------------------------------- 1 | # NULL `reference_date` prints in output 2 | 3 | Code 4 | pmf <- check_returned_pmf(pmf_df = pmf_df, parameter = parameter, disease = disease, 5 | as_of_date = as_of_date, geo_value = geo_value, report_date = report_date, 6 | path = path) 7 | Message 8 | Using right-truncation estimate for date "NA" 9 | Queried last available estimate from "2023-01-15" or earlier 10 | Subject to parameters available as of "2023-01-01" 11 | 12 | # GI with nonzero first element throws warning 13 | 14 | Code 15 | fixed <- format_generation_interval(pmf) 16 | Condition 17 | Warning: 18 | First element of GI PMF is not 0 19 | x Renewal equation assumes no same-day transmission 20 | ! Auto-fixing by prepending a 0. Consider left-truncating instead? 
21 | > New PMF: 0, 0.0478174439101374, 0.0760979101401105, 0.0895274782138445, 0.0932924246386663, 0.0910112663029942, 0.0851745750679048, 0.0774669281292755, 0.0690016173717581, 0.0604909602604732, 0.0523692179334625, 0.0448807538374044, 0.0381427961649933, 0.0321897258102522, 0.0270039920145235, 0.0225374046222701, 0.0187255476449921, 0.0154973154449738, ..., 0.00308673656614286, and 0.00250027133286461 22 | 23 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/read_data.md: -------------------------------------------------------------------------------- 1 | # Incomplete return throws warning 2 | 3 | Incomplete number of rows returned 4 | Expected 23 rows 5 | Observed 21 rows 6 | Missing reference date(s): 2022-12-31 and 2023-01-01 7 | 8 | -------------------------------------------------------------------------------- /tests/testthat/data/2025-04-02_test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/cfa-epinow2-pipeline/349cbea090a01498bb32a810ae357875359ff8b2/tests/testthat/data/2025-04-02_test.parquet -------------------------------------------------------------------------------- /tests/testthat/data/CA_COVID-19.json: -------------------------------------------------------------------------------- 1 | { 2 | "job_id": "Rt-estimation-2024-11-26T14-38-24-622e8cc8ac3611efbe8d5a0f1d07309c", 3 | "task_id": "622e8cc8ac3611efbe8d5a0f1d07309c_CA_COVID-19_1732653504", 4 | "min_reference_date": "2024-10-01", 5 | "max_reference_date": "2024-11-25", 6 | "disease": "COVID-19", 7 | "geo_value": "CA", 8 | "geo_type": "state", 9 | "report_date": "2024-11-26", 10 | "production_date": "2024-11-26", 11 | "parameters": { 12 | "as_of_date": "2024-11-26", 13 | "generation_interval": { 14 | "path": "test_parameters.parquet", 15 | "blob_storage_container": null 16 | }, 17 | "delay_interval": { 18 | "path": null, 19 | "blob_storage_container": null 20 | }, 21 | "right_truncation": { 22 | "path": null, 23 | "blob_storage_container": null 24 | } 25 | }, 26 | "data": { 27 | "path": "CA_test.parquet", 28 | "blob_storage_container": null 29 | }, 30 | "seed": 42, 31 | "horizon": 14, 32 | "priors": { 33 | "rt": { 34 | "mean": 1.0, 35 | "sd": 0.2 36 | }, 37 | "gp": { 38 | "alpha_sd": 0.01 39 | } 40 | }, 41 | "sampler_opts": { 42 | "cores": 1, 43 | "chains": 1, 44 | "iter_warmup": 50, 45 | "iter_sampling": 50, 46 | "adapt_delta": 0.99, 47 | "max_treedepth": 12 48 | }, 49 | "exclusions": { 50 | "path": null 51 | }, 52 | "config_version": "1.0", 53 | "quantile_width": [ 54 | 0.5, 55 | 0.95 56 | ], 57 | "model": "EpiNow2" 58 | } 59 | -------------------------------------------------------------------------------- /tests/testthat/data/CA_test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/cfa-epinow2-pipeline/349cbea090a01498bb32a810ae357875359ff8b2/tests/testthat/data/CA_test.parquet -------------------------------------------------------------------------------- /tests/testthat/data/bad_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "job_id": "6183da58-89bc-455f-8562-4f607257a876", 3 | "task_id": "bc0c3eb3-7158-4631-a2a9-86b97357f97e", 4 | "disease": "test", 5 | "geo_value": "test", 6 | "geo_type": "test", 7 | "min_reference_date": "2023-01-02", 8 | "max_reference_date": "2023-01-07", 9 | "report_date": "2023-10-28", 10 | "quantile_width": 
[0.5, 0.95], 11 | "model": "EpiNow2_test", 12 | "parameters": { 13 | "as_of_date": "2023-10-28", 14 | "generation_interval": { 15 | "path": "data/test_parameters.parquet", 16 | "blob_storage_container": null 17 | }, 18 | "delay_interval": { 19 | "path": null, 20 | "blob_storage_container": null 21 | }, 22 | "right_truncation": { 23 | "path": null, 24 | "blob_storage_container": null 25 | } 26 | }, 27 | "data": { 28 | "path": "data/test_data.parquet", 29 | "blob_storage_container": null 30 | }, 31 | "exclusions": { 32 | "path": "data/test_exclusions.csv", 33 | "blob_storage_container": null 34 | }, 35 | "seed": 42, 36 | "horizon": 14, 37 | "priors": { 38 | "rt": { 39 | "mean": 1.0, 40 | "sd": 0.2 41 | }, 42 | "gp": { 43 | "alpha_sd": 0.01 44 | } 45 | }, 46 | "sampler_opts": { 47 | "cores": 1, 48 | "chains": 1, 49 | "iter_warmup": 50, 50 | "iter_sampling": -50, 51 | "adapt_delta": 0.99, 52 | "max_treedepth": 12 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /tests/testthat/data/sample_config_no_exclusion.json: -------------------------------------------------------------------------------- 1 | { 2 | "job_id": "6183da58-89bc-455f-8562-4f607257a876", 3 | "task_id": "bc0c3eb3-7158-4631-a2a9-86b97357f97e", 4 | "disease": "test", 5 | "geo_value": "test", 6 | "geo_type": "test", 7 | "min_reference_date": "2023-01-02", 8 | "max_reference_date": "2023-01-07", 9 | "report_date": "2023-10-28", 10 | "production_date": "2024-10-28", 11 | "quantile_width": [0.5, 0.95], 12 | "model": "EpiNow2_test", 13 | "parameters": { 14 | "as_of_date": "2023-10-28", 15 | "generation_interval": { 16 | "path": "data/test_parameters.parquet", 17 | "blob_storage_container": null 18 | }, 19 | "delay_interval": { 20 | "path": null, 21 | "blob_storage_container": null 22 | }, 23 | "right_truncation": { 24 | "path": null, 25 | "blob_storage_container": null 26 | } 27 | }, 28 | "data": { 29 | "path": "data/test_data.parquet", 30 | "blob_storage_container": null 31 | }, 32 | "seed": 42, 33 | "horizon": 14, 34 | "priors": { 35 | "rt": { 36 | "mean": 1.0, 37 | "sd": 0.2 38 | }, 39 | "gp": { 40 | "alpha_sd": 0.01 41 | } 42 | }, 43 | "sampler_opts": { 44 | "cores": 1, 45 | "chains": 1, 46 | "iter_warmup": 50, 47 | "iter_sampling": 50, 48 | "adapt_delta": 0.99, 49 | "max_treedepth": 12 50 | }, 51 | "config_version": "0.1.0" 52 | } 53 | -------------------------------------------------------------------------------- /tests/testthat/data/sample_config_with_exclusion.json: -------------------------------------------------------------------------------- 1 | { 2 | "job_id": "6183da58-89bc-455f-8562-4f607257a876", 3 | "task_id": "bc0c3eb3-7158-4631-a2a9-86b97357f97e", 4 | "disease": "test", 5 | "geo_value": "test", 6 | "geo_type": "test", 7 | "min_reference_date": "2023-01-02", 8 | "max_reference_date": "2023-01-07", 9 | "report_date": "2023-10-28", 10 | "production_date": "2024-10-28", 11 | "quantile_width": [0.5, 0.95], 12 | "model": "EpiNow2_test", 13 | "parameters": { 14 | "as_of_date": "2023-10-28", 15 | "generation_interval": { 16 | "path": "test_parameters.parquet", 17 | "blob_storage_container": null 18 | }, 19 | "delay_interval": { 20 | "path": null, 21 | "blob_storage_container": null 22 | }, 23 | "right_truncation": { 24 | "path": null, 25 | "blob_storage_container": null 26 | } 27 | }, 28 | "data": { 29 | "path": "test_data.parquet", 30 | "blob_storage_container": null 31 | }, 32 | "exclusions": { 33 | "path": "test_exclusions.csv", 34 | "blob_storage_container": null 35 | }, 
36 | "seed": 42, 37 | "horizon": 14, 38 | "priors": { 39 | "rt": { 40 | "mean": 1.0, 41 | "sd": 0.2 42 | }, 43 | "gp": { 44 | "alpha_sd": 0.01 45 | } 46 | }, 47 | "sampler_opts": { 48 | "cores": 1, 49 | "chains": 1, 50 | "iter_warmup": 50, 51 | "iter_sampling": 50, 52 | "adapt_delta": 0.99, 53 | "max_treedepth": 12 54 | }, 55 | "config_version": "0.1.0" 56 | } 57 | -------------------------------------------------------------------------------- /tests/testthat/data/sample_fit.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/cfa-epinow2-pipeline/349cbea090a01498bb32a810ae357875359ff8b2/tests/testthat/data/sample_fit.rds -------------------------------------------------------------------------------- /tests/testthat/data/test_big_exclusions.csv: -------------------------------------------------------------------------------- 1 | reference_date,report_date,state,disease 2 | 2025-04-01,2025-04-02,AL,COVID-19 3 | 2025-04-01,2025-04-02,AK,COVID-19 4 | 2025-04-01,2025-04-02,AZ,COVID-19 5 | 2025-04-01,2025-04-02,CA,COVID-19 6 | 2025-04-01,2025-04-02,CO,COVID-19 7 | 2025-04-01,2025-04-02,CT,COVID-19 8 | 2025-04-01,2025-04-02,DE,COVID-19 9 | 2025-04-01,2025-04-02,DC,COVID-19 10 | 2025-04-01,2025-04-02,FL,COVID-19 11 | 2025-04-01,2025-04-02,HI,COVID-19 12 | 2025-04-01,2025-04-02,ID,COVID-19 13 | 2025-04-01,2025-04-02,IL,COVID-19 14 | 2025-04-01,2025-04-02,IN,COVID-19 15 | 2025-04-01,2025-04-02,IA,COVID-19 16 | 2025-04-01,2025-04-02,KS,COVID-19 17 | 2025-04-01,2025-04-02,KY,COVID-19 18 | 2025-04-01,2025-04-02,LA,COVID-19 19 | 2025-04-01,2025-04-02,ME,COVID-19 20 | 2025-04-01,2025-04-02,MD,COVID-19 21 | 2025-04-01,2025-04-02,MA,COVID-19 22 | 2025-04-01,2025-04-02,MI,COVID-19 23 | 2025-04-01,2025-04-02,MN,COVID-19 24 | 2025-04-01,2025-04-02,MS,COVID-19 25 | 2025-04-01,2025-04-02,MO,COVID-19 26 | 2025-04-01,2025-04-02,MT,COVID-19 27 | 2025-04-01,2025-04-02,NE,COVID-19 28 | 2025-04-01,2025-04-02,NV,COVID-19 29 | 2025-04-01,2025-04-02,NH,COVID-19 30 | 2025-04-01,2025-04-02,NJ,COVID-19 31 | 2025-04-01,2025-04-02,NM,COVID-19 32 | 2025-04-01,2025-04-02,NY,COVID-19 33 | 2025-04-01,2025-04-02,ND,COVID-19 34 | 2025-03-31,2025-04-02,OH,COVID-19 35 | 2025-04-01,2025-04-02,OH,COVID-19 36 | 2025-04-01,2025-04-02,OK,COVID-19 37 | 2025-04-01,2025-04-02,OR,COVID-19 38 | 2025-04-01,2025-04-02,PA,COVID-19 39 | 2025-04-01,2025-04-02,RI,COVID-19 40 | 2025-04-01,2025-04-02,SD,COVID-19 41 | 2025-04-01,2025-04-02,TN,COVID-19 42 | 2025-04-01,2025-04-02,TX,COVID-19 43 | 2025-04-01,2025-04-02,US,COVID-19 44 | 2025-03-31,2025-04-02,UT,COVID-19 45 | 2025-04-01,2025-04-02,UT,COVID-19 46 | 2025-04-01,2025-04-02,VT,COVID-19 47 | 2025-04-01,2025-04-02,VA,COVID-19 48 | 2025-04-01,2025-04-02,WA,COVID-19 49 | 2025-04-01,2025-04-02,WV,COVID-19 50 | 2025-04-01,2025-04-02,WI,COVID-19 51 | 2025-04-01,2025-04-02,AL,Influenza 52 | 2025-04-01,2025-04-02,AK,Influenza 53 | 2025-04-01,2025-04-02,AZ,Influenza 54 | 2025-04-01,2025-04-02,CA,Influenza 55 | 2025-04-01,2025-04-02,CO,Influenza 56 | 2025-04-01,2025-04-02,CT,Influenza 57 | 2025-04-01,2025-04-02,DE,Influenza 58 | 2025-04-01,2025-04-02,DC,Influenza 59 | 2025-04-01,2025-04-02,FL,Influenza 60 | 2025-04-01,2025-04-02,HI,Influenza 61 | 2025-04-01,2025-04-02,ID,Influenza 62 | 2025-04-01,2025-04-02,IL,Influenza 63 | 2025-04-01,2025-04-02,IN,Influenza 64 | 2025-04-01,2025-04-02,IA,Influenza 65 | 2025-04-01,2025-04-02,KS,Influenza 66 | 2025-04-01,2025-04-02,KY,Influenza 67 | 2025-04-01,2025-04-02,LA,Influenza 68 | 
2025-04-01,2025-04-02,ME,Influenza 69 | 2025-04-01,2025-04-02,MD,Influenza 70 | 2025-04-01,2025-04-02,MA,Influenza 71 | 2025-04-01,2025-04-02,MI,Influenza 72 | 2025-04-01,2025-04-02,MN,Influenza 73 | 2025-04-01,2025-04-02,MS,Influenza 74 | 2025-04-01,2025-04-02,MO,Influenza 75 | 2025-04-01,2025-04-02,MT,Influenza 76 | 2025-04-01,2025-04-02,NE,Influenza 77 | 2025-04-01,2025-04-02,NV,Influenza 78 | 2025-04-01,2025-04-02,NH,Influenza 79 | 2025-04-01,2025-04-02,NJ,Influenza 80 | 2025-04-01,2025-04-02,NM,Influenza 81 | 2025-04-01,2025-04-02,NY,Influenza 82 | 2025-04-01,2025-04-02,ND,Influenza 83 | 2025-03-31,2025-04-02,OH,Influenza 84 | 2025-04-01,2025-04-02,OH,Influenza 85 | 2025-04-01,2025-04-02,OK,Influenza 86 | 2025-04-01,2025-04-02,OR,Influenza 87 | 2025-04-01,2025-04-02,PA,Influenza 88 | 2025-04-01,2025-04-02,RI,Influenza 89 | 2025-04-01,2025-04-02,SD,Influenza 90 | 2025-04-01,2025-04-02,TN,Influenza 91 | 2025-04-01,2025-04-02,TX,Influenza 92 | 2025-04-01,2025-04-02,US,Influenza 93 | 2025-03-31,2025-04-02,UT,Influenza 94 | 2025-04-01,2025-04-02,UT,Influenza 95 | 2025-04-01,2025-04-02,VT,Influenza 96 | 2025-04-01,2025-04-02,VA,Influenza 97 | 2025-04-01,2025-04-02,WA,Influenza 98 | 2025-04-01,2025-04-02,WV,Influenza 99 | 2025-04-01,2025-04-02,WI,Influenza 100 | -------------------------------------------------------------------------------- /tests/testthat/data/test_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/cfa-epinow2-pipeline/349cbea090a01498bb32a810ae357875359ff8b2/tests/testthat/data/test_data.parquet -------------------------------------------------------------------------------- /tests/testthat/data/test_exclusions.csv: -------------------------------------------------------------------------------- 1 | reference_date,report_date,state,disease 2 | 2023-01-07,2023-10-28,test,test 3 | -------------------------------------------------------------------------------- /tests/testthat/data/test_parameters.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/cfa-epinow2-pipeline/349cbea090a01498bb32a810ae357875359ff8b2/tests/testthat/data/test_parameters.parquet -------------------------------------------------------------------------------- /tests/testthat/data/us_overall_test_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/cfa-epinow2-pipeline/349cbea090a01498bb32a810ae357875359ff8b2/tests/testthat/data/us_overall_test_data.parquet -------------------------------------------------------------------------------- /tests/testthat/data/v_bad_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "job_id": "6183da58-89bc-455f-8562-4f607257a876", 3 | "task_id": "bc0c3eb3-7158-4631-a2a9-86b97357f97e" 4 | } 5 | -------------------------------------------------------------------------------- /tests/testthat/helper-expect_pipeline_files_written.R: -------------------------------------------------------------------------------- 1 | expect_pipeline_files_written <- function( 2 | output_dir, 3 | job_id, 4 | task_id, 5 | check_logs = TRUE 6 | ) { 7 | ######## 8 | # Assert output files all exist 9 | job_path <- file.path(output_dir, job_id) 10 | task_path <- file.path(job_path, "tasks", task_id) 11 | 12 | # Samples 13 | expect_true( 14 | file.exists( 15 | file.path( 16 | job_path, 17 | "samples", 18 | 
paste0(task_id, ".parquet") 19 | ) 20 | ) 21 | ) 22 | # Summaries 23 | expect_true( 24 | file.exists( 25 | file.path( 26 | job_path, 27 | "summaries", 28 | paste0(task_id, ".parquet") 29 | ) 30 | ) 31 | ) 32 | # Model 33 | expect_true(file.exists(file.path(task_path, "model.rds"))) 34 | # Logs 35 | if (check_logs) { 36 | expect_true(file.exists(file.path(task_path, "logs.txt"))) 37 | } 38 | # Non-empty metadata 39 | metadata_path <- file.path(task_path, "metadata.json") 40 | expect_true(file.exists(metadata_path)) 41 | metadata <- jsonlite::read_json(metadata_path) 42 | expect_gt(length(metadata), 0) 43 | 44 | # Check that each field passes `rlang::is_atomic()` 45 | for (field in names(metadata)) { 46 | expect_true(rlang::is_atomic(metadata[[field]])) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /tests/testthat/helper-write_exclusion.R: -------------------------------------------------------------------------------- 1 | write_exclusions <- function() { 2 | exclusions <- data.frame( 3 | reference_date = as.Date("2023-01-07"), 4 | report_date = as.Date("2023-10-28"), 5 | state = "test", 6 | disease = "test" 7 | ) 8 | con <- DBI::dbConnect(duckdb::duckdb()) 9 | duckdb::duckdb_register(con, "exclusions", exclusions) 10 | DBI::dbExecute( 11 | con, 12 | "COPY (SELECT * FROM exclusions) 13 | TO 'data/test_exclusions.csv'" 14 | ) 15 | } 16 | -------------------------------------------------------------------------------- /tests/testthat/helper-write_parameter_file.R: -------------------------------------------------------------------------------- 1 | write_sample_parameters_file <- function( 2 | value, 3 | path, 4 | state, 5 | param, 6 | disease, 7 | parameter, 8 | start_date, 9 | end_date, 10 | geo_value, 11 | reference_date 12 | ) { 13 | Sys.sleep(0.05) 14 | df <- data.frame( 15 | start_date = as.Date(start_date), 16 | disease = disease, 17 | parameter = parameter, 18 | end_date = end_date, 19 | geo_value = geo_value, 20 | value = I(list(value)), 21 | reference_date = reference_date 22 | ) 23 | 24 | con <- DBI::dbConnect(duckdb::duckdb()) 25 | on.exit(DBI::dbDisconnect(con), add = TRUE) 26 | 27 | duckdb::duckdb_register(con, "test_table", df) 28 | sql <- "COPY (SELECT * FROM test_table) TO ?path" 29 | query <- DBI::sqlInterpolate( 30 | DBI::ANSI(), 31 | sql, 32 | path = DBI::dbQuoteIdentifier(DBI::ANSI(), path) 33 | ) 34 | 35 | # Retry a few times because DuckDB throws std::exception intermittently. 36 | # This seems like a bug in DuckDB coming from on.exit not always closing the 37 | # connection in case of error and/or the many layers of filesystem runner 38 | # involved in writing this temp file. Rather than think too hard about it, 39 | # this is the sledgehammer approach. 40 | attempt <- 0 41 | success <- NULL 42 | while (attempt < 5 && is.null(success)) { 43 | attempt <- attempt + 1 44 | try(success <- DBI::dbExecute(con, query)) 45 | } 46 | 47 | invisible(path) 48 | } 49 | -------------------------------------------------------------------------------- /tests/testthat/test-diagnostics.R: -------------------------------------------------------------------------------- 1 | test_that("Fitted model extracts diagnostics", { 2 | # Arrange 3 | data_path <- test_path("data/test_data.parquet") 4 | con <- DBI::dbConnect(duckdb::duckdb()) 5 | data <- DBI::dbGetQuery( 6 | con, 7 | " 8 | SELECT 9 | report_date, 10 | reference_date, 11 | disease, 12 | geo_value AS state_abb, 13 | value AS confirm 14 | FROM read_parquet(?) 
15 | WHERE reference_date <= '2023-01-22'", 16 | params = list(data_path) 17 | ) 18 | DBI::dbDisconnect(con) 19 | fit_path <- test_path("data", "sample_fit.rds") 20 | fit <- readRDS(fit_path) 21 | 22 | # Expected diagnostics 23 | expected <- data.frame( 24 | diagnostic = c( 25 | "mean_accept_stat", 26 | "p_divergent", 27 | "n_divergent", 28 | "p_max_treedepth", 29 | "p_high_rhat", 30 | "n_high_rhat", 31 | "diagnostic_flag", 32 | "low_case_count_flag" 33 | ), 34 | value = c( 35 | 0.94240233, 36 | 0.00000000, 37 | 0.00000000, 38 | 0.00000000, 39 | 0.00000000, 40 | 0.00000000, 41 | 0.00000000, 42 | 0.00000000 43 | ), 44 | job_id = rep("test", 8), 45 | task_id = rep("test", 8), 46 | disease = rep("test", 8), 47 | geo_value = rep("test", 8), 48 | model = rep("test", 8), 49 | stringsAsFactors = FALSE 50 | ) 51 | actual <- extract_diagnostics( 52 | fit, 53 | data, 54 | "test", 55 | "test", 56 | "test", 57 | "test", 58 | "test" 59 | ) 60 | 61 | testthat::expect_equal( 62 | actual, 63 | expected 64 | ) 65 | }) 66 | 67 | test_that("Cases below threshold returns TRUE", { 68 | # Arrange 69 | true_df <- data.frame( 70 | reference_date = seq.Date( 71 | from = as.Date("2023-01-01"), 72 | by = "day", 73 | length.out = 14 74 | ), 75 | confirm = c(9, rep(0, 12), 9) 76 | ) 77 | 78 | # Act 79 | diagnostic <- low_case_count_diagnostic(true_df) 80 | 81 | # Assert 82 | expect_true(diagnostic) 83 | }) 84 | 85 | test_that("Cases above threshold returns FALSE", { 86 | # Arrange 87 | false_df <- data.frame( 88 | reference_date = seq.Date( 89 | from = as.Date("2023-01-01"), 90 | by = "day", 91 | length.out = 14 92 | ), 93 | confirm = rep(10, 14) 94 | ) 95 | 96 | # Act 97 | diagnostic <- low_case_count_diagnostic(false_df) 98 | 99 | # Assert 100 | expect_false(diagnostic) 101 | }) 102 | 103 | 104 | test_that("Only the last two weeks are evalated", { 105 | # Arrange 106 | # 3 weeks, first week would pass but last week does not 107 | df <- data.frame( 108 | reference_date = seq.Date( 109 | from = as.Date("2023-01-01"), 110 | by = "day", 111 | length.out = 21 112 | ), 113 | # Week 1: 700, Week 2: 700, Week 3: 0 114 | confirm = c(rep(100, 14), rep(0, 7)) 115 | ) 116 | 117 | # Act 118 | diagnostic <- low_case_count_diagnostic(df) 119 | 120 | # Assert 121 | expect_true(diagnostic) 122 | }) 123 | 124 | test_that("Old approach's negative is now positive", { 125 | # Arrange 126 | df <- data.frame( 127 | reference_date = seq.Date( 128 | from = as.Date("2023-01-01"), 129 | by = "day", 130 | length.out = 14 131 | ), 132 | # Week 1: 21, Week 2: 0 133 | confirm = c(rep(3, 7), rep(0, 7)) 134 | ) 135 | 136 | # Act 137 | diagnostic <- low_case_count_diagnostic(df) 138 | 139 | # Assert 140 | expect_true(diagnostic) 141 | }) 142 | 143 | test_that("NAs are evalated as 0", { 144 | # Arrange 145 | df <- data.frame( 146 | reference_date = seq.Date( 147 | from = as.Date("2023-01-01"), 148 | by = "day", 149 | length.out = 14 150 | ), 151 | # Week 1: 6 (not NA!), Week 2: 700 152 | confirm = c(NA_real_, rep(1, 6), rep(100, 7)) 153 | ) 154 | 155 | # Act 156 | diagnostic <- low_case_count_diagnostic(df) 157 | 158 | # Assert 159 | expect_true(diagnostic) 160 | }) 161 | -------------------------------------------------------------------------------- /tests/testthat/test-exclusions.R: -------------------------------------------------------------------------------- 1 | test_that("Can apply exclusions on happy path", { 2 | exclusions <- data.frame( 3 | reference_date = as.Date("2023-01-06"), 4 | report_date = as.Date("2023-10-28"), 5 | geo_value = 
"test", 6 | disease = "test" 7 | ) 8 | data_path <- test_path("data", "test_data.parquet") 9 | con <- DBI::dbConnect(duckdb::duckdb()) 10 | data <- DBI::dbGetQuery( 11 | con, 12 | " 13 | SELECT 14 | report_date, 15 | reference_date, 16 | disease, 17 | geo_value, 18 | value AS confirm 19 | FROM read_parquet(?)", 20 | params = list(data_path) 21 | ) 22 | DBI::dbDisconnect(con) 23 | 24 | # Apply exclusion by hand 25 | expected <- data 26 | expected[ 27 | expected[["reference_date"]] == "2023-01-06", 28 | ][["confirm"]] <- NA 29 | 30 | # Act 31 | actual <- apply_exclusions( 32 | cases = data, 33 | exclusions = exclusions 34 | ) 35 | 36 | expect_equal(actual, expected) 37 | }) 38 | 39 | test_that("Can read exclusions on happy path", { 40 | expected <- data.frame( 41 | reference_date = as.Date("2023-01-01"), 42 | report_date = as.Date("2023-01-02"), 43 | geo_value = "test", 44 | disease = "test" 45 | ) 46 | 47 | con <- DBI::dbConnect(duckdb::duckdb()) 48 | on.exit(DBI::dbDisconnect(con)) 49 | duckdb::duckdb_register(con, "exclusions", expected) 50 | 51 | withr::with_tempdir({ 52 | DBI::dbExecute( 53 | con, 54 | " 55 | COPY ( 56 | SELECT 57 | reference_date, 58 | report_date, 59 | geo_value AS state, 60 | disease 61 | FROM exclusions 62 | ) TO 'test.csv'" 63 | ) 64 | 65 | actual <- read_exclusions("test.csv") 66 | }) 67 | 68 | expect_equal(actual, expected) 69 | }) 70 | 71 | test_that("Empty read errors", { 72 | expected <- data.frame( 73 | reference_date = character(), 74 | report_date = character(), 75 | state = character(), 76 | disease = character() 77 | ) 78 | 79 | con <- DBI::dbConnect(duckdb::duckdb()) 80 | on.exit(DBI::dbDisconnect(con)) 81 | duckdb::duckdb_register(con, "exclusions", expected) 82 | 83 | withr::with_tempdir({ 84 | DBI::dbExecute(con, "COPY (FROM exclusions) TO 'test.csv'") 85 | 86 | expect_error(read_exclusions("test.csv"), class = "empty_return") 87 | }) 88 | }) 89 | 90 | test_that("Missing file errors", { 91 | expect_error( 92 | read_exclusions(path = "not_a_real_path"), 93 | class = "file_not_found" 94 | ) 95 | }) 96 | 97 | test_that("Bad query errors", { 98 | expect_error( 99 | read_exclusions(path = "test-exclusions.R"), 100 | class = "wrapped_invalid_query" 101 | ) 102 | }) 103 | 104 | test_that("Works as expected on large exclusions file", { 105 | # Read in the large exclusions file 106 | excl_path <- test_path("data", "test_big_exclusions.csv") 107 | exclusions <- read_exclusions(excl_path) 108 | 109 | # Load some sample case data 110 | data_path <- test_path("data", "2025-04-02_test.parquet") 111 | cases <- read_data( 112 | data_path, 113 | disease = "COVID-19", 114 | geo_value = "OH", 115 | report_date = "2025-04-02", 116 | max_reference_date = "2025-04-02", 117 | min_reference_date = "1970-01-01" 118 | ) 119 | 120 | # Apply the exclusions 121 | got <- apply_exclusions(cases, exclusions) 122 | 123 | # Check that the exclusions were applied as expected 124 | expect_equal( 125 | got$confirm[179:181], 126 | c(54, NA, NA) 127 | ) 128 | }) 129 | -------------------------------------------------------------------------------- /tests/testthat/test-fit_model.R: -------------------------------------------------------------------------------- 1 | test_that("Minimal model fit all params runs", { 2 | # Parameters 3 | parameters <- list( 4 | generation_interval = sir_gt_pmf, 5 | delay_interval = c(0.2, 0.8), 6 | right_truncation = c(0.7, 0.3) 7 | ) 8 | # Data -- 5 points only 9 | data_path <- test_path("data", "test_data.parquet") 10 | con <- DBI::dbConnect(duckdb::duckdb()) 
11 | data <- DBI::dbGetQuery( 12 | con, 13 | " 14 | SELECT 15 | report_date, 16 | reference_date, 17 | disease, 18 | geo_value AS state_abb, 19 | value AS confirm 20 | FROM read_parquet(?) 21 | ORDER BY reference_date 22 | LIMIT 5 23 | ", 24 | params = list(data_path) 25 | ) 26 | DBI::dbDisconnect(con) 27 | # Priors 28 | priors <- list( 29 | rt = list( 30 | mean = 1, 31 | sd = 0.2 32 | ), 33 | gp = list( 34 | alpha_sd = 0.05 35 | ) 36 | ) 37 | # Sampler 38 | sampler_opts <- list( 39 | cores = 1, 40 | chains = 1, 41 | adapt_delta = 0.8, 42 | max_treedepth = 10, 43 | iter_warmup = 25, 44 | iter_sampling = 25 45 | ) 46 | 47 | fit <- fit_model( 48 | data = data, 49 | parameters = parameters, 50 | seed = 12345, 51 | horizon = 0, 52 | priors = priors, 53 | sampler = sampler_opts 54 | ) 55 | 56 | expect_s3_class(fit, "epinow") 57 | }) 58 | 59 | test_that("Minimal model fit with no right trunc or delay runs", { 60 | # Parameters 61 | parameters <- list( 62 | generation_interval = sir_gt_pmf, 63 | delay_interval = NA, 64 | right_truncation = NA 65 | ) 66 | # Data -- 5 points only 67 | data_path <- test_path("data", "test_data.parquet") 68 | con <- DBI::dbConnect(duckdb::duckdb()) 69 | data <- DBI::dbGetQuery( 70 | con, 71 | " 72 | SELECT 73 | report_date, 74 | reference_date, 75 | disease, 76 | geo_value AS state_abb, 77 | value AS confirm 78 | FROM read_parquet(?) 79 | ORDER BY reference_date 80 | LIMIT 5 81 | ", 82 | params = list(data_path) 83 | ) 84 | DBI::dbDisconnect(con) 85 | # Priors 86 | priors <- list( 87 | rt = list( 88 | mean = 1, 89 | sd = 0.2 90 | ), 91 | gp = list( 92 | alpha_sd = 0.05 93 | ) 94 | ) 95 | # Sampler 96 | sampler_opts <- list( 97 | cores = 1, 98 | chains = 1, 99 | adapt_delta = 0.8, 100 | max_treedepth = 10, 101 | iter_warmup = 25, 102 | iter_sampling = 25 103 | ) 104 | 105 | fit <- fit_model( 106 | data = data, 107 | parameters = parameters, 108 | seed = 12345, 109 | horizon = 0, 110 | priors = priors, 111 | sampler = sampler_opts 112 | ) 113 | 114 | expect_s3_class(fit, "epinow") 115 | }) 116 | 117 | test_that("Bad params w/ failing fit issues warning and returns NA", { 118 | # Parameterization is same as above except Stan argument `iter_warmup` is 119 | # negative, which is an illegal parameterizaion. As a result, EpiNow2 starts 120 | # the Stan sampler but it terminates unexpectedly with an error, which is the 121 | # desired testing condition. 122 | 123 | # Parameters 124 | parameters <- list( 125 | generation_interval = sir_gt_pmf, 126 | delay_interval = NA, 127 | right_truncation = NA 128 | ) 129 | # Data -- 5 points only 130 | data_path <- test_path("data", "test_data.parquet") 131 | con <- DBI::dbConnect(duckdb::duckdb()) 132 | data <- DBI::dbGetQuery( 133 | con, 134 | " 135 | SELECT 136 | report_date, 137 | reference_date, 138 | disease, 139 | geo_value AS state_abb, 140 | value AS confirm 141 | FROM read_parquet(?) 
142 | ORDER BY reference_date 143 | LIMIT 5 144 | ", 145 | params = list(data_path) 146 | ) 147 | DBI::dbDisconnect(con) 148 | # Priors 149 | priors <- list( 150 | rt = list( 151 | mean = 1, 152 | sd = 0.2 153 | ), 154 | gp = list( 155 | alpha_sd = 0.05 156 | ) 157 | ) 158 | # Sampler 159 | sampler_opts <- list( 160 | cores = 1, 161 | chains = 1, 162 | adapt_delta = 0.8, 163 | max_treedepth = 10, 164 | iter_warmup = -25, 165 | iter_sampling = 25 166 | ) 167 | 168 | expect_error( 169 | fit <- fit_model( 170 | data = data, 171 | parameters = parameters, 172 | seed = 12345, 173 | horizon = 0, 174 | priors = priors, 175 | sampler = sampler_opts 176 | ), 177 | class = "failing_fit" 178 | ) 179 | }) 180 | 181 | test_that("Right truncation longer than data throws error", { 182 | data <- data.frame(x = c(1, 2)) 183 | right_truncation_pmf <- c(0.1, 0.2, 0.7) 184 | 185 | expect_snapshot_warning( 186 | format_right_truncation( 187 | right_truncation_pmf, 188 | data 189 | ) 190 | ) 191 | }) 192 | 193 | test_that("Missing GI throws error", { 194 | expect_error(format_generation_interval(NA), class = "Missing_GI") 195 | }) 196 | 197 | test_that("Missing keys throws error", { 198 | random_seed <- 12345 199 | expect_snapshot(format_stan_opts(list(), random_seed), error = TRUE) 200 | }) 201 | -------------------------------------------------------------------------------- /tests/testthat/test-pipeline.R: -------------------------------------------------------------------------------- 1 | test_that("Bad config throws warning and returns failure", { 2 | # Arrange 3 | config_path <- test_path("data", "bad_config.json") 4 | config <- jsonlite::read_json(config_path) 5 | # Read from locally 6 | output_container <- NULL 7 | output_dir <- "pipeline_test" 8 | input_dir <- "." 9 | on.exit(unlink(output_dir, recursive = TRUE)) 10 | 11 | # Act 12 | expect_warning( 13 | pipeline_success <- orchestrate_pipeline( 14 | config_path = config_path, 15 | input_dir = input_dir, 16 | output_dir = output_dir 17 | ), 18 | class = "Bad_config" 19 | ) 20 | expect_false(pipeline_success) 21 | }) 22 | 23 | test_that("Pipeline run produces expected outputs with NO exclusions", { 24 | # Arrange 25 | config_path <- test_path("data", "sample_config_no_exclusion.json") 26 | config <- jsonlite::read_json(config_path) 27 | # Read from locally 28 | output_container <- NULL 29 | output_dir <- "pipeline_test" 30 | input_dir <- "." 
31 | on.exit(unlink(output_dir, recursive = TRUE)) 32 | 33 | # Act 34 | pipeline_success <- orchestrate_pipeline( 35 | config_path = config_path, 36 | input_dir = input_dir, 37 | output_dir = output_dir 38 | ) 39 | expect_true(pipeline_success) 40 | expect_pipeline_files_written( 41 | output_dir, 42 | config[["job_id"]], 43 | config[["task_id"]] 44 | ) 45 | }) 46 | 47 | test_that("Pipeline run produces expected outputs with exclusions", { 48 | # Arrange 49 | input_dir <- test_path("data") 50 | config_path <- "sample_config_with_exclusion.json" 51 | config <- jsonlite::read_json(file.path(input_dir, config_path)) 52 | # Read from locally 53 | output_container <- NULL 54 | output_dir <- "pipeline_test" 55 | on.exit(unlink(output_dir, recursive = TRUE)) 56 | 57 | # Act 58 | pipeline_success <- orchestrate_pipeline( 59 | config_path = config_path, 60 | input_dir = input_dir, 61 | output_dir = output_dir 62 | ) 63 | 64 | ######## 65 | # Assert output files all exist 66 | expect_pipeline_files_written( 67 | output_dir, 68 | config[["job_id"]], 69 | config[["task_id"]] 70 | ) 71 | expect_true(pipeline_success) 72 | }) 73 | 74 | test_that("Process pipeline produces expected outputs and returns success", { 75 | # Arrange 76 | input_dir <- "data" 77 | config_path <- file.path(input_dir, "sample_config_with_exclusion.json") 78 | config <- read_json_into_config( 79 | config_path, 80 | c("exclusions", "output_container") 81 | ) 82 | # Read from locally 83 | output_dir <- "pipeline_test" 84 | on.exit(unlink(output_dir, recursive = TRUE)) 85 | 86 | # Act 87 | pipeline_success <- execute_model_logic( 88 | config = config, 89 | input_dir = input_dir, 90 | output_dir = output_dir 91 | ) 92 | expect_true(pipeline_success) 93 | 94 | ######## 95 | # Assert output files all exist 96 | expect_pipeline_files_written( 97 | output_dir, 98 | config@job_id, 99 | config@task_id, 100 | # Don't check logs here, bc logs are set up by orchestrate_pipeline(), but 101 | # this test is just for execute_model_logic() which is called after logs are 102 | # set up in orchestrate_pipeline(). 103 | check_logs = FALSE 104 | ) 105 | }) 106 | 107 | test_that("Runs on config from generator as of 2024-11-26", { 108 | # Arrange 109 | config_path <- "CA_COVID-19.json" 110 | input_dir <- test_path("data") 111 | config <- read_json_into_config( 112 | file.path(input_dir, config_path), 113 | c("exclusions", "output_container") 114 | ) 115 | # Read from locally 116 | output_dir <- test_path("pipeline_test") 117 | on.exit(unlink(output_dir, recursive = TRUE)) 118 | 119 | # Act 120 | pipeline_success <- execute_model_logic( 121 | config = config, 122 | output_dir = output_dir, 123 | input_dir = input_dir 124 | ) 125 | expect_true(pipeline_success) 126 | 127 | ######## 128 | # Assert output files all exist 129 | expect_pipeline_files_written( 130 | output_dir, 131 | config@job_id, 132 | config@task_id, 133 | # Do not check for log output here, bc logs get created in 134 | # `orchestrate_pipeline()`, and this test only calls `execute_model_logic()` 135 | # which gets called after the log files have been created. 
136 | check_logs = FALSE 137 | ) 138 | }) 139 | 140 | test_that("Warning and exit for bad config file", { 141 | # Arrange 142 | config_path <- test_path("v_bad_config.json") 143 | # Read from locally 144 | input_dir <- test_path("data") 145 | output_dir <- test_path("bad_output") 146 | on.exit(unlink(output_dir, recursive = TRUE)) 147 | 148 | # Act 149 | expect_warning( 150 | pipeline_success <- orchestrate_pipeline( 151 | config_path = config_path, 152 | input_dir = input_dir, 153 | output_dir = output_dir 154 | ), 155 | class = "Bad_config" 156 | ) 157 | expect_false(pipeline_success) 158 | }) 159 | -------------------------------------------------------------------------------- /tests/testthat/test-read_data.R: -------------------------------------------------------------------------------- 1 | test_that("Data read for one state works on happy path", { 2 | data_path <- test_path("data/test_data.parquet") 3 | con <- DBI::dbConnect(duckdb::duckdb()) 4 | expected <- DBI::dbGetQuery( 5 | con, 6 | " 7 | SELECT 8 | report_date, 9 | reference_date, 10 | disease, 11 | geo_value AS geo_value, 12 | value AS confirm 13 | FROM read_parquet(?) 14 | WHERE reference_date <= '2023-01-22'", 15 | params = list(data_path) 16 | ) 17 | DBI::dbDisconnect(con) 18 | 19 | actual <- read_data( 20 | data_path, 21 | disease = "test", 22 | geo_value = "test", 23 | report_date = "2023-10-28", 24 | min_reference_date = as.Date("2023-01-02"), 25 | max_reference_date = "2023-01-22" 26 | ) 27 | 28 | expect_equal(actual, expected) 29 | }) 30 | 31 | test_that("Data read for US overall works on happy path", { 32 | data_path <- test_path("data/us_overall_test_data.parquet") 33 | con <- DBI::dbConnect(duckdb::duckdb()) 34 | expected <- DBI::dbGetQuery( 35 | con, 36 | " 37 | SELECT 38 | report_date, 39 | reference_date, 40 | disease, 41 | geo_value AS geo_value, 42 | value AS confirm 43 | FROM read_parquet(?) 
44 | WHERE reference_date <= '2023-01-22'", 45 | params = list(data_path) 46 | ) 47 | DBI::dbDisconnect(con) 48 | 49 | actual <- read_data( 50 | data_path, 51 | disease = "test", 52 | geo_value = "US", 53 | report_date = "2023-10-28", 54 | min_reference_date = "2023-01-02", 55 | max_reference_date = "2023-01-22" 56 | ) 57 | 58 | expect_equal(actual, expected) 59 | }) 60 | 61 | test_that("Reading a file that doesn't exist fails", { 62 | data_path <- "not_a_real_file" 63 | expect_error( 64 | read_data( 65 | data_path, 66 | disease = "test", 67 | geo_value = "not_a_real_state", 68 | report_date = "2023-10-28", 69 | min_reference_date = "2023-01-02", 70 | max_reference_date = "2023-01-22" 71 | ), 72 | class = "file_not_found" 73 | ) 74 | }) 75 | 76 | test_that("A query with no matching return fails", { 77 | data_path <- test_path("data/us_overall_test_data.parquet") 78 | expect_error( 79 | read_data( 80 | data_path, 81 | disease = "test", 82 | geo_value = "not_a_real_state", 83 | report_date = "2023-10-28", 84 | min_reference_date = "2023-01-02", 85 | max_reference_date = "2023-01-22" 86 | ), 87 | class = "empty_return" 88 | ) 89 | }) 90 | 91 | test_that("An invalid query throws a wrapped error", { 92 | # point the query at a non-parquet file 93 | data_path <- test_path("test-read_data.R") 94 | expect_error( 95 | read_data( 96 | data_path, 97 | disease = "test", 98 | geo_value = "not_a_real_state", 99 | report_date = "2023-10-28", 100 | min_reference_date = "2023-01-02", 101 | max_reference_date = "2023-01-22" 102 | ), 103 | class = "wrapped_invalid_query" 104 | ) 105 | }) 106 | 107 | test_that("Incomplete return throws warning", { 108 | data_path <- test_path("data/test_data.parquet") 109 | 110 | # Two missing dates 111 | expect_snapshot_warning( 112 | read_data( 113 | data_path, 114 | disease = "test", 115 | geo_value = "test", 116 | report_date = "2023-10-28", 117 | min_reference_date = "2022-12-31", 118 | max_reference_date = "2023-01-22" 119 | ), 120 | class = "incomplete_return" 121 | ) 122 | }) 123 | 124 | test_that("Replace COVID-19/Omicron with COVID-19, one state", { 125 | data_path <- test_path("data/CA_test.parquet") 126 | 127 | actual <- read_data( 128 | data_path, 129 | disease = "COVID-19", 130 | geo_value = "CA", 131 | report_date = "2024-11-26", 132 | min_reference_date = as.Date("2024-06-01"), 133 | max_reference_date = "2024-11-25" 134 | ) 135 | 136 | # Expect that there should be no "COVID-19/Omicron" in the data, 137 | # only "COVID-19" 138 | expect_false("COVID-19/Omicron" %in% actual$disease) 139 | expect_true(all(actual$disease == "COVID-19")) 140 | }) 141 | 142 | 143 | test_that("Replace COVID-19/Omicron with COVID-19, US", { 144 | data_path <- test_path("data/CA_test.parquet") 145 | 146 | actual <- read_data( 147 | data_path, 148 | disease = "COVID-19", 149 | geo_value = "US", 150 | report_date = "2024-11-26", 151 | min_reference_date = as.Date("2024-06-01"), 152 | max_reference_date = "2024-11-25" 153 | ) 154 | 155 | # Expect that there should be no "COVID-19/Omicron" in the data, 156 | # only "COVID-19" 157 | expect_false("COVID-19/Omicron" %in% actual$disease) 158 | expect_true(all(actual$disease == "COVID-19")) 159 | }) 160 | -------------------------------------------------------------------------------- /thanks.md: -------------------------------------------------------------------------------- 1 | # Thanks and Acknowledgements 2 | 3 | Starting this file way too late, but wanted to recognize contributions made by people who helped this repo. 
There are many more than this, but I should have started this file years ago. 4 | 5 | * Chris Sandlin [@cssandlin](https://github.com/cssandlin) 6 | * Drewry Morris [@drewry](https://github.com/drewry) 7 | -------------------------------------------------------------------------------- /utils/Rt_review_exclusions.R: -------------------------------------------------------------------------------- 1 | option_list <- list( 2 | optparse::make_option( 3 | c("-d", "--dates"), 4 | type = "character", 5 | default = gsub( 6 | "-", 7 | "", 8 | lubridate::today(tzone = "UTC") 9 | ), 10 | help = "Reports Date in yyyymmdd format", 11 | metavar = "character" 12 | ) 13 | ) 14 | opt_parser <- optparse::OptionParser(option_list = option_list) 15 | opt <- optparse::parse_args(opt_parser) 16 | # Get All Files Names to Download and Parse 17 | date_names <- opt$dates 18 | 19 | 20 | read_process_excel_func <- function( 21 | sheet_name, 22 | pathogen, 23 | file_name, 24 | report_date 25 | ) { 26 | df <- readxl::read_excel( 27 | paste0(file_name), # path where saved 28 | sheet = sheet_name, 29 | skip = 3, 30 | col_names = c( 31 | "state", 32 | "dates_affected", 33 | "observed volume", 34 | "expected volume", 35 | "initial_thoughts", 36 | "state_abb", 37 | "review_1_decision", 38 | "reviewer_2_decision", 39 | "final_decision", 40 | "drop_dates", 41 | "additional_reasoning" 42 | ) 43 | ) 44 | df <- df |> dplyr::mutate(drop_dates = as.character(drop_dates)) 45 | df <- data.frame(tidyr::separate_rows(df, 10, sep = "\\|")) |> 46 | dplyr::filter(!is.na(state)) |> 47 | dplyr::mutate( 48 | report_date = report_date, 49 | pathogen = pathogen 50 | ) |> 51 | dplyr::select( 52 | "report_date", 53 | "state", 54 | "state_abb", 55 | "pathogen", 56 | "review_1_decision", 57 | "reviewer_2_decision", 58 | "final_decision", 59 | "drop_dates" 60 | ) 61 | return(df) 62 | } 63 | 64 | 65 | create_pt_excl_from_rt_xslx <- function(dates) { 66 | # Connect to Sharepoint via Microsoft365R library 67 | # Provide team name here 68 | site <- Microsoft365R::get_sharepoint_site( 69 | auth_type = "device_code", 70 | "OD-OCoS-Center for Forecasting and Outbreak Analytics" 71 | ) 72 | drv <- site$get_drive("Documents") # Set drive to Documents (vs Wiki) 73 | rt_review_path <- file.path( 74 | "General", 75 | "02 - Predict", 76 | "Real Time Monitoring (RTM) Branch", 77 | "Nowcasting and Natural History", 78 | "Rt", 79 | "NSSP-Rt", 80 | "Rt_Review_Notes", 81 | "Review_Decisions" 82 | ) 83 | 84 | for (report_date in dates) { 85 | fname <- paste0("Rt_Review_", report_date, ".xlsx") 86 | drv$get_item(file.path(rt_review_path, fname))$download( 87 | dest = paste0(fname), 88 | overwrite = TRUE 89 | ) 90 | # read and process the COVID sheet 91 | covid_df <- read_process_excel_func( 92 | sheet_name = "Rt_Review_COVID", 93 | pathogen = "covid", 94 | file_name = fname, 95 | report_date = report_date 96 | ) 97 | # read and process the Influenza sheet 98 | influenza_df <- read_process_excel_func( 99 | sheet_name = "Rt_Review_Influenza", 100 | pathogen = "influenza", 101 | file_name = fname, 102 | report_date = report_date 103 | ) 104 | # Overall Rt_review machine readable format 105 | combined_df <- rbind(covid_df, influenza_df) 106 | if (file.exists(paste0(fname))) { 107 | # Delete file if it exists 108 | file.remove(paste0(fname)) 109 | } 110 | # Further processing 111 | combined_df <- combined_df |> 112 | dplyr::mutate( 113 | reference_date = lubridate::ymd(drop_dates), 114 | report_date = lubridate::ymd(report_date), 115 | geo_value = state_abb, 116 | 
pathogen = dplyr::case_when( 117 | pathogen == "influenza" ~ "Influenza", 118 | pathogen == "covid" ~ "COVID-19", 119 | .default = as.character(pathogen) 120 | ) 121 | ) 122 | 123 | # point exclusions in outlier.csv format 124 | point_exclusions <- combined_df |> 125 | dplyr::filter(!is.na(drop_dates)) |> 126 | dplyr::mutate( 127 | raw_confirm = NA, 128 | clean_confirm = NA 129 | ) |> 130 | dplyr::select( 131 | reference_date, 132 | report_date, 133 | "state" = "geo_value", 134 | "disease" = "pathogen" 135 | ) 136 | container_name <- "nssp-etl" 137 | cont <- CFAEpiNow2Pipeline::fetch_blob_container(container_name) 138 | 139 | cli::cli_alert_info( 140 | "saving {lubridate::ymd(report_date)}.csv in 141 | {container_name}/outliers-v2" 142 | ) 143 | AzureStor::storage_write_csv( 144 | cont = cont, 145 | object = point_exclusions, 146 | file = file.path( 147 | "outliers-v2", 148 | paste0(lubridate::ymd(report_date), ".csv") 149 | ) 150 | ) 151 | 152 | #### State exclusions ##### 153 | state_exclusions <- combined_df |> 154 | dplyr::filter( 155 | final_decision %in% 156 | c( 157 | "Exclude State (Data)", 158 | "Exclude State (Model)", 159 | "Exclude State" 160 | ) 161 | ) |> 162 | dplyr::mutate( 163 | type = dplyr::case_when( 164 | final_decision == "Exclude State (Data)" ~ "Data", 165 | final_decision == "Exclude State (Model)" ~ "Model" 166 | ) 167 | ) |> 168 | dplyr::select(state_abb, pathogen, type) 169 | 170 | container_name <- "nssp-etl" 171 | cont <- CFAEpiNow2Pipeline::fetch_blob_container(container_name) 172 | file <- paste0(lubridate::ymd(report_date), "_state_exclusions.csv") 173 | cli::cli_alert_info( 174 | "saving {file} in {container_name}/state_exclusions" 175 | ) 176 | AzureStor::storage_write_csv( 177 | cont = cont, 178 | object = state_exclusions, 179 | file = file.path( 180 | "state_exclusions", 181 | file 182 | ) 183 | ) 184 | 185 | #### Temp old-pipeline csv generator##### 186 | # Save a version in temp folder. 187 | # Need to copy and paste this in current blank outlier csv file 188 | # Can get rid of this once we end old pipeline support 189 | point_exclusions <- combined_df |> 190 | dplyr::filter(!is.na(drop_dates)) |> 191 | dplyr::mutate( 192 | raw_confirm = NA, 193 | clean_confirm = NA 194 | ) |> 195 | dplyr::select( 196 | reference_date, 197 | report_date, 198 | "geo_value", 199 | "pathogen" 200 | ) |> 201 | dplyr::mutate( 202 | geo_value = tolower(geo_value), 203 | pathogen = dplyr::case_when( 204 | pathogen == "Influenza" ~ "flu", 205 | pathogen == "COVID-19" ~ "covid", 206 | .default = as.character(pathogen) 207 | ) 208 | ) 209 | cli::cli_alert_info( 210 | "saving {lubridate::ymd(report_date)}.csv in 211 | {container_name}/temp_outliers_for_old" 212 | ) 213 | AzureStor::storage_write_csv( 214 | cont = cont, 215 | object = point_exclusions, 216 | file = file.path( 217 | "temp_outliers_for_old", 218 | paste0(lubridate::ymd(report_date), ".csv") 219 | ) 220 | ) 221 | } 222 | } 223 | 224 | 225 | create_pt_excl_from_rt_xslx(dates = date_names) 226 | --------------------------------------------------------------------------------
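A minimal sketch of how `utils/Rt_review_exclusions.R` might be invoked from the command line, assuming the `CFAEpiNow2Pipeline` package and its dependencies are installed and that SharePoint (Microsoft365R device-code) and Azure blob credentials are available in the session. The date shown is only a hypothetical example of the `yyyymmdd` format the `--dates` option expects; it is not a date prescribed by the repository.

```bash
# Hypothetical invocation: pull the Rt review workbook for a single report date
# and write the point/state exclusion CSVs to blob storage, per the script above.
Rscript utils/Rt_review_exclusions.R --dates 20250402
```

If `--dates` is omitted, the script defaults to today's UTC date, as set in the `optparse` option definition.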