├── .coverage ├── .flake8 ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ ├── maintenance.md │ └── support.yml ├── PULL_REQUEST_TEMPLATE │ ├── pull_request_template.md │ └── vulnerability.md └── workflows │ ├── deployment.yaml │ ├── destroy.yaml │ ├── end-to-end.yaml │ ├── terraformChecks.yaml │ ├── terraformSetup.yaml │ └── testPython.yaml ├── .gitignore ├── LICENSE ├── docs ├── CONTRIBUTING.md ├── DISCLAIMER.md ├── README.md ├── azure-select-source-data-container.png ├── basic-guide-to-fhir-api-calls.md ├── code-of-conduct.md ├── devops.md ├── execution-guide.md ├── getting_started.md ├── images │ ├── LA-detailed-arch.png │ ├── LA-high-level-arch.png │ ├── azure-browse.png │ ├── azure-cloud-copy-code.png │ ├── azure-cloud-create-storage.png │ ├── azure-cloud-logged-in-terminal.png │ ├── azure-cloud-logged-in.png │ ├── azure-cloud-select-bash.png │ ├── azure-cloud-shell-bash.png │ ├── azure-cloud-shell-login.png │ ├── azure-cloud-shell-ready.png │ ├── azure-cloud-shell-storage.png │ ├── azure-cloud-shell.png │ ├── azure-cloud_set_FHIR_variable.png │ ├── azure-containers.png │ ├── azure-data-factory-error-button.png │ ├── azure-data-factory-error-details.png │ ├── azure-data-factory-failure.png │ ├── azure-data-factory-launch-studio.png │ ├── azure-device-login.png │ ├── azure-fhir-api-endpoint.png │ ├── azure-fhir-api-response.png │ ├── azure-fhir-api-search.png │ ├── azure-fhir-server.png │ ├── azure-filter-storage-accounts.png │ ├── azure-find-fhir-server.png │ ├── azure-ingestion-single-execution.png │ ├── azure-pipeline-diagram.png │ ├── azure-pipeline-select-monitor.png │ ├── azure-portal-check-account.png │ ├── azure-portal-resource-group.png │ ├── azure-portal-resource-groups.png │ ├── azure-portal.png │ ├── azure-search-cloud-storage.png │ ├── azure-search-data-factories.png │ ├── azure-search-fhir-viewer.png │ ├── azure-select-containers.png │ ├── azure-select-fhir-store.png │ ├── azure-select-ingestion-pipeline-in-studio.png │ ├── azure-select-ingestion-pipeline.png │ ├── azure-select-phi-bucket.png │ ├── azure-select-source-data-container.png │ ├── azure-select-vxu-folder.png │ ├── azure-starter-kit-arch.drawio │ ├── azure-starter-kit-arch.drawio.png │ ├── azure-upload-file.png │ ├── azure-upload.png │ ├── azure-workflow-source.png │ ├── create-environment-1.png │ ├── create-environment-2.png │ ├── create-environment-3.png │ ├── deployment-1.png │ ├── deployment-2.png │ ├── deployment-3.png │ ├── edit-secrets.png │ ├── filter-to-workspace.png │ ├── fork-repo-1.png │ ├── fork-repo-2.png │ ├── high-level-phdi-arch.drawio │ ├── high-level-phdi-arch.png │ ├── make-new-env-1.png │ ├── make-new-env-2.png │ ├── manage-deleted-vaults.png │ ├── navigate-to-actions.png │ ├── navigate-to-settings.png │ ├── quick-start-ready.png │ ├── repo-secret-2.png │ ├── repo-secret-3.png │ ├── repo-secret-4.png │ ├── resource-group.png │ ├── terraform-setup-1.png │ ├── terraform-setup-2.png │ ├── terraform-setup-3.png │ ├── trigger-deployment-1.png │ ├── trigger-deployment-2.png │ ├── troubleshooting-guide-1.png │ ├── troubleshooting-guide-10.png │ ├── troubleshooting-guide-11.png │ ├── troubleshooting-guide-12.png │ ├── troubleshooting-guide-13.png │ ├── troubleshooting-guide-14.png │ ├── troubleshooting-guide-15.png │ ├── troubleshooting-guide-2.png │ ├── troubleshooting-guide-3.png │ ├── troubleshooting-guide-4.png │ ├── troubleshooting-guide-5.png │ ├── troubleshooting-guide-6.png │ ├── troubleshooting-guide-7.png │ ├── troubleshooting-guide-8.png │ 
└── troubleshooting-guide-9.png ├── implementation-guide.md ├── local-development.md ├── open_practices.md ├── phdi-fhir-api-template.postman_collection.json ├── pipeline-troubleshooting-guide.md ├── rules_of_behavior.md ├── support.md └── thanks.md ├── quick-start.sh ├── sample-data ├── ECR_e2e_sample.xml ├── ELR_e2e_sample_1.hl7 ├── ELR_e2e_sample_2.hl7 ├── VXU-V04-01_success_single.hl7 ├── VXU-V04-02_failedConversion.hl7 ├── VXU-V04-02_failedUpload.hl7 ├── VXU-V04-02_success_batch.hl7 ├── VXU-V04-03_batch_1_success_1_failConversion.hl7 └── VXU_single_messy_demo.hl7 ├── scripts ├── Synapse │ ├── ReRunECRfromPostBundle.ipynb │ ├── analyzeLinkageAlgorithms.ipynb │ ├── config │ │ ├── ECRDatastoreRefreshConfig.json │ │ ├── ECRDatastoreRefreshDailyTriggerConfig.json │ │ ├── SynapseAnalyticsPipelineConfig.json │ │ ├── SynapseAnalyticsPipelineWeeklyTriggerConfig.json │ │ ├── UpdateMIIConfig.json │ │ ├── UpdateMIITriggerConfig.json │ │ ├── UpdateMPIConfig.json │ │ ├── UpdateMPITriggerConfig.json │ │ ├── covid_identification_config.json │ │ └── ecr_datastore_config.json │ ├── convertParquetMPI.ipynb │ ├── generateIRISCaseFiles.ipynb │ ├── updateECRDataStore.ipynb │ ├── updateECRDataStoreIncidentID.ipynb │ ├── updateECRDataStoreIrisID.ipynb │ ├── updateECRDataStorePersonID.ipynb │ └── updateMII.ipynb ├── assets │ ├── dummy_delta_lake.ipynb │ └── test_delta_lake │ │ ├── .part-00000-f4dc767c-0c82-4b10-8b6b-3b1b58486a0a-c000.snappy.parquet.crc │ │ ├── .part-00017-4fab7ac2-2d38-4a71-8638-9b552dacc55a-c000.snappy.parquet.crc │ │ ├── .part-00023-ebe6c6a3-864e-4865-9eb5-d5001fd84555-c000.snappy.parquet.crc │ │ ├── _delta_log │ │ ├── .00000000000000000000.json.crc │ │ └── 00000000000000000000.json │ │ ├── part-00000-f4dc767c-0c82-4b10-8b6b-3b1b58486a0a-c000.snappy.parquet │ │ ├── part-00017-4fab7ac2-2d38-4a71-8638-9b552dacc55a-c000.snappy.parquet │ │ └── part-00023-ebe6c6a3-864e-4865-9eb5-d5001fd84555-c000.snappy.parquet └── get_sha.sh ├── serverless-functions ├── ReadSourceData │ ├── __init__.py │ └── function.json ├── host.json ├── requirements.txt ├── requirements_dev.txt └── tests │ └── ReadSourceData │ ├── CDA_RR.xml │ ├── CDA_eICR.xml │ ├── test_fhir_bundle.json │ └── test_read_source_data.py └── terraform ├── implementation ├── backend.tf ├── main.tf └── variables.tf ├── modules ├── data_factory │ ├── data.tf │ ├── ingestion-pipeline.json │ ├── main.tf │ ├── outputs.tf │ ├── pipeline-metrics-dashboard.json │ └── variables.tf ├── read_source_data │ ├── data.tf │ ├── main.tf │ ├── outputs.tf │ └── variables.tf └── shared │ ├── data.tf │ ├── main.tf │ ├── outputs.tf │ └── variables.tf └── setup ├── main.tf └── variables.tf /.coverage: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/.coverage -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = 3 | .git, 4 | .pytest_cache, 5 | __pycache__, 6 | docs 7 | max-line-length = 88 8 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Normal text, leave line-ending handling set to auto 2 | *.htm text 3 | *.html text 4 | *.css text 5 | *.js text 6 | 7 | ## Declare files that will always have LF (aka \n aka 10 aka 0x0a) line endings on checkout.
8 | *.sh text eol=lf 9 | *.md text eol=lf 10 | *.json text eol=lf 11 | *.yml text eol=lf 12 | *.csv text eol=lf 13 | 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what is not working. 12 | 13 | **Impact** 14 | Please describe the impact this bug is causing to your program or organization. 15 | 16 | **To Reproduce** 17 | Steps to reproduce the behavior: 18 | 1. Go to '...' 19 | 2. Click on '....' 20 | 3. Scroll down to '....' 21 | 4. See error 22 | 23 | **Expected behavior** 24 | A clear and concise description of what you expected to happen. 25 | 26 | **Screenshots** 27 | If applicable, add screenshots to help explain your problem. 28 | 29 | **Logs** 30 | If applicable, please attach logs to help describe your problem. 31 | 32 | **Desktop (please complete the following information):** 33 | - OS: [e.g. iOS] 34 | - Browser [e.g. chrome, safari] 35 | - Version [e.g. 22] 36 | 37 | **Smartphone (please complete the following information):** 38 | - Device: [e.g. iPhone6] 39 | - OS: [e.g. iOS8.1] 40 | - Browser [e.g. stock browser, safari] 41 | - Version [e.g. 22] 42 | 43 | **Additional context** 44 | Add any other context about the problem here. 45 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/maintenance.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Maintenance 3 | about: Questions and requests related to organizational support and maintenance 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What type of help do you need?** 11 | 12 | * [ ] Question 13 | * [ ] New Repo 14 | * [ ] Delete Repo 15 | * [ ] Other 16 | 17 | **Please describe how you'd like us to help.** 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/support.yml: -------------------------------------------------------------------------------- 1 | name: Support 2 | description: File a support ticket to describe the problem that you're encountering.
3 | title: "[Support]: " 4 | labels: ["support"] 5 | body: 6 | - type: textarea 7 | id: problem 8 | attributes: 9 | label: Problem 10 | description: Provide a clear and concise description of what the problem is, including the associated pipeline ids and the impacted pipeline activities if possible. 11 | placeholder: I noticed an error with pipeline run id "9cf47678-e546-11ed-9b86-00155d9996ff" at the "validation_if" activity. The validation failed due to missing information in the 'City' field despite this being an optional field. 12 | validations: 13 | required: true 14 | - type: markdown 15 | attributes: 16 | value: | 17 | #### Note: Please create a separate support ticket for each type of error you encounter. 18 | - type: textarea 19 | id: impact 20 | attributes: 21 | label: Impact 22 | description: If applicable, describe the impact this problem is causing to your program or organization. 23 | placeholder: This error impacts ~30% of eCR messages. I would consider this problem to be severity level 2 since it prevents the processing of a significant amount of eCR messages. 24 | validations: 25 | required: false 26 | - type: markdown 27 | attributes: 28 | value: | 29 | #### Note: Please suggest a severity label of 1 (critical), 2 (major), or 3 (minor) to this ticket prior to submitting. Descriptions and examples of each severity level can be found on the [Atlassian Severity Levels site](https://www.atlassian.com/incident-management/kpis/severity-levels). 30 | - type: textarea 31 | id: steps 32 | attributes: 33 | label: Steps to Reproduce 34 | description: List steps to reproduce the problem. 35 | value: | 36 | 1. 37 | 2. 38 | 3. 39 | validations: 40 | required: false 41 | - type: textarea 42 | id: intended-outcome 43 | attributes: 44 | label: Intended outcome 45 | description: Provide a clear and concise description of what you expected to happen. 46 | placeholder: I expected the eCR message to successfully pass validation despite not having information in the "City" field. 47 | validations: 48 | required: false 49 | - type: textarea 50 | id: desktop 51 | attributes: 52 | label: Desktop information 53 | description: Provide information about the OS, Browser, and Version. 54 | value: | 55 | OS [e.g., iOS]: 56 | Browser [e.g., Chrome, Safari]: 57 | Version [e.g., 22]: 58 | - type: textarea 59 | id: attachments 60 | attributes: 61 | label: Attachments 62 | description: You can attach screenshots or log files by clicking this area to highlight it and then dragging files in. 63 | placeholder: Please make sure your attachments do NOT include PHI. 64 | validations: 65 | required: false 66 | - type: checkboxes 67 | id: PHI 68 | attributes: 69 | label: PHI Review 70 | description: Confirm there is no Protected Health Information (PHI) in the information provided, including attachments such as screenshots or log files. 71 | options: 72 | - label: I have reviewed the information in this ticket and can confirm that no PHI has been included. 73 | required: true 74 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/pull_request_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Pull request 3 | about: Describe the changes proposed in this pull request 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | --- 8 | 9 | **Please describe the bug this fixes or the feature this adds.** 10 | 11 | **Please describe how you tested this change.
Include unit tests whenever possible.** 12 | 13 | **Did you create or modify any associated documentation with this change? If documentation is not included in this PR, please link to related documentation.** 14 | 15 | **If you added or modified HTML, did you check that it was 508 compliant?** 16 | 17 | **Please tag any specific reviewers you would like to review this PR.** 18 | 19 | **Please complete the following checks for open-source contributions:** 20 | 21 | * [ ] Did you check for sensitive data, and remove any? 22 | * [ ] Are additional approvals needed for this change? 23 | * [ ] Are there potential vulnerabilities or licensing issues with any new dependencies introduced? 24 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/vulnerability.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Vulnerability Maintenance 3 | about: Routine updates to address vulnerabilities. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What vulnerabilities does this PR remove or update?** 11 | 12 | **Have you tested to make sure these updates do not cause unintended consequences?** 13 | 14 | **Are these patch, minor, or major updates?** 15 | -------------------------------------------------------------------------------- /.github/workflows/destroy.yaml: -------------------------------------------------------------------------------- 1 | name: Destroy environment 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | environment: 7 | description: "Environment to destroy" 8 | type: environment 9 | required: true 10 | 11 | permissions: 12 | id-token: write 13 | contents: read 14 | packages: write 15 | jobs: 16 | destroy: 17 | name: Destroy (${{github.event.inputs.environment}}) environment 18 | runs-on: ubuntu-latest 19 | environment: main 20 | defaults: 21 | run: 22 | shell: bash 23 | working-directory: ./terraform/implementation 24 | outputs: 25 | tf_env: ${{ steps.set-environment.outputs.tf_env }} 26 | short_cid: ${{ steps.set-environment.outputs.short_cid }} 27 | steps: 28 | - name: Check Out Changes 29 | uses: actions/checkout@v3 30 | 31 | - name: Setup Terraform 32 | uses: hashicorp/setup-terraform@v2 33 | 34 | - name: "Azure login" 35 | uses: azure/login@v1 36 | with: 37 | client-id: ${{ secrets.CLIENT_ID }} 38 | tenant-id: ${{ secrets.TENANT_ID }} 39 | subscription-id: ${{ secrets.SUBSCRIPTION_ID }} 40 | 41 | - name: Load input variables 42 | env: 43 | SUBSCRIPTION_ID: ${{ secrets.SUBSCRIPTION_ID }} 44 | LOCATION: ${{ secrets.LOCATION }} 45 | RESOURCE_GROUP_NAME: ${{ secrets.RESOURCE_GROUP_NAME }} 46 | SMARTY_AUTH_ID: ${{ secrets.SMARTY_AUTH_ID }} 47 | SMARTY_AUTH_TOKEN: ${{ secrets.SMARTY_AUTH_TOKEN }} 48 | SMARTY_LICENSE_TYPE: ${{ secrets.SMARTY_LICENSE_TYPE }} 49 | CLIENT_ID: ${{ secrets.CLIENT_ID }} 50 | OBJECT_ID: ${{ secrets.OBJECT_ID }} 51 | GHCR_USERNAME: ${{ github.actor }} 52 | GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }} 53 | run: | 54 | echo subscription_id=\""$SUBSCRIPTION_ID"\" >> terraform.tfvars 55 | echo location=\""$LOCATION"\" >> terraform.tfvars 56 | echo resource_group_name=\""$RESOURCE_GROUP_NAME"\" >> terraform.tfvars 57 | echo smarty_auth_id=\""$SMARTY_AUTH_ID"\" >> terraform.tfvars 58 | echo smarty_auth_token=\""$SMARTY_AUTH_TOKEN"\" >> terraform.tfvars 59 | echo smarty_license_type=\""$SMARTY_LICENSE_TYPE"\" >> terraform.tfvars 60 | echo client_id=\""$CLIENT_ID"\" >> terraform.tfvars 61 | echo object_id=\""$OBJECT_ID"\" >> terraform.tfvars 62 | echo
ghcr_username=\""$GHCR_USERNAME"\" >> terraform.tfvars 63 | echo ghcr_token=\""$GHCR_TOKEN"\" >> terraform.tfvars 64 | echo use_oidc=true >> terraform.tfvars 65 | echo resource_group_name=\""$RESOURCE_GROUP_NAME"\" >> backend.tfvars 66 | echo storage_account_name=\"phditfstate"${CLIENT_ID:0:8}"\" >> backend.tfvars 67 | echo use_oidc=true >> backend.tfvars 68 | echo use_msi=true >> backend.tfvars 69 | echo fhir_converter_url=\"""\" >> terraform.tfvars 70 | echo ingestion_container_url=\"""\" >> terraform.tfvars 71 | echo message_parser_url=\"""\" >> terraform.tfvars 72 | az config set defaults.location=$LOCATION defaults.group=$RESOURCE_GROUP_NAME 73 | 74 | - name: Set environment 75 | id: set-environment 76 | env: 77 | CLIENT_ID: ${{ secrets.CLIENT_ID }} 78 | run: |- 79 | if [[ "${{ github.event.inputs.environment }}" == "prod" ]]; then 80 | echo "Prod environment cannot be destroyed" 81 | exit 1 82 | fi 83 | 84 | echo "tf_env=$( 85 | if [[ "${{ github.event.inputs.environment }}" != "" ]]; then 86 | echo ${{ github.event.inputs.environment }} 87 | else 88 | echo dev 89 | fi 90 | )" >> $GITHUB_OUTPUT 91 | echo "short_cid=${CLIENT_ID:0:8}" >> $GITHUB_OUTPUT 92 | 93 | - name: Destroy environment 94 | env: 95 | ARM_CLIENT_ID: ${{ secrets.CLIENT_ID }} 96 | ARM_TENANT_ID: ${{ secrets.TENANT_ID }} 97 | ARM_SUBSCRIPTION_ID: ${{ secrets.SUBSCRIPTION_ID }} 98 | TF_ENV: ${{ steps.set-environment.outputs.tf_env }} 99 | run: | 100 | terraform init -backend-config=backend.tfvars 101 | terraform workspace select $TF_ENV || terraform workspace new $TF_ENV 102 | terraform destroy -auto-approve -lock-timeout=30m 103 | -------------------------------------------------------------------------------- /.github/workflows/end-to-end.yaml: -------------------------------------------------------------------------------- 1 | name: End-to-end test 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | environment: 7 | description: "Environment in which to conduct end to end testing." 
8 | type: string 9 | required: true 10 | 11 | permissions: 12 | id-token: write 13 | contents: read 14 | packages: write 15 | jobs: 16 | e2e: 17 | name: End-to-end tests 18 | runs-on: ubuntu-latest 19 | environment: main 20 | steps: 21 | - name: Check Out Changes 22 | uses: actions/checkout@v3 23 | - name: Azure login 24 | uses: azure/login@v1 25 | with: 26 | client-id: ${{ secrets.CLIENT_ID }} 27 | tenant-id: ${{ secrets.TENANT_ID }} 28 | subscription-id: ${{ secrets.SUBSCRIPTION_ID }} 29 | - name: Azure defaults 30 | env: 31 | LOCATION: ${{ secrets.LOCATION }} 32 | RESOURCE_GROUP_NAME: ${{ secrets.RESOURCE_GROUP_NAME }} 33 | run: az config set defaults.location=$LOCATION defaults.group=$RESOURCE_GROUP_NAME 34 | - name: Set environment 35 | id: set-environment 36 | env: 37 | CLIENT_ID: ${{ secrets.CLIENT_ID }} 38 | run: |- 39 | echo "tf_env=$( 40 | if [[ "${{ inputs.environment }}" != "" ]]; then 41 | echo ${{ inputs.environment }} 42 | else 43 | echo dev 44 | fi 45 | )" >> $GITHUB_OUTPUT 46 | echo "short_cid=${CLIENT_ID:0:8}" >> $GITHUB_OUTPUT 47 | - name: Upload sample data 48 | id: upload-sample-data 49 | env: 50 | TF_ENV: ${{ steps.set-environment.outputs.tf_env }} 51 | SHORT_CID: ${{ steps.set-environment.outputs.short_cid }} 52 | run: | 53 | TIMESTAMP=$(date "+%Y-%m-%dT%H:%M:%S") 54 | echo "timestamp=$TIMESTAMP" >> $GITHUB_OUTPUT 55 | az storage blob upload --account-name phdi${TF_ENV}phi${SHORT_CID} --container-name source-data --name elr/ELR_e2e_sample_1_$TIMESTAMP.hl7 --file sample-data/ELR_e2e_sample_1.hl7 56 | - name: Check pipeline run 57 | env: 58 | TF_ENV: ${{ steps.set-environment.outputs.tf_env }} 59 | SHORT_CID: ${{ steps.set-environment.outputs.short_cid }} 60 | TIMESTAMP: ${{ steps.upload-sample-data.outputs.timestamp }} 61 | run: | 62 | az extension add --name datafactory --upgrade 63 | START_DATE=$(date --date="1 day ago" "+%Y-%m-%dT%H:%M:%S") 64 | END_DATE=$(date --date="1 day" "+%Y-%m-%dT%H:%M:%S") 65 | SOURCE_FILE="source-data/elr/ELR_e2e_sample_1_$TIMESTAMP.hl7" 66 | CHECK_COUNT=0 67 | 68 | check_pipeline_run_count() { 69 | az datafactory pipeline-run query-by-factory --factory-name "phdi-$TF_ENV-data-factory-$SHORT_CID" --filters operand="PipelineName" operator="Equals" values="phdi-$TF_ENV-ingestion" --filters operand="Status" operator="Equals" values="$1" --last-updated-after "$TIMESTAMP" --last-updated-before "$END_DATE" --query "value" | jq --arg SOURCE_FILE "$SOURCE_FILE" 'map(select(.parameters.filename == $SOURCE_FILE)) | length' 70 | } 71 | 72 | check_pipeline_success_count() { 73 | check_pipeline_run_count "Succeeded" 74 | } 75 | 76 | check_pipeline_failure_count() { 77 | check_pipeline_run_count "Failed" 78 | } 79 | 80 | while [[ "$(check_pipeline_success_count)" -lt 1 ]]; do 81 | if [[ "$CHECK_COUNT" -gt 60 || "$(check_pipeline_failure_count)" -gt 0 ]]; then 82 | echo "Pipeline run failed" 83 | exit 1 84 | fi 85 | echo "Waiting for pipeline run to complete..." 86 | sleep 10 87 | CHECK_COUNT=$((CHECK_COUNT+1)) 88 | done 89 | 90 | echo "Pipeline run succeeded!"
91 | 92 | - name: Query FHIR server 93 | env: 94 | TF_ENV: ${{ steps.set-environment.outputs.tf_env }} 95 | SHORT_CID: ${{ steps.set-environment.outputs.short_cid }} 96 | TIMESTAMP: ${{ steps.upload-sample-data.outputs.timestamp }} 97 | run: | 98 | TOKEN=$(az account get-access-token --resource=https://${TF_ENV}${SHORT_CID}-fhir-server.fhir.azurehealthcareapis.com --query accessToken --output tsv) 99 | RESPONSE=$(curl -X GET --header "Authorization: Bearer $TOKEN" "https://${TF_ENV}${SHORT_CID}-fhir-server.fhir.azurehealthcareapis.com/Patient?family=SHEPARD&given=JOHN") 100 | echo $RESPONSE | jq -e '.entry[0].resource.name[0].family == "SHEPARD" and .entry[0].resource.name[0].given[0] == "JOHN"' 101 | PATIENT_ID=$(echo $RESPONSE | jq -r '.entry[0].resource.id') 102 | echo "FHIR server query succeeded! Deleting sample data..." 103 | az storage blob delete --account-name phdi${TF_ENV}phi${SHORT_CID} --container-name source-data --name elr/ELR_e2e_sample_1_$TIMESTAMP.hl7 104 | curl -X DELETE --header "Authorization: Bearer $TOKEN" "https://${TF_ENV}${SHORT_CID}-fhir-server.fhir.azurehealthcareapis.com/Patient?identifier=${PATIENT_ID}&hardDelete=true" 105 | -------------------------------------------------------------------------------- /.github/workflows/terraformChecks.yaml: -------------------------------------------------------------------------------- 1 | name: Terraform checks 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | environment: 6 | description: "Environment to deploy to" 7 | type: environment 8 | required: true 9 | pull_request: 10 | branches: 11 | - "**" 12 | paths: 13 | - "terraform/**" 14 | 15 | jobs: 16 | check-terraform-formatting: 17 | runs-on: ubuntu-latest 18 | defaults: 19 | run: 20 | working-directory: ./terraform 21 | steps: 22 | - uses: actions/checkout@v2 23 | - uses: hashicorp/setup-terraform@v2 24 | - name: Check format 25 | run: terraform fmt -check -recursive 26 | check-terraform-validity: 27 | runs-on: ubuntu-latest 28 | defaults: 29 | run: 30 | working-directory: ./terraform 31 | env: 32 | TERRAFORM_DIRS: | 33 | setup implementation 34 | steps: 35 | - uses: actions/checkout@v3 36 | - uses: hashicorp/setup-terraform@v2 37 | - name: Terraform Init 38 | run: | 39 | for d in $TERRAFORM_DIRS 40 | do 41 | echo "Initializing $d"; 42 | (cd $d && terraform init -backend=false) 43 | done 44 | - name: Terraform Validate 45 | run: | 46 | for d in $TERRAFORM_DIRS 47 | do 48 | echo "Validating $d"; 49 | (cd $d && terraform validate) 50 | done 51 | # check-terraform-plan: 52 | # runs-on: ubuntu-latest 53 | # environment: main 54 | # defaults: 55 | # run: 56 | # working-directory: ./terraform/implementation 57 | # permissions: 58 | # contents: "read" 59 | # id-token: "write" 60 | # steps: 61 | # - name: Check Out Changes 62 | # uses: actions/checkout@v3 63 | 64 | # - name: Setup Terraform 65 | # uses: hashicorp/setup-terraform@v2 66 | 67 | # - name: "Azure login" 68 | # uses: azure/login@v1 69 | # with: 70 | # client-id: ${{ secrets.CLIENT_ID }} 71 | # tenant-id: ${{ secrets.TENANT_ID }} 72 | # subscription-id: ${{ secrets.SUBSCRIPTION_ID }} 73 | 74 | # - name: Load input variables 75 | # env: 76 | # SUBSCRIPTION_ID: ${{ secrets.SUBSCRIPTION_ID }} 77 | # LOCATION: ${{ secrets.LOCATION }} 78 | # RESOURCE_GROUP_NAME: ${{ secrets.RESOURCE_GROUP_NAME }} 79 | # SMARTY_AUTH_ID: ${{ secrets.SMARTY_AUTH_ID }} 80 | # SMARTY_AUTH_TOKEN: ${{ secrets.SMARTY_AUTH_TOKEN }} 81 | # run: | 82 | # echo subscription_id=\""$SUBSCRIPTION_ID"\" >> terraform.tfvars 83 | # echo 
location=\""$LOCATION"\" >> terraform.tfvars 84 | # echo resource_group_name=\""$RESOURCE_GROUP_NAME"\" >> terraform.tfvars 85 | # echo smarty_auth_id=\""$SMARTY_AUTH_ID"\" >> terraform.tfvars 86 | # echo smarty_auth_token=\""$SMARTY_AUTH_TOKEN"\" >> terraform.tfvars 87 | # echo resource_group_name=\""$RESOURCE_GROUP_NAME"\" >> backend.tfvars 88 | # echo storage_account_name=\"phditfstate"${SUBSCRIPTION_ID:0:8}"\" >> backend.tfvars 89 | 90 | # - name: Set environment 91 | # run: |- 92 | # echo "TF_ENVIRONMENT=$( 93 | # if "${{ github.event.inputs.environment }}"; then 94 | # echo ${{ github.event.inputs.environment }} 95 | # else 96 | # echo dev 97 | # fi 98 | # )" >> $GITHUB_ENV 99 | 100 | # - name: terraform 101 | # env: 102 | # ARM_CLIENT_ID: ${{ secrets.CLIENT_ID }} 103 | # ARM_TENANT_ID: ${{ secrets.TENANT_ID }} 104 | # ARM_SUBSCRIPTION_ID: ${{ secrets.SUBSCRIPTION_ID }} 105 | # run: | 106 | # terraform init -backend-config=backend.tfvars 107 | # terraform workspace select ${{ env.TF_ENVIRONMENT }} || terraform workspace new ${{ env.TF_ENVIRONMENT }} 108 | # terraform plan 109 | -------------------------------------------------------------------------------- /.github/workflows/terraformSetup.yaml: -------------------------------------------------------------------------------- 1 | name: Terraform Setup 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | permissions: 7 | id-token: write 8 | contents: read 9 | jobs: 10 | setup_environment: 11 | name: Setup a new Azure environment by creating a tfstate bucket 12 | runs-on: ubuntu-latest 13 | environment: main 14 | defaults: 15 | run: 16 | shell: bash 17 | working-directory: ./terraform/setup 18 | steps: 19 | - name: Check Out Changes 20 | uses: actions/checkout@v3 21 | 22 | - name: Setup Terraform 23 | uses: hashicorp/setup-terraform@v2 24 | 25 | - name: "Azure login" 26 | uses: azure/login@v1 27 | with: 28 | client-id: ${{ secrets.CLIENT_ID }} 29 | tenant-id: ${{ secrets.TENANT_ID }} 30 | subscription-id: ${{ secrets.SUBSCRIPTION_ID }} 31 | 32 | - name: Load input variables 33 | env: 34 | SUBSCRIPTION_ID: ${{ secrets.SUBSCRIPTION_ID }} 35 | LOCATION: ${{ secrets.LOCATION }} 36 | RESOURCE_GROUP_NAME: ${{ secrets.RESOURCE_GROUP_NAME }} 37 | CLIENT_ID: ${{ secrets.CLIENT_ID }} 38 | run: | 39 | echo subscription_id=\""$SUBSCRIPTION_ID"\" >> terraform.tfvars 40 | echo location=\""$LOCATION"\" >> terraform.tfvars 41 | echo resource_group_name=\""$RESOURCE_GROUP_NAME"\" >> terraform.tfvars 42 | echo client_id=\""$CLIENT_ID"\" >> terraform.tfvars 43 | 44 | - name: terraform 45 | env: 46 | ARM_CLIENT_ID: ${{ secrets.CLIENT_ID }} 47 | ARM_TENANT_ID: ${{ secrets.TENANT_ID }} 48 | ARM_SUBSCRIPTION_ID: ${{ secrets.SUBSCRIPTION_ID }} 49 | run: | 50 | terraform init 51 | terraform apply -auto-approve -lock-timeout=30m 52 | -------------------------------------------------------------------------------- /.github/workflows/testPython.yaml: -------------------------------------------------------------------------------- 1 | name: Test 2 | on: 3 | workflow_dispatch: 4 | pull_request: 5 | branches: 6 | - "**" 7 | push: 8 | branches: 9 | - main 10 | 11 | env: 12 | TEST_RUNNER_PYTHON_VERSION: 3.9 13 | 14 | jobs: 15 | unit-test-python-functions: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v2 20 | - name: Setup python ${{env.TEST_RUNNER_PYTHON_VERSION}} 21 | uses: actions/setup-python@v4 22 | with: 23 | python-version: ${{env.TEST_RUNNER_PYTHON_VERSION}} 24 | cache: pip 25 | - name: Install Pytest 26 | run: pip install 
pytest 27 | - name: Install dependencies 28 | working-directory: ./serverless-functions 29 | run: | 30 | pip install -r requirements.txt 31 | - name: Run unit tests for serverless-functions 32 | working-directory: ./serverless-functions 33 | run: | 34 | python -m pytest 35 | 36 | code-check-python: 37 | runs-on: ubuntu-latest 38 | steps: 39 | - name: Checkout 40 | uses: actions/checkout@v2 41 | - name: Setup python ${{env.TEST_RUNNER_PYTHON_VERSION}} 42 | uses: actions/setup-python@v4 43 | with: 44 | python-version: ${{env.TEST_RUNNER_PYTHON_VERSION}} 45 | cache: pip 46 | - name: Install dependencies 47 | run: | 48 | pip install -U pip 49 | pip install black flake8 50 | - name: Check format (black) 51 | run: | 52 | black --check --verbose . 53 | - name: Check style (flake8) 54 | run: | 55 | flake8 56 | ensure-clean-notebooks: 57 | runs-on: ubuntu-latest 58 | steps: 59 | - name: Checkout 60 | uses: actions/checkout@v3 61 | - name: Check notebook cleanliness 62 | uses: ResearchSoftwareActions/EnsureCleanNotebooksAction@1.1 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Build output 2 | target/ 3 | *.class 4 | 5 | # Log file 6 | *.log 7 | 8 | # BlueJ files 9 | *.ctxt 10 | 11 | # Mobile Tools for Java (J2ME) 12 | .mtj.tmp/ 13 | 14 | # Package Files # 15 | *.jar 16 | *.war 17 | *.ear 18 | *.zip 19 | *.tar.gz 20 | *.rar 21 | 22 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 23 | hs_err_pid* 24 | 25 | # IDE 26 | .idea/ 27 | *.iml 28 | 29 | # macOS 30 | .DS_Store 31 | 32 | # Python 33 | .venv 34 | .dev_venv 35 | .python-version 36 | .ipynb_checkpoints 37 | 38 | # Azure Functions 39 | *local.settings.json 40 | bin/ 41 | obj/ 42 | extensions.csproj 43 | 44 | # Local Data Storage 45 | __azurite* 46 | __blobstorage__ 47 | __pycache__/ 48 | .pytest_cache/ 49 | data/ 50 | 51 | # Jetbrains IDE settings 52 | /.idea 53 | 54 | .DS_Store 55 | 56 | # Azure Application Creds 57 | appcreds.json 58 | # Terraform 59 | .terraform 60 | .terraform.lock.hcl 61 | .infracost 62 | terraform.tfstate 63 | terraform.tfstate.d 64 | terraform.tfstate.backup 65 | *.tfvars 66 | .vscode/settings.json 67 | credentials.json 68 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 
20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). 
Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | -------------------------------------------------------------------------------- /docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Welcome! 2 | Thank you for contributing to CDC's Open Source projects! 
If you have any 3 | questions or doubts, don't be afraid to send them our way. We appreciate all 4 | contributions, and we are looking forward to fostering an open, transparent, and 5 | collaborative environment. 6 | 7 | Before contributing, we encourage you to also read our [LICENSE](https://github.com/CDCgov/template/blob/master/LICENSE), 8 | [README](https://github.com/CDCgov/template/blob/master/README.md), and 9 | [code-of-conduct](https://github.com/CDCgov/template/blob/master/code-of-conduct.md) 10 | files, also found in this repository. If you have any inquiries or questions not 11 | answered by the content of this repository, feel free to [contact us](mailto:surveillanceplatform@cdc.gov). 12 | 13 | ## Public Domain 14 | This project is in the public domain within the United States, and copyright and 15 | related rights in the work worldwide are waived through the [CC0 1.0 Universal public domain dedication](https://creativecommons.org/publicdomain/zero/1.0/). 16 | All contributions to this project will be released under the CC0 dedication. By 17 | submitting a pull request you are agreeing to comply with this waiver of 18 | copyright interest. 19 | 20 | ## Requesting Changes 21 | Our pull request/merging process is designed to give the CDC Surveillance Team 22 | and others in our space an opportunity to consider and discuss any suggested 23 | changes. This policy affects all CDC spaces, both online and off, and all users 24 | are expected to abide by it. 25 | 26 | ### Open an issue in the repository 27 | If you don't have specific language to submit but would like to suggest a change 28 | or have something addressed, you can open an issue in this repository. Team 29 | members will respond to the issue as soon as possible. 30 | 31 | ### Submit a pull request 32 | If you would like to contribute, please submit a pull request. In order for us 33 | to merge a pull request, it must: 34 | * Be at least seven days old. Pull requests may be held longer if necessary 35 | to give people the opportunity to assess it. 36 | * Receive a +1 from a majority of team members associated with the request. 37 | If there is significant dissent within the team, a meeting will be held to 38 | discuss a plan of action for the pull request. 39 | -------------------------------------------------------------------------------- /docs/DISCLAIMER.md: -------------------------------------------------------------------------------- 1 | # DISCLAIMER 2 | Use of this service is limited to **non-sensitive and publicly available 3 | data**. Users must not use, share, or store any kind of sensitive data like 4 | health status, provision or payment of healthcare, Personally Identifiable 5 | Information (PII) and/or Protected Health Information (PHI), etc. under **ANY** 6 | circumstance. 7 | 8 | Administrators for this service reserve the right to moderate all information 9 | used, shared, or stored with this service at any time. Any user that cannot 10 | abide by this disclaimer and Code of Conduct may be subject to action, up to 11 | and including revoking access to services. 12 | 13 | The material embodied in this software is provided to you "as-is" and without 14 | warranty of any kind, express, implied or otherwise, including without 15 | limitation, any warranty of fitness for a particular purpose. In no event shall 16 | the Centers for Disease Control and Prevention (CDC) or the United States (U.S.)
17 | government be liable to you or anyone else for any direct, special, incidental, 18 | indirect or consequential damages of any kind, or any damages whatsoever, 19 | including without limitation, loss of profit, loss of use, savings or revenue, 20 | or the claims of third parties, whether or not CDC or the U.S. government has 21 | been advised of the possibility of such loss, however caused and on any theory 22 | of liability, arising out of or in connection with the possession, use or 23 | performance of this software. 24 | -------------------------------------------------------------------------------- /docs/azure-select-source-data-container.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/azure-select-source-data-container.png -------------------------------------------------------------------------------- /docs/basic-guide-to-fhir-api-calls.md: -------------------------------------------------------------------------------- 1 | 2 | ### Basic guide to FHIR Server REST calls: 3 | Once you have data in your FHIR server you can access it through REST API calls. Below are some examples of calls you can make. 4 | For additional information visit the [Microsoft FHIR API docs](https://learn.microsoft.com/en-us/azure/healthcare-apis/fhir/fhir-rest-api-capabilities) and/or the [HL7 FHIR Docs site](https://build.fhir.org/). 5 | 6 | #### Prerequisites: 7 | - Access to an Azure FHIR server 8 | 9 | - Azure CLI tool 10 | 11 | - Auth Bearer token (auto-expires and may need regeneration every 60-90 minutes) 12 | 13 | - Generate a token by logging in with `az login`, then 14 | 15 | `az account get-access-token --resource={{FHIR-server-url}} --query accessToken --output tsv` 16 | 17 | - An API tool - Postman, Insomnia, or curl. Make sure to set up your auth header using the generated token. 18 | - Postman example auth header key-value pair: `Authorization: Bearer <token>` 19 | - The end of Step 4 of this guide has a curl example 20 | 21 | - Curl request example from the command line: `curl -X GET --header "Authorization: Bearer <token>" {{FHIR-server-url}}/Patient` 22 | 23 | - Shortcut for Postman users: 24 | - You can import the phdi-fhir-api-template.postman_collection.json in this directory into Postman to auto-load this collection. 25 | - In Postman, import the collection with the Import button. 26 | - Click the three dots by the collection name and select Edit. 27 | - Add your generated authorization token to the Authorization tab and your FHIR server URL to the Variables tab. 28 | - Update any and all variables in the Variables tab for requests you want to try. 29 | - DON'T FORGET TO HIT SAVE IN THE TOP RIGHT AFTER DOING THIS or Postman will not apply the changes. The save button may be obscured by the Collection Details panel, so close it if you can't find Save.
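If you prefer scripting these calls over Postman, below is a minimal, hedged Python sketch (not part of this guide or its Postman collection) that fetches a token via the Azure CLI and runs a patient search; `FHIR_SERVER_URL` is a hypothetical placeholder, and the Azure CLI and the `requests` package are assumed to be installed:

```python
# Hedged sketch, not from this repo: get a bearer token with the Azure CLI
# and run a simple Patient search against the FHIR server.
import subprocess

import requests

# Hypothetical placeholder -- substitute your own FHIR server URL.
FHIR_SERVER_URL = "https://example-fhir-server.fhir.azurehealthcareapis.com"


def get_token(fhir_url: str) -> str:
    """Return an access token for the FHIR server via the Azure CLI."""
    result = subprocess.run(
        ["az", "account", "get-access-token", f"--resource={fhir_url}",
         "--query", "accessToken", "--output", "tsv"],
        capture_output=True, text=True, check=True,
    )
    return result.stdout.strip()


token = get_token(FHIR_SERVER_URL)
response = requests.get(
    f"{FHIR_SERVER_URL}/Patient",
    params={"family": "DOE", "given": "JOHN"},
    headers={"Authorization": f"Bearer {token}"},
)
response.raise_for_status()
# Print how many entries came back in this page of the search bundle.
print(len(response.json().get("entry", [])))
```

Because the token expires, longer-running scripts should call `get_token` again when requests start returning 401.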
30 | 31 | #### Gotchas: 32 | Watch out for capitalization in URLs, for example: 33 | {{FHIR-server-url}}/Patient not {{FHIR-server-url}}/patient 34 | 35 | GET metadata does not require auth and is good for health-checking the server: 36 | `{{FHIR-server-url}}/metadata` 37 | 38 | ### Basic Templates 39 | #### GET Requests 40 | Template format to GET all of a certain resource: 41 | `{{FHIR-server-url}}/{{Resource Name}}` 42 | 43 | Examples: 44 | 45 | GET all Patients: 46 | `{{FHIR-server-url}}/Patient` 47 | 48 | GET all Immunizations: 49 | `{{FHIR-server-url}}/Immunization` 50 | 51 | 52 | Template format to GET a specific resource by id: 53 | `{{FHIR-server-url}}/{{Resource Name}}/<resource-id>` 54 | 55 | Example: 56 | 57 | GET a specific Patient by id: 58 | `{{FHIR-server-url}}/Patient/<patient-id>` 59 | 60 | Template format to GET all resources associated with a specific resource: 61 | `{{FHIR-server-url}}/{{Resource Name}}/<resource-id>/{{Other Resource Name}}` 62 | 63 | Example: 64 | 65 | GET all Observations associated with a Patient ID: 66 | `{{FHIR-server-url}}/Patient/<patient-id>/Observation` 67 | 68 | Template format to GET resources with specific fields. Can be chained with multiple search fields and values using &: 69 | `{{FHIR-server-url}}/{{Resource Name}}?{{first search field name}}={{search value}}&{{second search field name}}={{search value}}` 70 | 71 | Examples: 72 | 73 | GET all patients named John Doe: 74 | `{{FHIR-server-url}}/Patient?family=DOE&given=JOHN` 75 | 76 | GET all Immunizations associated with a Patient ID: 77 | `{{FHIR-server-url}}/Patient/<patient-id>/Immunization` 78 | 79 | Template for chaining resource references using nested objects: 80 | `{{FHIR-server-url}}/{{Resource Name}}?{{object.key}}={{search value}}` 81 | 82 | Example: 83 | 84 | GET all Immunizations for Patients with a specific family name: 85 | `{{FHIR-server-url}}/Immunization?subject.family=<family-name>` 86 | 87 | #### PUT 88 | Template for modifying a resource with PUT: 89 | `{{FHIR-server-url}}/{{Resource Name}}/<resource-id>` 90 | 91 | Example: 92 | 93 | Update a patient with PUT: 94 | `{{FHIR-server-url}}/Patient/<patient-id>` 95 | Request Body: Raw JSON Patient object 96 | 97 | #### POST 98 | Template for adding a resource: 99 | `{{FHIR-server-url}}/{{Resource Name}}` 100 | `Request Body: Raw JSON resource object` 101 | 102 | Example: 103 | 104 | Add a patient with POST: 105 | `{{FHIR-server-url}}/Patient` 106 | Request Body: Raw JSON Patient object 107 | 108 | #### DELETE 109 | Template for removing a resource: 110 | `{{FHIR-server-url}}/{{Resource Name}}/<resource-id>` 111 | 112 | Example: 113 | 114 | Remove a patient with DELETE: 115 | `{{FHIR-server-url}}/Patient/<patient-id>` 116 | 117 | ### Operations to include referenced resources 118 | #### _include 119 | GET a resource, AND resource(s) referenced within it 120 | 121 | Template to join resources using _include: 122 | `{{FHIR-server-url}}/{{Resource Name}}?_include={{Resource Name}}:{{matching reference type}}` 123 | 124 | Examples: 125 | 126 | GET all Observation Resources AND the Patient resource referenced within: 127 | `{{FHIR-server-url}}/Observation?_include=Observation:subject` 128 | 129 | GET a specific Observation Resource AND the Performer referenced within: 130 | `{{FHIR-server-url}}/Observation?_id=<observation-id>&_include=Observation:performer` 131 | 132 | GET a Claim resource by ID AND the Provider referenced within: 133 | `{{FHIR-server-url}}/Claim?_id=<claim-id>&_include=Claim:provider` 134 |
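Both _include and _revinclude (next section) return one searchset bundle that mixes the matched resources with the included ones; each entry carries a `search.mode` of `match` or `include`. As a hedged sketch reusing the hypothetical `get_token` and `FHIR_SERVER_URL` from the prerequisites example above, the two groups can be separated like this:

```python
# Sketch: split matched vs. included resources in an _include search result.
import requests


def split_include_bundle(fhir_url: str, token: str) -> tuple[list, list]:
    """Run an _include search and return (matched, included) resources."""
    bundle = requests.get(
        f"{fhir_url}/Observation",
        params={"_include": "Observation:subject"},
        headers={"Authorization": f"Bearer {token}"},
    ).json()
    matches, includes = [], []
    for entry in bundle.get("entry", []):
        # search.mode is "match" for the Observations themselves and
        # "include" for the Patient resources pulled in by _include.
        if entry.get("search", {}).get("mode") == "include":
            includes.append(entry["resource"])
        else:
            matches.append(entry["resource"])
    return matches, includes
```

For example, `observations, patients = split_include_bundle(FHIR_SERVER_URL, token)`.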
135 | #### _revinclude 136 | GET a resource AND resource(s) referencing that resource 137 | 138 | Template to join resources using _revinclude: 139 | `{{FHIR-server-url}}/{{Resource Name}}?_revinclude={{Referencing Resource Name}}:{{matching reference type}}` 140 | 141 | Examples: 142 | 143 | GET all Patient resources AND all Observation resources referencing those Patients: 144 | `{{FHIR-server-url}}/Patient?_revinclude=Observation:subject` 145 | 146 | GET a Patient Resource by id AND all Immunization resources associated with that Patient: 147 | `{{FHIR-server-url}}/Patient?_id=<patient-id>&_revinclude=Immunization:patient` 148 | 149 | GET a Practitioner Resource by id AND all Encounters referencing that Practitioner: 150 | `{{FHIR-server-url}}/Practitioner?_id=<practitioner-id>&_revinclude=Encounter:practitioner` 151 | 152 | ### Additional Modifiers 153 | 154 | Add a total found record count: 155 | `&_total=accurate` 156 | 157 | Set the number of items returned per page: 158 | `?_count=<number>` 159 | 160 | De-paginates a request response if the bundle would otherwise be paginated: 161 | `$everything` 162 |
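As a final hedged sketch under the same assumptions as the examples above: instead of de-paginating with `$everything`, a client can also page through results by following each bundle's link whose `relation` is `next`:

```python
# Sketch: collect every page of a search by following the bundle "next" links.
import requests


def fetch_all(fhir_url: str, token: str, resource: str = "Patient") -> list:
    """Return all resources of the given type, one page at a time."""
    headers = {"Authorization": f"Bearer {token}"}
    url = f"{fhir_url}/{resource}?_count=50"  # _count sets the page size
    resources = []
    while url:
        bundle = requests.get(url, headers=headers).json()
        resources.extend(entry["resource"] for entry in bundle.get("entry", []))
        # While more pages remain, the server includes a link with relation "next".
        url = next(
            (link["url"] for link in bundle.get("link", []) if link["relation"] == "next"),
            None,
        )
    return resources
```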
-------------------------------------------------------------------------------- /docs/code-of-conduct.md: -------------------------------------------------------------------------------- 1 | # Creating a Culture of Innovation 2 | We aspire to create a culture where people work joyfully, communicate openly 3 | about things that matter, and provide great services globally. We would like our 4 | team and communities (both government and private sector) to reflect 5 | diversity of all kinds, not just the classes protected in law. Diversity fosters 6 | innovation. Diverse teams are creative teams. We need a diversity of perspective 7 | to create solutions for the challenges we face. 8 | 9 | This is our code of conduct (adapted from [18F's Code of Conduct](https://github.com/18F/code-of-conduct)). 10 | We follow all Equal Employment Opportunity laws and we expect everyone we work 11 | with to adhere to the [GSA Anti-harassment Policy](http://www.gsa.gov/portal/directive/d0/content/512516), 12 | even if they do not work for the Centers for Disease Control and Prevention or 13 | GSA. We expect every user to follow this code of conduct and the laws and 14 | policies mentioned above. 15 | 16 | ## Be Empowering 17 | Consider what you can do to encourage and support others. Make room for quieter 18 | voices to contribute. Offer support and enthusiasm for great ideas. Leverage the 19 | low cost of experimentation to support your colleagues' ideas, and take care to 20 | acknowledge the original source. Look for ways to contribute and collaborate, 21 | even in situations where you normally wouldn't. Share your knowledge and skills. 22 | Prioritize access for and input from those who are traditionally excluded from 23 | the civic process. 24 | 25 | ## Rules of Behavior 26 | * I understand that I must complete security awareness and records management 27 | training annually in order to comply with the latest security and records 28 | management policies. 29 | * I understand that I must also follow the [Rules of Behavior for use of HHS Information Resources](http://www.hhs.gov/ocio/policy/hhs-rob.html) 30 | * I understand that I must not use, share, or store any kind of sensitive data 31 | (health status, provision or payment of healthcare, PII, etc.) under ANY 32 | circumstance. 33 | * I will not knowingly conceal, falsify, or remove information. 34 | * I understand that I can only use non-sensitive and/or publicly available 35 | data. 36 | * I understand that all passwords I create to set up accounts need to comply 37 | with CDC's password policy. 38 | * I understand that the stewards reserve the right to moderate all data at any 39 | time. 40 | 41 | ## Boundaries 42 | Create boundaries to your own behavior and consider how you can create a safe 43 | space that helps prevent unacceptable behavior by others. We can't list all 44 | instances of unacceptable behavior, but we can provide examples to help guide 45 | our community in thinking through how to respond when we experience these types 46 | of behavior, whether directed at ourselves or others. 47 | 48 | If you are unsure if something is appropriate behavior, it probably is not. Each 49 | person we interact with can define where the line is for them. Impact matters 50 | more than intent. Ensuring that your behavior does not have a negative impact is 51 | your responsibility. Problems usually arise when we assume that our way of 52 | thinking or behavior is the norm for everyone. 53 | 54 | ### Here are some examples of unacceptable behavior 55 | * Negative or offensive remarks based on the protected classes as listed in the 56 | GSA Anti-harassment Policy of race, religion, color, sex, national origin, 57 | age, disability, genetic information, sexual orientation, gender identity, 58 | parental status, marital status, and political affiliation as well as gender 59 | expression, mental illness, socioeconomic status or backgrounds, 60 | neuro(a)typicality, physical appearance, body size, or clothing. Consider 61 | that calling attention to differences can feel alienating. 62 | * Sustained disruption of meetings, talks, or discussions, including chatrooms. 63 | * Patronizing language or behavior. 64 | * Aggressive behavior, such as unconstructive criticism, providing corrections 65 | that do not improve the conversation (sometimes referred to as "well 66 | actually's"), repeatedly interrupting or talking over someone else, feigning 67 | surprise at someone's lack of knowledge or awareness about a topic, or subtle 68 | prejudice. 69 | * Referring to people in a way that misidentifies their gender and/or rejects 70 | the validity of their gender identity; for instance by using incorrect 71 | pronouns or forms of address (misgendering). 72 | * Retaliating against anyone who files a formal complaint that someone has 73 | violated these codes or laws. 74 | 75 | ## Background 76 | CDC Scientific Clearance is the process of obtaining approvals by appropriate 77 | CDC officials before a CDC information product is released to the public or 78 | CDC's external public health partners. Information products that require formal 79 | clearance include print, electronic, or oral materials that CDC employees 80 | author or co-author, whether published by CDC or outside CDC. CDC contractors 81 | developing content on behalf of CDC for the public or CDC's external public 82 | health partners are also required to put their content through the formal 83 | clearance process. The collaborative functions related to the projects include 84 | blogs, wikis, forums, bug tracking sites, source control and 85 | others deemed necessary.
86 | 87 | For those individuals within the CDC, adherence to the following policies is 88 | required: 89 | * CDC ["Clearance of Information Products Disseminated Outside CDC for Public Use"](http://www.cdc.gov/maso/Policy/PublicUse.pdf) 90 | * HHS ["Ensuring the Quality of Information Disseminated by HHS agencies"](http://aspe.hhs.gov/infoquality) 91 | 92 | All collaborative materials will be controlled by the rules contained within 93 | this document. This will allow for real-time collaboration opportunities 94 | among CDC employees, CDC contractors and CDC public health partners. 95 | 96 | ## Credit 97 | This code of conduct was mainly adapted from [18F's Code of Conduct](https://github.com/18F/code-of-conduct) 98 | and the [CDC's Informatics Innovation Unit R&D Lab's code of conduct.](https://www.philab.cdc.gov/index.php/code-of-conduct/) 99 | 100 | ## Relevant Legal Considerations 101 | * [Laws enforced by the Equal Employment Opportunity Commission](http://www.eeoc.gov/laws/statutes/index.cfm) 102 | * [Types of discrimination prohibited by law](http://www.eeoc.gov/laws/types) 103 | * [New and proposed regulations](http://www.eeoc.gov/laws/regulations/index.cfm) 104 | -------------------------------------------------------------------------------- /docs/images/LA-detailed-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/LA-detailed-arch.png -------------------------------------------------------------------------------- /docs/images/LA-high-level-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/LA-high-level-arch.png -------------------------------------------------------------------------------- /docs/images/azure-browse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-browse.png -------------------------------------------------------------------------------- /docs/images/azure-cloud-copy-code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-cloud-copy-code.png -------------------------------------------------------------------------------- /docs/images/azure-cloud-create-storage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-cloud-create-storage.png -------------------------------------------------------------------------------- /docs/images/azure-cloud-logged-in-terminal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-cloud-logged-in-terminal.png -------------------------------------------------------------------------------- /docs/images/azure-cloud-logged-in.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-cloud-logged-in.png
-------------------------------------------------------------------------------- /docs/images/azure-cloud-select-bash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-cloud-select-bash.png -------------------------------------------------------------------------------- /docs/images/azure-cloud-shell-bash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-cloud-shell-bash.png -------------------------------------------------------------------------------- /docs/images/azure-cloud-shell-login.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-cloud-shell-login.png -------------------------------------------------------------------------------- /docs/images/azure-cloud-shell-ready.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-cloud-shell-ready.png -------------------------------------------------------------------------------- /docs/images/azure-cloud-shell-storage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-cloud-shell-storage.png -------------------------------------------------------------------------------- /docs/images/azure-cloud-shell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-cloud-shell.png -------------------------------------------------------------------------------- /docs/images/azure-cloud_set_FHIR_variable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-cloud_set_FHIR_variable.png -------------------------------------------------------------------------------- /docs/images/azure-containers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-containers.png -------------------------------------------------------------------------------- /docs/images/azure-data-factory-error-button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-data-factory-error-button.png -------------------------------------------------------------------------------- /docs/images/azure-data-factory-error-details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-data-factory-error-details.png -------------------------------------------------------------------------------- /docs/images/azure-data-factory-failure.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-data-factory-failure.png -------------------------------------------------------------------------------- /docs/images/azure-data-factory-launch-studio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-data-factory-launch-studio.png -------------------------------------------------------------------------------- /docs/images/azure-device-login.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-device-login.png -------------------------------------------------------------------------------- /docs/images/azure-fhir-api-endpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-fhir-api-endpoint.png -------------------------------------------------------------------------------- /docs/images/azure-fhir-api-response.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-fhir-api-response.png -------------------------------------------------------------------------------- /docs/images/azure-fhir-api-search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-fhir-api-search.png -------------------------------------------------------------------------------- /docs/images/azure-fhir-server.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-fhir-server.png -------------------------------------------------------------------------------- /docs/images/azure-filter-storage-accounts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-filter-storage-accounts.png -------------------------------------------------------------------------------- /docs/images/azure-find-fhir-server.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-find-fhir-server.png -------------------------------------------------------------------------------- /docs/images/azure-ingestion-single-execution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-ingestion-single-execution.png -------------------------------------------------------------------------------- /docs/images/azure-pipeline-diagram.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-pipeline-diagram.png -------------------------------------------------------------------------------- /docs/images/azure-pipeline-select-monitor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-pipeline-select-monitor.png -------------------------------------------------------------------------------- /docs/images/azure-portal-check-account.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-portal-check-account.png -------------------------------------------------------------------------------- /docs/images/azure-portal-resource-group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-portal-resource-group.png -------------------------------------------------------------------------------- /docs/images/azure-portal-resource-groups.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-portal-resource-groups.png -------------------------------------------------------------------------------- /docs/images/azure-portal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-portal.png -------------------------------------------------------------------------------- /docs/images/azure-search-cloud-storage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-search-cloud-storage.png -------------------------------------------------------------------------------- /docs/images/azure-search-data-factories.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-search-data-factories.png -------------------------------------------------------------------------------- /docs/images/azure-search-fhir-viewer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-search-fhir-viewer.png -------------------------------------------------------------------------------- /docs/images/azure-select-containers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-select-containers.png -------------------------------------------------------------------------------- /docs/images/azure-select-fhir-store.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-select-fhir-store.png 
-------------------------------------------------------------------------------- /docs/images/azure-select-ingestion-pipeline-in-studio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-select-ingestion-pipeline-in-studio.png -------------------------------------------------------------------------------- /docs/images/azure-select-ingestion-pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-select-ingestion-pipeline.png -------------------------------------------------------------------------------- /docs/images/azure-select-phi-bucket.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-select-phi-bucket.png -------------------------------------------------------------------------------- /docs/images/azure-select-source-data-container.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-select-source-data-container.png -------------------------------------------------------------------------------- /docs/images/azure-select-vxu-folder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-select-vxu-folder.png -------------------------------------------------------------------------------- /docs/images/azure-starter-kit-arch.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-starter-kit-arch.drawio.png -------------------------------------------------------------------------------- /docs/images/azure-upload-file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-upload-file.png -------------------------------------------------------------------------------- /docs/images/azure-upload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-upload.png -------------------------------------------------------------------------------- /docs/images/azure-workflow-source.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/azure-workflow-source.png -------------------------------------------------------------------------------- /docs/images/create-environment-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/create-environment-1.png -------------------------------------------------------------------------------- /docs/images/create-environment-2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/create-environment-2.png -------------------------------------------------------------------------------- /docs/images/create-environment-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/create-environment-3.png -------------------------------------------------------------------------------- /docs/images/deployment-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/deployment-1.png -------------------------------------------------------------------------------- /docs/images/deployment-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/deployment-2.png -------------------------------------------------------------------------------- /docs/images/deployment-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/deployment-3.png -------------------------------------------------------------------------------- /docs/images/edit-secrets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/edit-secrets.png -------------------------------------------------------------------------------- /docs/images/filter-to-workspace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/filter-to-workspace.png -------------------------------------------------------------------------------- /docs/images/fork-repo-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/fork-repo-1.png -------------------------------------------------------------------------------- /docs/images/fork-repo-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/fork-repo-2.png -------------------------------------------------------------------------------- /docs/images/high-level-phdi-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/high-level-phdi-arch.png -------------------------------------------------------------------------------- /docs/images/make-new-env-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/make-new-env-1.png -------------------------------------------------------------------------------- /docs/images/make-new-env-2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/make-new-env-2.png -------------------------------------------------------------------------------- /docs/images/manage-deleted-vaults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/manage-deleted-vaults.png -------------------------------------------------------------------------------- /docs/images/navigate-to-actions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/navigate-to-actions.png -------------------------------------------------------------------------------- /docs/images/navigate-to-settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/navigate-to-settings.png -------------------------------------------------------------------------------- /docs/images/quick-start-ready.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/quick-start-ready.png -------------------------------------------------------------------------------- /docs/images/repo-secret-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/repo-secret-2.png -------------------------------------------------------------------------------- /docs/images/repo-secret-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/repo-secret-3.png -------------------------------------------------------------------------------- /docs/images/repo-secret-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/repo-secret-4.png -------------------------------------------------------------------------------- /docs/images/resource-group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/resource-group.png -------------------------------------------------------------------------------- /docs/images/terraform-setup-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/terraform-setup-1.png -------------------------------------------------------------------------------- /docs/images/terraform-setup-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/terraform-setup-2.png -------------------------------------------------------------------------------- 
/docs/images/terraform-setup-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/terraform-setup-3.png -------------------------------------------------------------------------------- /docs/images/trigger-deployment-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/trigger-deployment-1.png -------------------------------------------------------------------------------- /docs/images/trigger-deployment-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/trigger-deployment-2.png -------------------------------------------------------------------------------- /docs/images/troubleshooting-guide-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/troubleshooting-guide-1.png -------------------------------------------------------------------------------- /docs/images/troubleshooting-guide-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/troubleshooting-guide-10.png -------------------------------------------------------------------------------- /docs/images/troubleshooting-guide-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/troubleshooting-guide-11.png -------------------------------------------------------------------------------- /docs/images/troubleshooting-guide-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/troubleshooting-guide-12.png -------------------------------------------------------------------------------- /docs/images/troubleshooting-guide-13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/troubleshooting-guide-13.png -------------------------------------------------------------------------------- /docs/images/troubleshooting-guide-14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/troubleshooting-guide-14.png -------------------------------------------------------------------------------- /docs/images/troubleshooting-guide-15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/troubleshooting-guide-15.png -------------------------------------------------------------------------------- /docs/images/troubleshooting-guide-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/troubleshooting-guide-2.png -------------------------------------------------------------------------------- /docs/images/troubleshooting-guide-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/troubleshooting-guide-3.png -------------------------------------------------------------------------------- /docs/images/troubleshooting-guide-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/troubleshooting-guide-4.png -------------------------------------------------------------------------------- /docs/images/troubleshooting-guide-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/troubleshooting-guide-5.png -------------------------------------------------------------------------------- /docs/images/troubleshooting-guide-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/troubleshooting-guide-6.png -------------------------------------------------------------------------------- /docs/images/troubleshooting-guide-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/troubleshooting-guide-7.png -------------------------------------------------------------------------------- /docs/images/troubleshooting-guide-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/troubleshooting-guide-8.png -------------------------------------------------------------------------------- /docs/images/troubleshooting-guide-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/docs/images/troubleshooting-guide-9.png -------------------------------------------------------------------------------- /docs/implementation-guide.md: -------------------------------------------------------------------------------- 1 | # PHDI Azure Implementation Guide 2 | 3 | - [PHDI Azure Implementation Guide](#phdi-azure-implementation-guide) 4 | - [Overview](#overview) 5 | - [What is PHDI?](#what-is-phdi) 6 | - [What are Building Blocks?](#what-are-building-blocks) 7 | - [PHDI Starter Kit Architecture](#phdi-starter-kit-architecture) 8 | - [Ingestion Pipeline](#ingestion-pipeline) 9 | - [Tabulation Service](#tabulation-service) 10 | - [Additional References](#additional-references) 11 | - [Implementing the PHDI Starter Kit in Azure](#implementing-the-phdi-starter-kit-in-azure) 12 | - [User Requirements](#user-requirements) 13 | - [Step 1: Ensure You Have Collected Values for Geocoding Variables](#step-1-ensure-you-have-collected-values-for-geocoding-variables) 14 | - [Step 2: Run the Quick Start Script in Azure Cloud Shell](#step-2-run-the-quick-start-script-in-azure-cloud-shell) 
15 | 16 | ## Overview 17 | This document offers a detailed guide for implementing the PHDI Starter Kit pipeline in an Azure environment. 18 | 19 | ### What is PHDI? 20 | The Public Health Data Infrastructure (PHDI) project is part of the Pandemic-Ready Interoperability Modernization Effort (PRIME), a multi-year collaboration between CDC and the U.S. Digital Service (USDS) to strengthen data quality and information technology systems in state and local health departments. The PHDI project has developed a **Starter Kit data ingestion pipeline**, built from modular software tools known as **Building Blocks**, which can be combined in multiple configurations to create data pipelines. The purpose of this repository is to help users deploy the Building Blocks provided in the [PHDI library](https://github.com/CDCgov/phdi) as a Starter Kit pipeline in their own Azure environment. 21 | 22 | ### What are Building Blocks? 23 | PHDI's goal is to provide public health authorities (PHAs) with modern software tools to solve challenges in working with public health data. We refer to these tools as Building Blocks. Some Building Blocks offer relatively simple functionality, like standardizing patient names, while others perform more complex tasks, including geocoding and standardizing addresses. Importantly, the Building Blocks have been carefully designed with common inputs and outputs, making them easily composable into data pipelines. 24 | 25 | ### PHDI Starter Kit Architecture 26 | The composable nature of Building Blocks allows them to be strung together into data pipelines where each Building Block represents a single step in a pipeline. As an example, let's consider a hypothetical case where a PHA would like to improve the quality of their patient address data and ensure that patient names are written consistently. They could solve this problem by using the name standardization and geocoding Building Blocks, mentioned in the previous section, to build a simple pipeline that standardizes patients' names and geocodes their addresses. Non-standardized data would be sent into the pipeline, where it would pass through each of the Building Blocks, and then exit the pipeline with standardized name and address fields. PHAs are welcome to use Building Blocks to create their own custom pipelines. However, because many PHAs face similar challenges processing data, this repository implements a basic architecture in the form of a Starter Kit. The goal of this Starter Kit is to help PHAs easily get up and running with modern, modular tooling for processing public health data in the cloud. We also fully understand that not all PHAs face the same challenges. Our intention is for STLTs to modify and expand on this Starter Kit architecture to make it fit their specific needs. The Starter Kit has two main components: an ingestion pipeline that cleans and stores data in a FHIR server, and a tabulation service that allows data to be easily extracted from the FHIR server. The complete architecture for the Starter Kit is shown in the diagram below. 27 | 28 | ![Architecture Diagram](./images/azure-starter-kit-arch.drawio.png) 29 | 30 | #### Ingestion Pipeline 31 | The ingestion pipeline is intended to allow PHAs to easily bring data that is reported to them into their system after performing standardizations and enrichments. Source data can be provided in either HL7v2 or C-CDA formats, allowing this single pipeline to manage ingestion of ELR, VXU, ADT, and eCR messages.
The pipeline can support both data types because the initial step is to convert to FHIR. After this conversion, the pipeline can handle all reported data the same way by simply processing the FHIR bundles (i.e., collections of FHIR resources) that result from the conversion. Once data has been converted to FHIR, the following standardizations and enrichments are made: 32 | 1. Patient names are standardized. 33 | 2. Patient phone numbers are transformed into the E.164 standard international format. 34 | 3. Patient addresses are geocoded for standardization and enrichment with latitude and longitude. 35 | 4. A hash based on a patient's name, date of birth, and address is computed to facilitate linkage with other records for the same patient. 36 | 37 | After the data has been cleaned and enriched, it is uploaded to a FHIR Store where it can serve as a single source of truth for all downstream reporting and analytics needs. 38 | 39 | #### Tabulation Service 40 | The tabulation service provides a mechanism for extracting and tabulating data from the FHIR server. Users define schemas describing the table(s) they would like to extract from the FHIR Store and submit them to the tabulation service. The service then conducts a basic Extract, Transform, and Load (ETL) process with the following steps: 41 | 1. Extract - The service identifies the data required for a given schema and extracts it from the FHIR server using the FHIR API. 42 | 2. Transform - The non-tabular, nested JSON FHIR data is transformed into the tabular format specified by the schema. 43 | 3. Load - The tabulated data is loaded into a flat file format (CSV, Parquet, or SQLite) and stored in an Azure File share. The data specified in the schema is now available to downstream reporting and analytical workloads. 44 | 45 | ### Additional References 46 | We have only provided a brief overview of PHDI, Building Blocks, and the Starter Kit pipeline we have designed. For additional information, please refer to the documents linked below. 47 | - [PHDI-azure README](./README.md) 48 | - [PHDI-azure Getting Started Guide](./getting_started.md) 49 | 50 | ## Implementing the PHDI Starter Kit in Azure 51 | In this section, we describe how a PHA can take this repository and use it to spin up all of the functionality that the Starter Kit offers in their own Azure environment. 52 | 53 | Below, we will provide our Quick Start Script, which, when executed, connects your GitHub repository to your Azure instance, sets up environment variables for both, and executes the Terraform necessary to provision required resources in your Azure environment. 54 | 55 | ### User Requirements 56 | In order to proceed, you will need: 57 | 1. `Owner` access to the Azure subscription where you would like to deploy the PHDI Starter Kit. 58 | 1. A GitHub account with a verified email address. 59 | 1. To be able to create new repositories in the GitHub account or organization where your copy of this repository will be created. 60 | 61 | If you are planning to use an organization account, you must be able to authorize the GitHub CLI to interact with the organization. 62 | 63 | If you do not meet these criteria, contact the owner of your organization's Azure subscription and/or GitHub organization. 64 | 65 | ### Step 1: Ensure You Have Collected Values for Geocoding Variables 66 | 67 | Exiting the Quick Start Script partway through is not recommended, so please have all values on hand when you run the script.
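If you would like to keep the two geocoding values (listed below) staged in your Cloud Shell session while you run the script, here is a minimal sketch. Treat it as a hypothetical convenience only: the script prompts for these values interactively, and this sketch does not assume it reads them from the environment; the variable names simply mirror the value names listed below.

```bash
# Hypothetical convenience: hold the Smarty credentials in shell variables
# so they are easy to print and paste when quick-start.sh prompts for them.
# The Quick Start Script asks for these interactively; it is not guaranteed
# to read them from the environment.
read -r -s -p "Smarty Auth ID: " SMARTY_AUTH_ID && echo
read -r -s -p "Smarty Auth Token: " SMARTY_AUTH_TOKEN && echo

# When the script prompts, print a value so you can copy it:
echo "$SMARTY_AUTH_ID"
```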
68 | 69 | Required to use geocoding functionality: 70 | - `SMARTY_AUTH_ID` - Your SmartyStreets Authorization ID. Find more info on the Smarty geocoding service [here](https://www.smarty.com/pricing/us-rooftop-geocoding). 71 | - `SMARTY_AUTH_TOKEN` - Your SmartyStreets Authorization Token. 72 | 73 | Keep these values easily accessible so that they can be entered later when the script prompts for them. 74 | 75 | ### Step 2: Run the Quick Start Script in Azure Cloud Shell 76 | In this step, we will work through Azure's [Workload Identity Federation](https://learn.microsoft.com/en-us/azure/active-directory/develop/workload-identity-federation) to grant your phdi-azure repository access to deploy the pipelines to your organization's Azure environment. Below, we provide a Quick Start Script that automates most of this process, and we recommend using it. However, if you prefer to work through the process manually, you may follow [this guide](https://learn.microsoft.com/en-us/azure/active-directory/develop/workload-identity-federation-create-trust?pivots=identity-wif-apps-methods-azcli). 77 | 78 | **Quick Start Script:** 79 | Navigate to the [Azure Cloud Shell](https://shell.azure.com/). 80 | 81 | Choose Bash as your shell environment: 82 | ![azure-cloud-shell-bash](./images/azure-cloud-shell-bash.png) 83 | 84 | Create storage for your Cloud Shell: 85 | ![azure-cloud-shell-storage](./images/azure-cloud-shell-storage.png) 86 | 87 | When your shell is ready to receive commands, it will look like this: 88 | ![azure-cloud-shell-ready](./images/azure-cloud-shell-ready.png) 89 | 90 | To download and run the Quick Start Script, run the following command in Cloud Shell: 91 | ```bash 92 | git clone https://github.com/CDCgov/phdi-azure.git && cd phdi-azure && ./quick-start.sh 93 | ``` 94 | 95 | When the script is ready for your input, it will look like this: 96 | ![quick-start-ready](./images/quick-start-ready.png) 97 | 98 | Press Enter to begin the script. 99 | 100 | If you plan to deploy to an existing resource group in your Azure environment, have the resource group name ready and provide it to the Quick Start Script when prompted. 101 | 102 | The script will take around 20-30 minutes to run. 103 | -------------------------------------------------------------------------------- /docs/rules_of_behavior.md: -------------------------------------------------------------------------------- 1 | # Rules of Behavior and Posting Guidelines for the Use of GitHub as a Third-Party Web Application 2 | 3 | ## Purpose 4 | 5 | These rules of behavior establish the privacy and information security requirements for the use of Third-Party Web Applications (TPWAs) in conjunction with the CDC GitHub.com organizations established for open source projects. These rules of behavior were developed to ensure that CDC and its confidential information and technologies are not compromised, as well as to protect general CDC interests and services from risks associated with the use of TPWAs, while allowing for the increased efficiencies and cost savings that come with appropriate use of third-party services. 6 | 7 | ## Scope 8 | 9 | These rules of behavior and their related guidance apply to federal employees, contractors, and all external collaborators who will access GitHub from CDC directly or use it with non-sensitive data obtained from CDC. All engagement with TPWAs related to GitHub will be governed by these rules of behavior, as well as by the Rules of Behavior for Use of HHS Information Resources.
10 | 11 | ## Ownership 12 | 13 | CDC assigns three stewards in charge of rules and policy compliance: a Business Steward, a Security Steward, and a Technical Steward. The business and security stewards are responsible for establishing policy and providing approval, while the technical steward fulfills requests from users. Users requesting access to GitHub who have not yet been approved must assign a main and a backup point of contact (POC) with the business steward, as well as provide a justification to the security steward. 14 | 15 | The security steward is responsible for the security of GitHub usage as a TPWA and its impact on the CDC network and compliance with CDC security policies. All users, including POCs, are responsible for adherence to this policy and associated processes. Where there is not a rule of behavior that provides explicit guidance, users must do their best to safeguard CDC and its network and services from security risks. 16 | 17 | ## Rules of Behavior 18 | 19 | All new users of GitHub must read and acknowledge these rules before using any of the approved TPWAs. This acknowledgment must be completed annually, and establishes agreement on the part of the user to adhere to these rules. 20 | 21 | * I understand that I must complete security awareness and records management training annually in order to comply with the latest security and records management policies. 22 | * I understand that I must also follow the Rules of Behavior for Use of HHS Information Resources. 23 | * I understand that I must not use, share, or store any kind of sensitive data (health status, provision or payment of healthcare, pictures, PII, etc.) with TPWAs under ANY circumstance. 24 | * I will not knowingly conceal, falsify, or remove information. This includes editing or removing the template language provided when a GitHub repository is created. 25 | * I understand that I can only use non-sensitive and/or publicly available data in GitHub. If you are unsure of what constitutes non-sensitive information, please see the guidance below. 26 | * I understand that all passwords I create to set up GitHub accounts need to comply with CDC’s password policy. 27 | * I understand that the steward reserves the right to moderate all data at any time. 28 | * I understand my responsibilities to protect systems and data as specified by CDC policies. 29 | 30 | ## Guidance Regarding Non-Sensitive and Publicly Available Information 31 | 32 | In support of program collaboration in the use of GitHub, portions of some GitHub projects are either currently open to the public or may become open to the public in the future. The following guidelines will inform and assist the user in determining that the information to be posted on GitHub is not sensitive. The bottom line is that if the content you are posting is not appropriate to post for public access, it should not be posted on GitHub. 33 | 34 | Before posting information that involves other CDC programs, employees, etc. to GitHub, it is important that the poster ensures they receive approval from the relevant CDC entity to post the information. 35 | 36 | Questions to consider before posting information include: 37 | 38 | | Question | Answer | Guidance | | --- | --- | --- | | Do I have reservations about anyone viewing this information? | Yes | Do not post. | 39 | | Were individuals informed that this information would be posted on GitHub? | No | Do not post. | 40 | | Does this information contain details or descriptions of CDC security systems or other sensitive infrastructures? | Yes | Do not post.
| 41 | Does this information reflect program efforts to engage and inform external partners and the public? | No | Do not post. | 42 | 43 | Examples of information which has been deemed not sensitive and may be posted on GitHub include the following. 44 | 45 | * Source code 46 | * Use cases 47 | * User stories/requirements 48 | * Process flows 49 | * Program pain points 50 | * Software service descriptions 51 | 52 | Sensitive information, which should not be posted, includes (but is not limited to) the following. 53 | 54 | * Information directly attributed to an individual in a sensitive manner 55 | * The names or pictures of individuals 56 | * Protected health information 57 | * Project management material. This includes posting or discussing security documentation, implementation plans, communications regarding project specifics, etc. 58 | * Opinions related to programs or tools, specifically those that may have an adverse impact 59 | * Non-public links to CDC SharePoint or other internal references 60 | * Non-public details on CDC internal infrastructure 61 | 62 | If there’s any question on whether information may be sensitive (such as detailed interview notes or specific references provided during a program interview), further guidance should be sought from the security steward prior to posting the information on GitHub. 63 | 64 | ## Enforcement 65 | 66 | Users looking to use GitHub who are unable to follow these rules of behavior will not have authorization to do so. Any users who violate these rules of behavior or CDC security policies may be subject to action, up to and including revoking access to GitHub. Technical and security stewards have the right to enforce these rules of behavior based on violations at any time. 67 | 68 | ## References 69 | 70 | * [Policy for Managing the Use of Third-Party Websites and Applications](https://www.hhs.gov/about/agencies/asa/ocio/cybersecurity/policy-social-media-technologies/index.html) 71 | * [Rules of Behavior for Use of HHS Information Resources](http://www.hhs.gov/ocio/policy/hhs-rob.html) 72 | * [Security and Awareness Training](http://sat.cdc.gov/) (requires login) 73 | -------------------------------------------------------------------------------- /docs/support.md: -------------------------------------------------------------------------------- 1 | # Requesting Support 2 | If you encounter issues while running the pipeline or in your own version of this repository, you can reach out to the DIBBs team for support. 3 | 4 | ## Open an issue in phdi-azure 5 | The preferred way to get in touch with the team is to open an issue on the [phdi-azure](https://github.com/CDCgov/phdi-azure) repository. Please use the [support template](https://github.com/CDCgov/phdi-azure/blob/b6f15fbccacbdc05b1512db963a19fc5a8901a25/.github/ISSUE_TEMPLATE/support-request.md) to ensure the team has adequate information to help you. 6 | 7 | Support requests are reviewed at least once per week. 8 | 9 | **Please DO NOT include any PHI/PII in support requests, as they are public.** 10 | 11 | ## Contact us 12 | If for some reason opening a GitHub issue is not appropriate, you can get in touch with the team at `DMIBuildingBlocks@cdc.gov`.
13 | 14 | **Please DO NOT include any PHI/PII in emails to this address, as it is a shared inbox.** 15 | -------------------------------------------------------------------------------- /docs/thanks.md: -------------------------------------------------------------------------------- 1 | # Thanks and Acknowledgements 2 | 3 | Starting this file way too late, but wanted to recognize contributions made by people who helped this repo. There are many more than this, but I should have started this file years ago. 4 | 5 | * Chris Sandlin [@cssandlin](https://github.com/cssandlin) 6 | * Drewry Morris [@drewry](https://github.com/drewry) 7 | -------------------------------------------------------------------------------- /sample-data/ELR_e2e_sample_1.hl7: -------------------------------------------------------------------------------- 1 | MSH|^~\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4 2 | PID|||1234567890^^^^MR||SHEPARD^JOHN^TIBERIUS^^^^L|ANDERSON|20531107|M|||4720 MEGA ST. NW^^NORTH CANTON^OH^44720||(206)3345232|(206)752-121||||AC555444444||67-A4335^OH^20030520 3 | OBR|1|845439^GHH OE|1045813^GHH LAB|1554-5^GLUCOSE|||200202150730||||||||555-55-5555^PRIMARY^PATRICIA P^^^^MD^^LEVEL SEVEN HEALTHCARE, INC.|||||||||F||||||444-44-4444^HIPPOCRATES^HOWARD H^^^^MD 4 | OBX|1|SN|1554-5^GLUCOSE^POST 12H CFST:MCNC:PT:SER/PLAS:QN||^182|mg/dl|70_105|H|||F -------------------------------------------------------------------------------- /sample-data/ELR_e2e_sample_2.hl7: -------------------------------------------------------------------------------- 1 | MSH|^~\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4 2 | PID|||7894561235^^^^MR||VAS NORMANDY^TALI^ZORAH^^^^L|RAAN|20600514|F|||1100 E 9TH ST.^^CLEVELAND^OH^44114||(206)3345232|(206)752-121||||AC555444444||67-A4335^OH^20030520 3 | OBR|1|845439^GHH OE|1045813^GHH LAB|1554-5^GLUCOSE|||200202150730||||||||555-55-5555^PRIMARY^PATRICIA P^^^^MD^^LEVEL SEVEN HEALTHCARE, INC.|||||||||F||||||444-44-4444^HIPPOCRATES^HOWARD H^^^^MD 4 | OBX|1|SN|1554-5^GLUCOSE^POST 12H CFST:MCNC:PT:SER/PLAS:QN||^182|mg/dl|70_105|H|||F -------------------------------------------------------------------------------- /sample-data/VXU-V04-01_success_single.hl7: -------------------------------------------------------------------------------- 1 | MSH|^~\&|IMMAPP|GHHSFacility^2.16.840.1.122848.1.30^ISO|EHRApp^1.Edu^ISO|GHHRFacility^2.16.840.1.1122848.1.32^ISO|202108181126+0215|SECURITY|VXU^V04^VXU_V04|MSG00001|P|2.8|||||USA||en-US|||22 GHH Inc.|23 GHH Inc.|24GHH^2.16.840.1.114884.10.20^ISO|25GHH^2.16.840.1.114884.10.23^ISO 2 | SFT|Orion|2.4.3.52854|Rhapsody|2.4.3|Testactivity|20070725111624 3 | PID|1|1234567^4^M11^test^MR^University Hospital^19241011^19241012|PATID1234^5^M11^test1&2.16.1&HCD^MR^GOOD HEALTH HOSPITAL~123456789^^^USSSA^SS|PATID567^^^test2|EVERYMAN&&&&Aniston^ADAM^A^III^Dr.^MD^D^^^19241012^^^^PF^Addsm~Josh&&&&Bing^^stanley^^^^L^^^^^19241010^19241015|SMITH^Angela^L|198808181126+0215|M|elbert^Son|2106-3^White^HL70005~2028-9^Asian^HL70005|1000&Hospital Lane^Ste. 123^Ann Arbor ^MI^99999^USA^M^^&W^^^20000110&20000120^^^^^^^Near Highway|GL|78788788^^CP^5555^^^1111^^^^^2222^20010110^20020110^^^^18~12121212^^CP|7777^^CP~1111^^TDD|ara^^HL70296^eng^English-us^HL70296^v2^v2.1^TextInEnglish|M^Married|AME|4000776^^^AccMgr&1.3.6.1.4.1.44750.1.2.2&ISO^VN^1^19241011^19241012|PSSN123121234|DLN-123^US^20010123|1212121^^^NTH&rt23&HCD^AND^^19241011^19241012|N^NOT HISPANIC OR LATINO^HL70189|St. 
Francis Community Hospital of Lower South Side|N|2|US^United States of America^ISO3166_1|Vet123^retired^ART|BT^Bhutan^ISO3166_1|20080825111630+0115|Y|||20050110015014+0315||125097000^Goat^SCT|4880003^Beagle^SCT|||CA^Canada^ISO3166_1|89898989^WPN^Internet 4 | PD1|S^^ACR||LINDAS TEST ORGANIZATION^^SIISCLIENT818|88^Hippo^rold^H^V^Dr^MD^^TE^^^M10^DN^^||||||||||Methodist Church|||20150202^20150202 5 | NK1|1|Evan&&&&Aniston^ADAM^A^III^Dr.^MD^D|EMC^test^ACR^CHD^^^9.0^10.0|2222&HOME&STREET^Highway^GREENSBORO^NC^27401-1020^US^BI^^jkdha&test^^^^20000110^20050111~111&Duck ST^^Fowl|78788788^WPN^Internet^5555^^^^^^^^^20010110^20020110^^^^18~121111^PRN^CP|88888888^PRN^CP^5555^^^^^^^^878777^20010110^20020110^^^^18~6666666^^BP|O|20210818|20211218|||12345567^4^M11^T1&2.16.840.1.113883.19&HCD^MR^University Hospital^19241011^19241012|TestOrg^^O12^^^^EI^^^Org12||F^^^M|19620110045504||||ara||||||||||Green^John^A^II^DR^MD^D^^^19241012^G~Josh&&&&Bing^^stanley^^^^L|898989898^^FX~88888888^^CP|Street1&Palkstreet~ST-2|I-123^^^^BA~I-222^^^^DI||2106-3^test^FDDC||Security no-23|||1515151515^WPN^CP^555544^^^^^^^^777^20010110^20020110^^^^1|444444^^CP 6 | PV1|1|P|HUH AE OMU&9.8&ISO^OMU B^Bed 03^HOMERTON UNIVER^^C^Homerton UH^Floor5|E|1234567^4^M11^t&2.16.840.1.113883.19&HCD^ANON^University Hospital^19241011^19241012|4 East, room 136, bed B 4E^136^B^CommunityHospital^^N^^^|1122334^Alaz^Mohammed^Mahi^JR^Dr.^MD^^PERSONNELt&1.23&HCD^B^^^BR^^^^^^19241010^19241015^Al|C006^Woolfson^Kathleen^^^Dr^^^TEST&23.2&HCD^MSK^^^BA|C008^Condoc^leen^^^Dr^^^&1.3.6.1.4.1.44750.1.2.2&ISO^NAV^^^BR|SUR|||R|NHS Provider-General (inc.A\T\E-this Hosp)||VIP^Very Important Person^L^IMP^^DCM^v1.1^v1.2^Inportant Person|37^DISNEY^WALT^^^^^^AccMgr^^^^ANC|Inpatient|40007716^^^AccMng&1.2&HCD^AM|||||||||||||||||Admitted as Inpatient^Sample^ACR|22&Homes&FDK|Vegan^Vegetarian|HOMERTON UNIVER||Active|POC^Room-2^Bed-103^^^C^Greenland|Nursing home^^^^^^Rosewood|20150208113419+0110||||||50^^^T123&1.3.6.1.4.1.44750.1.2.2&ISO^MR||Othhel^^^^^^^^testing&&HCD||EOC124^5^M11^Etest&2.16.1&HCD^MR^CommunityHospital 7 | PV2|^ROOM1&2.16.840.1.113883.4.642.1.1108&ISO^BED1^FACILITY1^^^BUILDING1^FLOOR1^^^||140004^Chronic pharyngitis^SCT||||||||2|Health Checkup|12188^Hippocrates^Harold^H^IV^Dr^MD^^TE&Provider Master.Community Health and Hospitals&DNS^^^M10^DN^^|||||||||N|||2^^^3^^^V1.2^V1.3|||||||||||||C 8 | GT1|1|1516^4^M11^test^MR^Unity Hospital^19241011^19241012|RADIANT^LUCY^^|Rebecca^Jonas|1619 SOUTH UNIVERSITY^^MADISON^WI^53703^US|6082517777^^Internet^8484~717171^^PH|021212^^MD|20010412|M|P/F|BRO|G-SSN-12|20010410|20010415|2|EHS GENERIC EMPLOYER|1979 MILKY WAY^^VERONA^WI^53593^US|082719000^^PH|55121^^^^FI|3||N|SLF|20080825111630+0115|Y||||1231^^^^BC|M|20091010|20101010||||ger||||||MothersMaiden|BT^Bhutan^ISO3166_1||Ben^Charles~Ben2|000352^^CP~00121^^FX|Urgent requirement||||GEOrg|||||Germany 9 | IN1|1|BAV^Blue Advantage HMO|IC-1.31^24^BCV^&2.16.840.1.113883.1.1&ISO^NIIP^^19291011^19291012|Blue Cross Blue Shield of Texas|1979 MILKY WAY^^VERONA^WI^53593^US|Henry&&&&Roth^Rony^A^III^Dr.^MD^D^^^19251012|(555)555-5555^BPN^PH|PUBSUMB|SelfPay||Sam P. 
Hil|19891001|20501001||HMO^health maintenance organization policy|Doe^Rosallie^John^III^Mrs.^Bachelors^R|SPO^Spouse|19750228|3857 Velvet Treasure Terrace^^Midnight^NC^27878^US|||||||||||||||||PN-145|150&USD^DC||||||F^Female|2000 MILKY WAY^^VERONA^WI^53593^US|||B||HMO-12345^^^&2.16.840.1.113883.1.3&ISO^NI 10 | IN2|1117^4^M11^&2.16.840.1.113883.1.4&ISO^EI^University Hospital~1118^^^^BC|425-57-9745|||I^Insurance company|Medicare-12345|Jack&&&&Aniston^ADAM^A^III^Dr.^MD^D^^^19241012^^^^PF^Addsm|MCN-008||MI-12345||||||||||||||||||||||||eng^English|||||||||||||||Richard^Paul|254622222^^PH|||||||||||PNM1234^4^M11^PM&2.6.1&HCD^MR^University Hospital^19241011^19241012||0005245^WPN^Internet~^^CP|555777888^^FX~^^PH||||||Max Life Insurance||02^Spouse 11 | OBX|1|NM|8867-4^heartrate^LN||60~120|beats/min^^ISO|70-80|A^A^HL7nnnn~B^B|||S|||19990702|Org15^ID of producer^CAS|1134^Aly^Zafar^Mahendra^JR^Dr.^MD^^PERSt&1.23&HCD^B^^^BR^^^^^^19241010^19241015^Al~2234^Pauly^Berrie^Raud|OBS^This is test method^AS4|EI12.3^NI2^426d2726-51fc-00fe-a946-8596e80a80eb^GUID~^^1.3.6.1.4.1.44750.1.2.2^ISO|19990702|BU^Observation site^E5|EI21^OII||FairOaks Hspital|Research Park^Fairfax^VA^22031^USA|MD-25^Atchinson^Christopher^^MD|||||||PAI-1^FAI-1 12 | NTE|1||No Antibodies Detected||MLEE^ATTEND^AARON^A^^^MD|202010101500+0215| 13 | ORC|RE|4422^NIST-AA-IZ-2^2.16.840.1.114222.4.3.3.5.1.2^ISO|13696^NIST-AA-IZ-2^2.16.840.1.114222.4.3.3.5.1.2^ISO|||||||7824^Jackson^Lily^Suzanne^^^^^NIST-PI-1^L^^^PRN||654^Thomas^Wilma^Elizabeth^^^^^NIST-PI-1^L^^^MD|||202108181126||NISTEHRFAC^NISTEHRFacility^HL70362| 14 | TQ1|1|54^&lbs|P&Post (after)&HL70335^HD^^^12^min^^PC^120^s|11:30:05~11:45:05|30^s|150^&kg|202110091600-0800|202111091600-0800|A|This is condition text|Please follow the text instructions||2^hr|5 15 | RXA|0|1|20210818||49281021588^TENIVAC^NDC|0.5|mL^mL^UCUM||00^New Record^NIP001|7824^Jackson^Lily^Suzanne^^^^^NIST-PI-1^L^^^PRN|^^^NIST-Clinic-1||||315841|20211216|PMC^Sanofi Pasteur^MVX|||CP|A||||||HUH AE OMU&9.8&ISO^OMU B^Bed 03^HOMERTON UNIVER^^C^Homerton UH^Floor5|0007&Hospital Lane^Ste. 
123^Ann Arbor ^MI^99999^USA^M^^&W^^^20000110&20000120^^^^^^^Near Highway 16 | RXR|C28161^Intramuscular^NCIT|RD^Right Deltoid^HL70163 17 | OBX|1|CE|30963-3^Vaccine Funding Source^LN|1|PHC70^Private^CDCPHINVS||||||F|||20210818 18 | OBX|2|CE|64994-7^Vaccine Funding Program Eligibility^LN|2|V01^Not VFC Eligible^HL70064||||||F|||20210818|||VXC40^per immunization^CDCPHINVS 19 | OBX|3|CE|69764-9^Document Type^LN|3|253088698300028811170411^Tetanus/Diphtheria (Td) Vaccine VIS^cdcgs1vis||||||F|||20210818 20 | OBX|4|DT|29769-7^Date Vis Presented^LN|3|20210818||||||F|||20210818 21 | -------------------------------------------------------------------------------- /sample-data/VXU-V04-02_failedConversion.hl7: -------------------------------------------------------------------------------- 1 | MSH|^~\&|IMMAPP|GHHSFacility^2.16.840.1.122848.1.30^ISO|EHRApp^1.Edu^ISO|GHHRFacility^2.16.840.1.1122848.1.32^ISO|20210818118888+0215|SECURITY|VXU^V04^VXU_V04|MSG00001|P|2.8|||||USA||en-US|||22 GHH Inc.|23 GHH Inc.|24GHH^2.16.840.1.114884.10.20^ISO|25GHH^2.16.840.1.114884.10.23^ISO 2 | SFT|Orion|2.4.3.52854|Rhapsody|2.4.3|Testactivity|20070725111624 3 | PID|1|12345678^8^M11^test^MR^University Hospital^19241011^19241012|PATID12348^5^M11^test1&2.16.1&HCD^MR^GOOD HEALTH HOSPITAL~123456888^^^USSSA^SS|PATID891^^^test3|NOTEVER&&&&Spangler^JOHN^B^III^Dr.^MD^D^^^19241012^^^^PF^Addsm~Jermey&&&&Blight^^durly^^^^L^^^^^19241010^19241015|WARREN^Gina^P|198808181122+0215|M|elbert^Son|2106-3^White^HL70005~2028-9^Asian^HL70005|1000&Hospital Lane^Ste. 561^Ann Arbor ^MI^99999^USA^M^^&W^^^20000110&20000120^^^^^^^Near Highway|GL|78788788^^CP^5555^^^1111^^^^^2222^20010110^20020110^^^^18~12121212^^CP|7777^^CP~1111^^TDD|ara^^HL70296^eng^English-us^HL70296^v2^v2.1^TextInEnglish|M^Married|AME|4000776^^^AccMgr&1.3.6.1.4.1.44750.1.2.2&ISO^VN^1^19241011^19241012|PSSN123121234|DLN-123^US^20010123|1212121^^^NTH&rt23&HCD^AND^^19241011^19241012|N^NOT HISPANIC OR LATINO^HL70189|St. 
Francis Community Hospital of Lower South Side|N|2|US^United States of America^ISO3166_1|Vet123^retired^ART|BT^Bhutan^ISO3166_1|20080825111630+0115|Y|||20050110015014+0315||125097000^Goat^SCT|4880003^Beagle^SCT|||CA^Canada^ISO3166_1|89898989^WPN^Internet 4 | PD1|S^^ACR||LINDAS TEST ORGANIZATION^^SIISCLIENT818|88^Hippo^rold^H^V^Dr^MD^^TE^^^M10^DN^^||||||||||Methodist Church|||20150202^20150202 5 | NK1|1|Evan&&&&Aniston^ADAM^A^III^Dr.^MD^D|EMC^test^ACR^CHD^^^9.0^10.0|2222&HOME&STREET^Highway^GREENSBORO^NC^27401-1020^US^BI^^jkdha&test^^^^20000110^20050111~111&Duck ST^^Fowl|78788788^WPN^Internet^5555^^^^^^^^^20010110^20020110^^^^18~121111^PRN^CP|88888888^PRN^CP^5555^^^^^^^^878777^20010110^20020110^^^^18~6666666^^BP|O|20210818|20211218|||12345567^4^M11^T1&2.16.840.1.113883.19&HCD^MR^University Hospital^19241011^19241012|TestOrg^^O12^^^^EI^^^Org12||F^^^M|19620110045504||||ara||||||||||Green^John^A^II^DR^MD^D^^^19241012^G~Josh&&&&Bing^^stanley^^^^L|898989898^^FX~88888888^^CP|Street1&Palkstreet~ST-2|I-123^^^^BA~I-222^^^^DI||2106-3^test^FDDC||Security no-23|||1515151515^WPN^CP^555544^^^^^^^^777^20010110^20020110^^^^1|444444^^CP 6 | PV1|1|P|HUH AE OMU&9.8&ISO^OMU B^Bed 03^HOMERTON UNIVER^^C^Homerton UH^Floor5|E|1234567^4^M11^t&2.16.840.1.113883.19&HCD^ANON^University Hospital^19241011^19241012|4 East, room 136, bed B 4E^136^B^CommunityHospital^^N^^^|1122334^Alaz^Mohammed^Mahi^JR^Dr.^MD^^PERSONNELt&1.23&HCD^B^^^BR^^^^^^19241010^19241015^Al|C006^Woolfson^Kathleen^^^Dr^^^TEST&23.2&HCD^MSK^^^BA|C008^Condoc^leen^^^Dr^^^&1.3.6.1.4.1.44750.1.2.2&ISO^NAV^^^BR|SUR|||R|NHS Provider-General (inc.A\\T\\E-this Hosp)||VIP^Very Important Person^L^IMP^^DCM^v1.1^v1.2^Inportant Person|37^DISNEY^WALT^^^^^^AccMgr^^^^ANC|Inpatient|40007716^^^AccMng&1.2&HCD^AM|||||||||||||||||Admitted as Inpatient^Sample^ACR|22&Homes&FDK|Vegan^Vegetarian|HOMERTON UNIVER||Active|POC^Room-2^Bed-103^^^C^Greenland|Nursing home^^^^^^Rosewood|20150208113419+0110||||||50^^^T123&1.3.6.1.4.1.44750.1.2.2&ISO^MR||Othhel^^^^^^^^testing&&HCD||EOC124^5^M11^Etest&2.16.1&HCD^MR^CommunityHospital 7 | PV2|^ROOM1&2.16.840.1.113883.4.642.1.1108&ISO^BED1^FACILITY1^^^BUILDING1^FLOOR1^^^||140004^Chronic pharyngitis^SCT||||||||2|Health Checkup|12188^Hippocrates^Harold^H^IV^Dr^MD^^TE&Provider Master.Community Health and Hospitals&DNS^^^M10^DN^^|||||||||N|||2^^^3^^^V1.2^V1.3|||||||||||||C 8 | GT1|1|1516^4^M11^test^MR^Unity Hospital^19241011^19241012|RADIANT^LUCY^^|Rebecca^Jonas|1619 SOUTH UNIVERSITY^^MADISON^WI^53703^US|6082517777^^Internet^8484~717171^^PH|021212^^MD|20010412|M|P/F|BRO|G-SSN-12|20010410|20010415|2|EHS GENERIC EMPLOYER|1979 MILKY WAY^^VERONA^WI^53593^US|082719000^^PH|55121^^^^FI|3||N|SLF|20080825111630+0115|Y||||1231^^^^BC|M|20091010|20101010||||ger||||||MothersMaiden|BT^Bhutan^ISO3166_1||Ben^Charles~Ben2|000352^^CP~00121^^FX|Urgent requirement||||GEOrg|||||Germany 9 | IN1|1|BAV^Blue Advantage HMO|IC-1.31^24^BCV^&2.16.840.1.113883.1.1&ISO^NIIP^^19291011^19291012|Blue Cross Blue Shield of Texas|1979 MILKY WAY^^VERONA^WI^53593^US|Henry&&&&Roth^Rony^A^III^Dr.^MD^D^^^19251012|(555)555-5555^BPN^PH|PUBSUMB|SelfPay||Sam P. 
Hil|19891001|20501001||HMO^health maintenance organization policy|Doe^Rosallie^John^III^Mrs.^Bachelors^R|SPO^Spouse|19750228|3857 Velvet Treasure Terrace^^Midnight^NC^27878^US|||||||||||||||||PN-145|150&USD^DC||||||F^Female|2000 MILKY WAY^^VERONA^WI^53593^US|||B||HMO-12345^^^&2.16.840.1.113883.1.3&ISO^NI 10 | IN2|1117^4^M11^&2.16.840.1.113883.1.4&ISO^EI^University Hospital~1118^^^^BC|425-57-9745|||I^Insurance company|Medicare-12345|Jack&&&&Aniston^ADAM^A^III^Dr.^MD^D^^^19241012^^^^PF^Addsm|MCN-008||MI-12345||||||||||||||||||||||||eng^English|||||||||||||||Richard^Paul|254622222^^PH|||||||||||PNM1234^4^M11^PM&2.6.1&HCD^MR^University Hospital^19241011^19241012||0005245^WPN^Internet~^^CP|555777888^^FX~^^PH||||||Max Life Insurance||02^Spouse 11 | OBX|1|NM|8867-4^heartrate^LN||60~120|beats/min^^ISO|70-80|A^A^HL7nnnn~B^B|||S|||19990702|Org15^ID of producer^CAS|1134^Aly^Zafar^Mahendra^JR^Dr.^MD^^PERSt&1.23&HCD^B^^^BR^^^^^^19241010^19241015^Al~2234^Pauly^Berrie^Raud|OBS^This is test method^AS4|EI12.3^NI2^426d2726-51fc-00fe-a946-8596e80a80eb^GUID~^^1.3.6.1.4.1.44750.1.2.2^ISO|19990702|BU^Observation site^E5|EI21^OII||FairOaks Hspital|Research Park^Fairfax^VA^22031^USA|MD-25^Atchinson^Christopher^^MD|||||||PAI-1^FAI-1 12 | NTE|1||No Antibodies Detected||MLEE^ATTEND^AARON^A^^^MD|202010101500+0215| 13 | ORC|RE|4422^NIST-AA-IZ-2^2.16.840.1.114222.4.3.3.5.1.2^ISO|13696^NIST-AA-IZ-2^2.16.840.1.114222.4.3.3.5.1.2^ISO|||||||7824^Jackson^Lily^Suzanne^^^^^NIST-PI-1^L^^^PRN||654^Thomas^Wilma^Elizabeth^^^^^NIST-PI-1^L^^^MD|||202108181126||NISTEHRFAC^NISTEHRFacility^HL70362| 14 | TQ1|1|54^&lbs|P&Post (after)&HL70335^HD^^^12^min^^PC^120^s|11:30:05~11:45:05|30^s|150^&kg|202110091600-0800|202111091600-0800|A|This is condition text|Please follow the text instructions||2^hr|5 15 | RXA|0|1|20210818||49281021588^TENIVAC^NDC|0.5|mL^mL^UCUM||00^New Record^NIP001|7824^Jackson^Lily^Suzanne^^^^^NIST-PI-1^L^^^PRN|^^^NIST-Clinic-1||||315841|20211216|PMC^Sanofi Pasteur^MVX|||CP|A||||||HUH AE OMU&9.8&ISO^OMU B^Bed 03^HOMERTON UNIVER^^C^Homerton UH^Floor5|0007&Hospital Lane^Ste. 
123^Ann Arbor ^MI^99999^USA^M^^&W^^^20000110&20000120^^^^^^^Near Highway 16 | RXR|C28161^Intramuscular^NCIT|RD^Right Deltoid^HL70163 17 | OBX|1|CE|30963-3^Vaccine Funding Source^LN|1|PHC70^Private^CDCPHINVS||||||F|||20210818 18 | OBX|2|CE|64994-7^Vaccine Funding Program Eligibility^LN|2|V01^Not VFC Eligible^HL70064||||||F|||20210818|||VXC40^per immunization^CDCPHINVS 19 | OBX|3|CE|69764-9^Document Type^LN|3|253088698300028811170411^Tetanus/Diphtheria (Td) Vaccine VIS^cdcgs1vis||||||F|||20210818 20 | OBX|4|DT|29769-7^Date Vis Presented^LN|3|20210818||||||F|||20210818 -------------------------------------------------------------------------------- /sample-data/VXU-V04-02_failedUpload.hl7: -------------------------------------------------------------------------------- 1 | MSH|^~\&|IMMAPP|GHHSFacility^2.16.840.1.122848.1.30^ISO|EHRApp^1.Edu^ISO|GHHRFacility^2.16.840.1.1122848.1.32^ISO|202108181126+0215|SECURITY|VXU^V04^VXU_V04|MSG00001|P|2.8|||||USA||en-US|||22 GHH Inc.|23 GHH Inc.|24GHH^2.16.840.1.114884.10.20^ISO|25GHH^2.16.840.1.114884.10.23^ISO 2 | SFT|Orion|2.4.3.52854|Rhapsody|2.4.3|Testactivity|20070725111624 3 | PID|1|1234567^4^M11^test^MR^University Hospital^19241011^19241012|PATID1234^5^M11^test1&2.16.1&HCD^MR^GOOD HEALTH HOSPITAL~123456789^^^USSSA^SS|PATID567^^^test2|EVERYMAN&&&&Aniston^ADAM^A^III^Dr.^MD^D^^^19241012^^^^PF^Addsm~Josh&&&&Bing^^stanley^^^^L^^^^^19241010^19241015|SMITH^Angela^L|198808181126+0215|M|elbert^Son|2106-3^White^HL70005~2028-9^Asian^HL70005|1000&Hospital Lane^Ste. 123^Ann Arbor ^MI^99999^USA^M^^&W^^^20000110&20000120^^^^^^^Near Highway|GL|78788788^^CP^5555^^^1111^^^^^2222^20010110^20020110^^^^18~12121212^^CP|7777^^CP~1111^^TDD|ara^^HL70296^eng^English-us^HL70296^v2^v2.1^TextInEnglish|M^Married|AME|4000776^^^AccMgr&1.3.6.1.4.1.44750.1.2.2&ISO^VN^1^19241011^19241012|PSSN123121234|DLN-123^US^20010123|1212121^^^NTH&rt23&HCD^AND^^19241011^19241012|N^NOT HISPANIC OR LATINO^HL70189|St. 
Francis Community Hospital of Lower South Side|N|2|US^United States of America^ISO3166_1|Vet123^retired^ART|BT^Bhutan^ISO3166_1|20080825111630+0115|Y|||20050110015014+0315||125097000^Goat^SCT|4880003^Beagle^SCT|||CA^Canada^ISO3166_1|89898989^WPN^Internet 4 | PD1|S^^ACR||LINDAS TEST ORGANIZATION^^SIISCLIENT818|88^Hippo^rold^H^V^Dr^MD^^TE^^^M10^DN^^||||||||||Methodist Church|||20150202^20150202 5 | NK1|1|Evan&&&&Aniston^ADAM^A^III^Dr.^MD^D|EMC^test^ACR^CHD^^^9.0^10.0|2222&HOME&STREET^Highway^GREENSBORO^NC^27401-1020^US^BI^^jkdha&test^^^^20000110^20050111~111&Duck ST^^Fowl|78788788^WPN^Internet^5555^^^^^^^^^20010110^20020110^^^^18~121111^PRN^CP|88888888^PRN^CP^5555^^^^^^^^878777^20010110^20020110^^^^18~6666666^^BP|O|20210818|20211218|||12345567^4^M11^T1&2.16.840.1.113883.19&HCD^MR^University Hospital^19241011^19241012|TestOrg^^O12^^^^EI^^^Org12||F^^^M|19620110045504||||ara||||||||||Green^John^A^II^DR^MD^D^^^19241012^G~Josh&&&&Bing^^stanley^^^^L|898989898^^FX~88888888^^CP|Street1&Palkstreet~ST-2|I-123^^^^BA~I-222^^^^DI||2106-3^test^FDDC||Security no-23|||1515151515^WPN^CP^555544^^^^^^^^777^20010110^20020110^^^^1|444444^^CP 6 | PV1|1|P|HUH AE OMU&9.8&ISO^OMU B^Bed 03^HOMERTON UNIVER^^C^Homerton UH^Floor5|E|1234567^4^M11^t&2.16.840.1.113883.19&HCD^ANON^University Hospital^19241011^19241012|4 East, room 136, bed B 4E^136^B^CommunityHospital^^N^^^|1122334^Alaz^Mohammed^Mahi^JR^Dr.^MD^^PERSONNELt&1.23&HCD^B^^^BR^^^^^^19241010^19241015^Al|C006^Woolfson^Kathleen^^^Dr^^^TEST&23.2&HCD^MSK^^^BA|C008^Condoc^leen^^^Dr^^^&1.3.6.1.4.1.44750.1.2.2&ISO^NAV^^^BR|SUR|||R|NHS Provider-General (inc.A\T\E-this Hosp)||VIP^Very Important Person^L^IMP^^DCM^v1.1^v1.2^Inportant Person|37^DISNEY^WALT^^^^^^AccMgr^^^^ANC|Inpatient|40007716^^^AccMng&1.2&HCD^AM|||||||||||||||||Admitted as Inpatient^Sample^ACR|22&Homes&FDK|Vegan^Vegetarian|HOMERTON UNIVER||Active|POC^Room-2^Bed-103^^^C^Greenland|Nursing home^^^^^^Rosewood|20150208113419+0110||||||50^^^T123&1.3.6.1.4.1.44750.1.2.2&ISO^MR||Othhel^^^^^^^^testing&&HCD||EOC124^5^M11^Etest&2.16.1&HCD^MR^CommunityHospital 7 | PV2|^ROOM1&2.16.840.1.113883.4.642.1.1108&ISO^BED1^FACILITY1^^^BUILDING1^FLOOR1^^^||140004^Chronic pharyngitis^SCT||||||||2|Health Checkup|12188^Hippocrates^Harold^H^IV^Dr^MD^^TE&Provider Master.Community Health and Hospitals&DNS^^^M10^DN^^|||||||||N|||2^^^3^^^V1.2^V1.3|||||||||||||C 8 | GT1|1|1516^4^M11^test^MR^Unity Hospital^19241011^19241012|RADIANT^LUCY^^|Rebecca^Jonas|1619 SOUTH UNIVERSITY^^MADISON^WI^53703^US|6082517777^^Internet^8484~717171^^PH|021212^^MD|20010412|M|P/F|BRO|G-SSN-12|20010410|20010415|2|EHS GENERIC EMPLOYER|1979 MILKY WAY^^VERONA^WI^53593^US|082719000^^PH|55121^^^^FI|3||N|SLF|20080825111630+0115|Y||||1231^^^^BC|M|20091010|20101010||||ger||||||MothersMaiden|BT^Bhutan^ISO3166_1||Ben^Charles~Ben2|000352^^CP~00121^^FX|Urgent requirement||||GEOrg|||||Germany 9 | IN1|1|BAV^Blue Advantage HMO|IC-1.31^24^BCV^&2.16.840.1.113883.1.1&ISO^NIIP^^19291011^19291012|Blue Cross Blue Shield of Texas|1979 MILKY WAY^^VERONA^WI^53593^US|Henry&&&&Roth^Rony^A^III^Dr.^MD^D^^^19251012|(555)555-5555^BPN^PH|PUBSUMB|SelfPay||Sam P. 
Hil|19891001|20501001||HMO^health maintenance organization policy|Doe^Rosallie^John^III^Mrs.^Bachelors^R|SPO^Spouse|19750228|3857 Velvet Treasure Terrace^^Midnight^NC^27878^US|||||||||||||||||PN-145|150&USD^DC||||||F^Female|2000 MILKY WAY^^VERONA^WI^53593^US|||B||HMO-12345^^^&2.16.840.1.113883.1.3&ISO^NI 10 | IN2|1117^4^M11^&2.16.840.1.113883.1.4&ISO^EI^University Hospital~1118^^^^BC|425-57-9745|||I^Insurance company|Medicare-12345|Jack&&&&Aniston^ADAM^A^III^Dr.^MD^D^^^19241012^^^^PF^Addsm|MCN-008||MI-12345||||||||||||||||||||||||eng^English|||||||||||||||Richard^Paul|254622222^^PH|||||||||||PNM1234^4^M11^PM&2.6.1&HCD^MR^University Hospital^19241011^19241012||0005245^WPN^Internet~^^CP|555777888^^FX~^^PH||||||Max Life Insurance||02^Spouse 11 | OBX|1|NM|8867-4^heartrate^LN||60~120|beats/min^^ISO|70-80|A^A^HL7nnnn~B^B|||S|||19990702|Org15^ID of producer^CAS|1134^Aly^Zafar^Mahendra^JR^Dr.^MD^^PERSt&1.23&HCD^B^^^BR^^^^^^19241010^19241015^Al~2234^Pauly^Berrie^Raud|OBS^This is test method^AS4|EI12.3^NI2^426d2726-51fc-00fe-a946-8596e80a80eb^GUID~^^1.3.6.1.4.1.44750.1.2.2^ISO|19990702|BU^Observation site^E5|EI21^OII||FairOaks Hspital|Research Park^Fairfax^VA^22031^USA|MD-25^Atchinson^Christopher^^MD|||||||PAI-1^FAI-1 12 | PRT|1|AD||AD^Admitting Provider^HL70912|10535^van Beethoven&van^Ludwig^A^III^Dr^PHD||1^Hospital^HL70406|Good Health Hospital|HUH AE OMU&9.8&ISO^OMU B^Bed 03^HOMERTON UNIVER^^C^Homerton UH^Floor5|SampleDevice|20210223093000|20210224100000||6547 Drums St^^Flint^MI^48507|^PRN^PH^^^810^4502369|00643169001763^^2.51.1.1^ISO|20140401|20220712|123ABC|21A11F4855| RA12345678BA123^^2.16.840.1.113883.6.18.1.34^ISO|| 13 | NTE|1||No Antibodies Detected||MLEE^ATTEND^AARON^A^^^MD|202010101500+0215| 14 | ORC|RE|4422^NIST-AA-IZ-2^2.16.840.1.114222.4.3.3.5.1.2^ISO|13696^NIST-AA-IZ-2^2.16.840.1.114222.4.3.3.5.1.2^ISO|||||||7824^Jackson^Lily^Suzanne^^^^^NIST-PI-1^L^^^PRN||654^Thomas^Wilma^Elizabeth^^^^^NIST-PI-1^L^^^MD|||202108181126||NISTEHRFAC^NISTEHRFacility^HL70362| 15 | PRT||AD||AD||PPT||General Medical Center|Street-5^136^B^CommunityHospital^^N^^^|PD-10.1^PD-10.2 16 | TQ1|1|54^&lbs|P&Post (after)&HL70335^HD^^^12^min^^PC^120^s|11:30:05~11:45:05|30^s|150^&kg|202110091600-0800|202111091600-0800|A|This is condition text|Please follow the text instructions||2^hr|5 17 | RXA|0|1|20210818||49281021588^TENIVAC^NDC|0.5|mL^mL^UCUM||00^New Record^NIP001|7824^Jackson^Lily^Suzanne^^^^^NIST-PI-1^L^^^PRN|^^^NIST-Clinic-1||||315841|20211216|PMC^Sanofi Pasteur^MVX|||CP|A||||||HUH AE OMU&9.8&ISO^OMU B^Bed 03^HOMERTON UNIVER^^C^Homerton UH^Floor5|0007&Hospital Lane^Ste. 
123^Ann Arbor ^MI^99999^USA^M^^&W^^^20000110&20000120^^^^^^^Near Highway 18 | RXR|C28161^Intramuscular^NCIT|RD^Right Deltoid^HL70163 19 | OBX|1|CE|30963-3^Vaccine Funding Source^LN|1|PHC70^Private^CDCPHINVS||||||F|||20210818 20 | OBX|2|CE|64994-7^Vaccine Funding Program Eligibility^LN|2|V01^Not VFC Eligible^HL70064||||||F|||20210818|||VXC40^per immunization^CDCPHINVS 21 | OBX|3|CE|69764-9^Document Type^LN|3|253088698300028811170411^Tetanus/Diphtheria (Td) Vaccine VIS^cdcgs1vis||||||F|||20210818 22 | OBX|4|DT|29769-7^Date Vis Presented^LN|3|20210818||||||F|||20210818 23 | PRT||CO||CP|432^Jartckson|PPT||Mayo12 Memorial|Eureka Springs^136^B^CommunityHospital^^N^^^|PDORC-10.1^PD-10.2|20210223093000|20210224100000||6547 Drums St^^Flint^MI^48507|^PRN^PH^^^810^4502369|00643169001763^^2.51.1.1^ISO|20140401|20220712|123ABC|21A11F4855| RA12345678BA123^^2.16.840.1.113883.6.18.1.34^ISO|| 24 | -------------------------------------------------------------------------------- /sample-data/VXU_single_messy_demo.hl7: -------------------------------------------------------------------------------- 1 | MSH|^~\&|IMMAPP|GHHSFacility^2.16.840.1.122848.1.30^ISO|EHRApp^2.Edu^ISO|GHHRFacility^2.16.840.1.1122848.1.32^ISO|202108181126+0215|SECURITY|VXU^V04^VXU_V04|MSG00004|P|2.8|||||USA||en-US|||22 GHH Inc.|23 GHH Inc.|24GHH^2.16.840.1.114884.10.20^ISO|25GHH^2.16.840.1.114884.10.23^ISO 2 | SFT|Orion|2.4.3.52854|Rhapsody|2.4.3|Testactivity|20080825121624 3 | PID|1|7777555^4^M11^test^MR^University Hospital^19241011^19241012|PATID7755^5^M11^test1|PATID7758^^^test5|doe .^ John1 ^A.|TEST^Mother, of^L|198505101126+0215|M||2106-3^White^HL70005|555 E. 3065 S.^^Salt Lake CIty^ut^84106^USA||801-540-3661^^CP|||M^Married||4880776||||N^NOT HISPANIC OR LATINO^HL70189||N||US^United States of America^ISO3166_1||||N|||20080110015014+0315||||||| 4 | PV1|1|P|ISO^NWG^2|E|||1122334^Alaz^Mohammed^Mahi^JR^Dr.^MD^^PERSONNELt|||SUR||||||||Inpatient|48887716^^^AccMng|||||||||||||||||||||||||20150208113419+0110|||||||||| 5 | OBX|1|NM|8867-4^heartrate^LN||60~120|beats/min^^ISO|70-80|A^A^HL7nnnn~B^B|||S|||19990702|Org15^ID of producer^CAS|1134^Aly^Zafar^Mahendra^JR^Dr.^MD^^PERSt&1.23&HCD^B^^^BR^^^^^^19241010^19241015^Al~2234^Pauly^Berrie^Raud|OBS^This is test method^AS4|EI12.3^NI2^426d2726-51fc-00fe-a946-8596e80a80eb^GUID~^^1.3.6.1.4.1.44750.1.2.2^ISO|19990702|BU^Observation site^E5|EI21^OII||FairOaks Hspital|Research Park^Fairfax^VA^22031^USA|MD-25^Atchinson^Christopher^^MD|||||||PAI-1^FAI-1 6 | RXA|0|1|20210818||49281021588^TENIVAC^NDC|0.5|mL^mL^UCUM||00^New Record^NIP001|7824^Jackson^Lily^Suzanne^^^^^NIST-PI-1^L^^^PRN|^^^NIST-Clinic-1||||315841|20211216|PMC^Sanofi Pasteur^MVX|||CP|A||||||HUH AE OMU&9.8&ISO^OMU B^Bed 03^HOMERTON UNIVER^^C^Homerton UH^Floor5|0007&Hospital Lane^Ste. 
123^Ann Arbor ^MI^99999^USA^M^^&W^^^20000110&20000120^^^^^^^Near Highway 7 | RXR|C28161^Intramuscular^NCIT|RD^Right Deltoid^HL70163 8 | OBX|1|CE|30963-3^Vaccine Funding Source^LN|1|PHC70^Private^CDCPHINVS||||||F|||20210818 9 | OBX|2|CE|64994-7^Vaccine Funding Program Eligibility^LN|2|V01^Not VFC Eligible^HL70064||||||F|||20210818|||VXC40^per immunization^CDCPHINVS 10 | OBX|3|CE|69764-9^Document Type^LN|3|253088698300028811170411^Tetanus/Diphtheria (Td) Vaccine VIS^cdcgs1vis||||||F|||20210818 11 | OBX|4|DT|29769-7^Date Vis Presented^LN|3|20210818||||||F|||20210818 12 | -------------------------------------------------------------------------------- /scripts/Synapse/ReRunECRfromPostBundle.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "962a47d0-b2c4-4516-84c0-b947280645fe", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import datetime\n", 11 | "from pyspark.sql import SparkSession\n", 12 | "from pyspark.sql.functions import lit, current_timestamp\n", 13 | "from notebookutils import mssparkutils\n", 14 | "\n", 15 | "spark = SparkSession.builder.getOrCreate()\n", 16 | "\n", 17 | "# source and destination paths\n", 18 | "storage_account = \"$STORAGE_ACCOUNT\"\n", 19 | "ecr_post_bundle_file_path = f\"abfss://bundle-snapshots@{storage_account}.dfs.core.windows.net/post/ecr\"\n", 20 | "ecr_rerun_file_path = f\"abfss://source-data@{storage_account}.dfs.core.windows.net/ecr-rerun\"\n", 21 | "\n", 22 | "# parquet log file: timestamp, filename, and destination path\n", 23 | "timestamp_str = datetime.datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n", 24 | "parquet_file_name = f\"copied_files_log_{timestamp_str}.parquet\"\n", 25 | "delta_tables = f\"abfss://delta-tables@{storage_account}.dfs.core.windows.net\"\n", 26 | "parquet_file_path = f\"{delta_tables}/ecr-rerun-logs/{parquet_file_name}\"\n", 27 | "\n", 28 | "# dataframe to track moved files\n", 29 | "copied_files_log = spark.createDataFrame([], schema=\"filename string, source_path string, dest_path string, timestamp string, file_exists_skip boolean, success boolean\")\n", 30 | "\n", 31 | "# outer try/except for accessing the list of files\n", 32 | "# inner try/except for issues copying files and marking success or failure\n", 33 | "try:\n", 34 | " # get list of files\n", 35 | " files = mssparkutils.fs.ls(ecr_post_bundle_file_path)\n", 36 | "\n", 37 | " for file in files:\n", 38 | " # initialize 'success' flag\n", 39 | " success = True\n", 40 | " try:\n", 41 | " src_path = file.path\n", 42 | " dest_path = f\"{ecr_rerun_file_path}/{file.name}\"\n", 43 | "\n", 44 | " # capture the timestamp before copying the file\n", 45 | " copy_timestamp = datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n", 46 | "\n", 47 | " # check if the file exists\n", 48 | " file_exists = mssparkutils.fs.exists(dest_path)\n", 49 | "\n", 50 | " # copy the file if it doesn't exist\n", 51 | " if not file_exists:\n", 52 | " mssparkutils.fs.cp(src=src_path, dest=dest_path)\n", 53 | " else:\n", 54 | " # if the file already exists, set 'success' to false\n", 55 | " success = False\n", 56 | "\n", 57 | " except Exception as e:\n", 58 | " # if there's an error copying, set 'success' to false\n", 59 | " success = False\n", 60 | " print(f\"Error copying file {file.name}: {str(e)}\")\n", 61 | "\n", 62 | " # log the file copy\n", 63 | " new_row = spark.createDataFrame([(file.name, src_path, dest_path, copy_timestamp, file_exists, success)])\n",
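" # note: DataFrame.union matches columns by position, so the tuple above must keep the log schema's column order\n",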
64 | " copied_files_log = copied_files_log.union(new_row)\n", 65 | "\n", 66 | "except Exception as e:\n", 67 | " print(f\"Error retrieving file list: {str(e)}\")\n", 68 | " \n", 69 | "# add current timestamp\n", 70 | "copied_files_log = copied_files_log.withColumn(\"log_timestamp\", current_timestamp())\n", 71 | "\n", 72 | "# write log to parquet\n", 73 | "copied_files_log.write.mode(\"append\").parquet(parquet_file_path)\n", 74 | " \n", 75 | "# inspect log of moved files\n", 76 | "copied_files_log.show()" 77 | ] 78 | } 79 | ], 80 | "metadata": { 81 | "kernelspec": { 82 | "display_name": "Python 3 (ipykernel)", 83 | "language": "python", 84 | "name": "python3" 85 | }, 86 | "language_info": { 87 | "codemirror_mode": { 88 | "name": "ipython", 89 | "version": 3 90 | }, 91 | "file_extension": ".py", 92 | "mimetype": "text/x-python", 93 | "name": "python", 94 | "nbconvert_exporter": "python", 95 | "pygments_lexer": "ipython3", 96 | "version": "3.10.13" 97 | } 98 | }, 99 | "nbformat": 4, 100 | "nbformat_minor": 5 101 | } 102 | -------------------------------------------------------------------------------- /scripts/Synapse/config/ECRDatastoreRefreshConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ECR Datastore Refresh", 3 | "properties": { 4 | "description": "Updates the ECR Datastore daily with ECR data that has run through the DIBBs Pipeline", 5 | "activities": [ 6 | { 7 | "name": "UpdateECRDataStore", 8 | "type": "SynapseNotebook", 9 | "dependsOn": [], 10 | "policy": { 11 | "timeout": "0.12:00:00", 12 | "retry": 0, 13 | "retryIntervalInSeconds": 30, 14 | "secureOutput": false, 15 | "secureInput": false 16 | }, 17 | "userProperties": [], 18 | "typeProperties": { 19 | "notebook": { 20 | "referenceName": "updateECRDataStore", 21 | "type": "NotebookReference" 22 | }, 23 | "snapshot": true, 24 | "sparkPool": { 25 | "referenceName": "sparkpool", 26 | "type": "BigDataPoolReference" 27 | } 28 | } 29 | } 30 | ], 31 | "annotations": [] 32 | } 33 | } -------------------------------------------------------------------------------- /scripts/Synapse/config/ECRDatastoreRefreshDailyTriggerConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ECR Datastore Refresh Daily Trigger", 3 | "properties": { 4 | "annotations": [], 5 | "runtimeState": "Started", 6 | "pipelines": [ 7 | { 8 | "pipelineReference": { 9 | "referenceName": "ECR Datastore Refresh", 10 | "type": "PipelineReference" 11 | } 12 | } 13 | ], 14 | "type": "ScheduleTrigger", 15 | "typeProperties": { 16 | "recurrence": { 17 | "frequency": "Day", 18 | "interval": 1, 19 | "startTime": "2023-07-20T11:30:00", 20 | "timeZone": "Pacific Standard Time" 21 | } 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /scripts/Synapse/config/SynapseAnalyticsPipelineConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Synapse Analytics Pipeline", 3 | "properties": { 4 | "activities": [ 5 | { 6 | "name": "updateECRDataStore", 7 | "type": "SynapseNotebook", 8 | "dependsOn": [], 9 | "policy": { 10 | "timeout": "0.12:00:00", 11 | "retry": 0, 12 | "retryIntervalInSeconds": 30, 13 | "secureOutput": false, 14 | "secureInput": false 15 | }, 16 | "userProperties": [], 17 | "typeProperties": { 18 | "notebook": { 19 | "referenceName": "updateECRDataStore", 20 | "type": "NotebookReference" 21 | }, 22 | "sparkPool": { 23 | "referenceName": 
"sparkpool", 24 | "type": "BigDataPoolReference" 25 | }, 26 | "executorSize": "Small", 27 | "conf": { 28 | "spark.dynamicAllocation.enabled": null, 29 | "spark.dynamicAllocation.minExecutors": null, 30 | "spark.dynamicAllocation.maxExecutors": null 31 | }, 32 | "driverSize": "Small", 33 | "numExecutors": null 34 | } 35 | }, 36 | { 37 | "name": "updateECRDataStorePersonID", 38 | "type": "SynapseNotebook", 39 | "dependsOn": [ 40 | { 41 | "activity": "updateECRDataStore", 42 | "dependencyConditions": [ 43 | "Succeeded" 44 | ] 45 | } 46 | ], 47 | "policy": { 48 | "timeout": "0.12:00:00", 49 | "retry": 0, 50 | "retryIntervalInSeconds": 30, 51 | "secureOutput": false, 52 | "secureInput": false 53 | }, 54 | "userProperties": [], 55 | "typeProperties": { 56 | "notebook": { 57 | "referenceName": "updateECRDataStorePersonID", 58 | "type": "NotebookReference" 59 | }, 60 | "sparkPool": { 61 | "referenceName": "sparkpool", 62 | "type": "BigDataPoolReference" 63 | }, 64 | "executorSize": "Small", 65 | "conf": { 66 | "spark.dynamicAllocation.enabled": null, 67 | "spark.dynamicAllocation.minExecutors": null, 68 | "spark.dynamicAllocation.maxExecutors": null 69 | }, 70 | "driverSize": "Small", 71 | "numExecutors": null 72 | } 73 | }, 74 | { 75 | "name": "updateECRDataStoreIrisID", 76 | "type": "SynapseNotebook", 77 | "dependsOn": [ 78 | { 79 | "activity": "updateECRDataStorePersonID", 80 | "dependencyConditions": [ 81 | "Succeeded" 82 | ] 83 | } 84 | ], 85 | "policy": { 86 | "timeout": "0.12:00:00", 87 | "retry": 0, 88 | "retryIntervalInSeconds": 30, 89 | "secureOutput": false, 90 | "secureInput": false 91 | }, 92 | "userProperties": [], 93 | "typeProperties": { 94 | "notebook": { 95 | "referenceName": "updateECRDataStoreIrisID", 96 | "type": "NotebookReference" 97 | }, 98 | "sparkPool": { 99 | "referenceName": "sparkpool", 100 | "type": "BigDataPoolReference" 101 | }, 102 | "executorSize": "Small", 103 | "conf": { 104 | "spark.dynamicAllocation.enabled": null, 105 | "spark.dynamicAllocation.minExecutors": null, 106 | "spark.dynamicAllocation.maxExecutors": null 107 | }, 108 | "driverSize": "Small", 109 | "numExecutors": null 110 | } 111 | }, 112 | { 113 | "name": "updateECRDataStoreIncidentID", 114 | "type": "SynapseNotebook", 115 | "dependsOn": [ 116 | { 117 | "activity": "updateECRDataStoreIrisID", 118 | "dependencyConditions": [ 119 | "Succeeded" 120 | ] 121 | } 122 | ], 123 | "policy": { 124 | "timeout": "0.12:00:00", 125 | "retry": 0, 126 | "retryIntervalInSeconds": 30, 127 | "secureOutput": false, 128 | "secureInput": false 129 | }, 130 | "userProperties": [], 131 | "typeProperties": { 132 | "notebook": { 133 | "referenceName": "updateECRDataStoreIncidentID", 134 | "type": "NotebookReference" 135 | }, 136 | "sparkPool": { 137 | "referenceName": "sparkpool", 138 | "type": "BigDataPoolReference" 139 | }, 140 | "executorSize": "Small", 141 | "conf": { 142 | "spark.dynamicAllocation.enabled": null, 143 | "spark.dynamicAllocation.minExecutors": null, 144 | "spark.dynamicAllocation.maxExecutors": null 145 | }, 146 | "driverSize": "Small", 147 | "numExecutors": null 148 | } 149 | }, 150 | { 151 | "name": "generateIRISCaseFiles", 152 | "type": "SynapseNotebook", 153 | "dependsOn": [ 154 | { 155 | "activity": "updateECRDataStoreIncidentID", 156 | "dependencyConditions": [ 157 | "Succeeded" 158 | ] 159 | } 160 | ], 161 | "policy": { 162 | "timeout": "0.12:00:00", 163 | "retry": 0, 164 | "retryIntervalInSeconds": 30, 165 | "secureOutput": false, 166 | "secureInput": false 167 | }, 168 | "userProperties": [], 
169 | "typeProperties": { 170 | "notebook": { 171 | "referenceName": "generateIRISCaseFiles", 172 | "type": "NotebookReference" 173 | }, 174 | "sparkPool": { 175 | "referenceName": "sparkpool", 176 | "type": "BigDataPoolReference" 177 | }, 178 | "executorSize": "Small", 179 | "conf": { 180 | "spark.dynamicAllocation.enabled": null, 181 | "spark.dynamicAllocation.minExecutors": null, 182 | "spark.dynamicAllocation.maxExecutors": null 183 | }, 184 | "driverSize": "Small", 185 | "numExecutors": null 186 | } 187 | } 188 | ], 189 | "variables": { 190 | "test": { 191 | "type": "String", 192 | "defaultValue": "test" 193 | } 194 | }, 195 | "annotations": [], 196 | "lastPublishTime": "2023-08-07T22:50:54Z" 197 | }, 198 | "type": "Microsoft.Synapse/workspaces/pipelines" 199 | } -------------------------------------------------------------------------------- /scripts/Synapse/config/SynapseAnalyticsPipelineWeeklyTriggerConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Synapse Analytics Pipeline Weekly Trigger", 3 | "properties": { 4 | "annotations": [], 5 | "runtimeState": "Started", 6 | "pipelines": [ 7 | { 8 | "pipelineReference": { 9 | "referenceName": "Synapse Analytics Pipeline", 10 | "type": "PipelineReference" 11 | } 12 | } 13 | ], 14 | "type": "ScheduleTrigger", 15 | "typeProperties": { 16 | "recurrence": { 17 | "frequency": "Week", 18 | "interval": 1, 19 | "startTime": "2023-07-05T20:45:00", 20 | "timeZone": "Pacific Standard Time" 21 | } 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /scripts/Synapse/config/UpdateMIIConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Update MII", 3 | "properties": { 4 | "activities": [ 5 | { 6 | "name": "updateMII", 7 | "type": "SynapseNotebook", 8 | "dependsOn": [], 9 | "policy": { 10 | "timeout": "0.12:00:00", 11 | "retry": 0, 12 | "retryIntervalInSeconds": 30, 13 | "secureOutput": false, 14 | "secureInput": false 15 | }, 16 | "userProperties": [], 17 | "typeProperties": { 18 | "notebook": { 19 | "referenceName": "updateMII", 20 | "type": "NotebookReference" 21 | }, 22 | "parameters": { 23 | "filename": { 24 | "value": { 25 | "value": "@pipeline().parameters.triggeringFile", 26 | "type": "Expression" 27 | }, 28 | "type": "string" 29 | } 30 | }, 31 | "snapshot": true, 32 | "sparkPool": { 33 | "referenceName": "sparkpool", 34 | "type": "BigDataPoolReference" 35 | } 36 | } 37 | } 38 | ], 39 | "parameters": { 40 | "triggeringFile": { 41 | "type": "string" 42 | } 43 | }, 44 | "annotations": [], 45 | "lastPublishTime": "2023-08-08T17:57:25Z" 46 | }, 47 | "type": "Microsoft.Synapse/workspaces/pipelines" 48 | } -------------------------------------------------------------------------------- /scripts/Synapse/config/UpdateMIITriggerConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Update MII Trigger", 3 | "properties": { 4 | "annotations": [], 5 | "runtimeState": "Started", 6 | "pipelines": [ 7 | { 8 | "pipelineReference": { 9 | "referenceName": "Update MII", 10 | "type": "PipelineReference" 11 | }, 12 | "parameters": { 13 | "triggeringFile": "@trigger().outputs.body.fileName" 14 | } 15 | } 16 | ], 17 | "type": "BlobEventsTrigger", 18 | "typeProperties": { 19 | "blobPathBeginsWith": "/patient-data/blobs/MII", 20 | "blobPathEndsWith": ".parquet", 21 | "ignoreEmptyBlobs": true, 22 | "scope": 
"/subscriptions/${SUBSCRIPTION_ID}/resourceGroups/${RESOURCE_GROUP_NAME}/providers/Microsoft.Storage/storageAccounts/phdi${TF_ENV}phi${SHORT_CID}", 23 | "events": [ 24 | "Microsoft.Storage.BlobCreated" 25 | ] 26 | } 27 | } 28 | } -------------------------------------------------------------------------------- /scripts/Synapse/config/UpdateMPIConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Update MPI", 3 | "properties": { 4 | "activities": [ 5 | { 6 | "name": "Update MPI", 7 | "type": "SynapseNotebook", 8 | "dependsOn": [], 9 | "policy": { 10 | "timeout": "0.12:00:00", 11 | "retry": 0, 12 | "retryIntervalInSeconds": 30, 13 | "secureOutput": false, 14 | "secureInput": false 15 | }, 16 | "userProperties": [], 17 | "typeProperties": { 18 | "notebook": { 19 | "referenceName": "convertParquetMPI", 20 | "type": "NotebookReference" 21 | }, 22 | "parameters": { 23 | "filename": { 24 | "value": { 25 | "value": "@pipeline().parameters.triggeringFile", 26 | "type": "Expression" 27 | }, 28 | "type": "string" 29 | } 30 | }, 31 | "snapshot": true, 32 | "sparkPool": { 33 | "referenceName": "sparkpool", 34 | "type": "BigDataPoolReference" 35 | } 36 | } 37 | } 38 | ], 39 | "parameters": { 40 | "triggeringFile": { 41 | "type": "string" 42 | } 43 | }, 44 | "annotations": [], 45 | "lastPublishTime": "2023-08-07T21:07:18Z" 46 | }, 47 | "type": "Microsoft.Synapse/workspaces/pipelines" 48 | } -------------------------------------------------------------------------------- /scripts/Synapse/config/UpdateMPITriggerConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Update MPI Trigger", 3 | "properties": { 4 | "annotations": [], 5 | "runtimeState": "Started", 6 | "pipelines": [ 7 | { 8 | "pipelineReference": { 9 | "referenceName": "Update MPI", 10 | "type": "PipelineReference" 11 | }, 12 | "parameters": { 13 | "triggeringFile": "@trigger().outputs.body.fileName" 14 | } 15 | } 16 | ], 17 | "type": "BlobEventsTrigger", 18 | "typeProperties": { 19 | "blobPathBeginsWith": "/patient-data/blobs/MPI", 20 | "blobPathEndsWith": ".parquet", 21 | "ignoreEmptyBlobs": true, 22 | "scope": "/subscriptions/${SUBSCRIPTION_ID}/resourceGroups/${RESOURCE_GROUP_NAME}/providers/Microsoft.Storage/storageAccounts/phdi${TF_ENV}phi${SHORT_CID}", 23 | "events": [ 24 | "Microsoft.Storage.BlobCreated" 25 | ] 26 | } 27 | } 28 | } -------------------------------------------------------------------------------- /scripts/Synapse/config/covid_identification_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "covid_test_type_codes": [ 3 | "94500-6", 4 | "94845-5", 5 | "94563-4", 6 | "94564-2", 7 | "94505-5", 8 | "85477-8", 9 | "85478-6", 10 | "85479-4", 11 | "94558-4", 12 | "94534-5", 13 | "96123-5", 14 | "97097-0", 15 | "94508-9", 16 | "94507-1", 17 | "95970-0", 18 | "95409-9", 19 | "95608-6", 20 | "94533-7", 21 | "94759-8", 22 | "76078-5", 23 | "77026-3", 24 | "77028-9", 25 | "77027-1", 26 | "76080-1", 27 | "76089-2", 28 | "94640-0", 29 | "95406-5", 30 | "94756-4", 31 | "94757-2", 32 | "92142-9", 33 | "92141-1", 34 | "80382-5", 35 | "80383-3", 36 | "94762-2", 37 | "95542-7", 38 | "94559-2", 39 | "95424-8", 40 | "94565-9", 41 | "82160-3", 42 | "82163-7", 43 | "82161-1", 44 | "82162-9", 45 | "82164-5", 46 | "82165-2", 47 | "82175-1", 48 | "82166-0", 49 | "82167-8", 50 | "82169-4", 51 | "82168-6", 52 | "82170-2", 53 | "88890-9", 54 | "82176-9", 55 | "87621-9", 56 | "82179-3", 
57 | "82178-5", 58 | "82177-7", 59 | "94502-2", 60 | "95209-3", 61 | "96119-3", 62 | "94309-2", 63 | "94307-6", 64 | "94308-4", 65 | "68993-5", 66 | "95423-0", 67 | "92131-2", 68 | "96122-7", 69 | "96091-4", 70 | "98069-8", 71 | "96603-6", 72 | "96957-6", 73 | "94547-7", 74 | "94760-6", 75 | "76070-2", 76 | "88610-1", 77 | "88604-4", 78 | "88618-4", 79 | "88626-7", 80 | "77024-8", 81 | "88721-6", 82 | "76084-3", 83 | "76085-0", 84 | "76086-8", 85 | "76087-6", 86 | "77022-2", 87 | "77023-0", 88 | "88718-2", 89 | "88720-8", 90 | "95609-4", 91 | "98132-4", 92 | "94764-8", 93 | "100342-5", 94 | "99597-7", 95 | "96986-5", 96 | "100973-7", 97 | "100974-5", 98 | "92809-3", 99 | "92808-5", 100 | "88613-5", 101 | "90101-7", 102 | "94758-0", 103 | "97098-8", 104 | "95425-5", 105 | "43913-3", 106 | "96742-2", 107 | "96448-6", 108 | "94769-7", 109 | "97104-4", 110 | "94761-4", 111 | "95416-4" 112 | ], 113 | "covid_positive_results": [ 114 | "positive", 115 | "detected", 116 | "detection of sars-cov-2 antigen in a clinical or post-mortem by any method", 117 | "detection of sars-cov-2 genomic sequence by any method", 118 | "detection of sars-cov-2 nucleic acid in a clinical or post-mortem specimen by any method", 119 | "Detection of sars-cov-2 organism or substance in a clinical specimen" 120 | ] 121 | } -------------------------------------------------------------------------------- /scripts/Synapse/updateECRDataStoreIncidentID.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "### updateECRDatastoreIncidentID\n", 9 | "This is the 3rd and final step to update the ECR datastore after receiving new MPI data from LAC, after updating the `iris_id` in the `updateECRDatastoreIrisID` notebook.\n", 10 | "\n", 11 | "This notebook syncs `incident_id`s between the Master Incident Index (MII) and the ECR datastore. As new MII data is made available through the `updateMII` Synapse job, the ECR datastore needs to be updated as well. This notebook updates the `incident_id` column in the ECR datastore if there is an entry in the MII with a corresponding `person_id` and the entry has a positive COVID test within 90 days of the ECR datastore's COVID specimen collection date.\n" 12 | ] 13 | }, 14 | { 15 | "attachments": {}, 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "Set up and prep data. Load ECR datastore (`ecr`) and MII delta tables (`mii`). 
Load the data necessary for identifying positive COVID tests (`covid_test_type_codes`, `covid_positive_results`)." 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "pip install --upgrade pip" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": false, 36 | "jupyter": { 37 | "outputs_hidden": false 38 | } 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "from pyspark.sql import SparkSession\n", 43 | "from delta.tables import *\n", 44 | "from pyspark.sql.functions import *\n", 45 | "\n", 46 | "account_name = \"$STORAGE_ACCOUNT\"\n", 47 | "ECR_DELTA_TABLE_FILE_PATH = f\"abfss://delta-tables@{account_name}.dfs.core.windows.net/ecr-datastore\"\n", 48 | "MII_DELTA_TABLE_FILE_PATH = f\"abfss://delta-tables@{account_name}.dfs.core.windows.net/MII\"\n", 49 | "COVID_IDENTIFICATION_CONFIG_FILE_PATH = f\"abfss://delta-tables@{account_name}.dfs.core.windows.net/covid_identification_config.json\"\n", 50 | "\n", 51 | "spark = SparkSession.builder.getOrCreate()\n", 52 | "\n", 53 | "# Read in data\n", 54 | "ecr = spark.read.format(\"delta\").load(ECR_DELTA_TABLE_FILE_PATH)\n", 55 | "mii = spark.read.format(\"delta\").load(MII_DELTA_TABLE_FILE_PATH).select(\"incident_id\",\"person_id\",\"collection_date\").withColumnRenamed(\"incident_id\",\"incident_id_mii\").withColumnRenamed(\"person_id\",\"person_id_mii\").withColumnRenamed(\"collection_date\",\"specimen_collection_date_mii\")\n", 56 | "\n", 57 | "# Covid identification data\n", 58 | "df = spark.read.json(COVID_IDENTIFICATION_CONFIG_FILE_PATH, multiLine=True)\n", 59 | "covid_test_type_codes = df.select('covid_test_type_codes').rdd.flatMap(lambda x: x).collect()[0]\n", 60 | "covid_positive_results = df.select('covid_positive_results').rdd.flatMap(lambda x: x).collect()[0]" 61 | ] 62 | }, 63 | { 64 | "attachments": {}, 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "Create a `comparison_date` column in the ECR datastore. The ECR datastore contains 20 tests and associated specimen collection dates. When updating the `incident_id`, we are only concerned with positive COVID tests and thus want to use the specimen collection date associated with positive COVID tests only. This block checks each of the tests to see if they are a COVID test (i.e., `test_type_code` is in the list of `covid_test_type_codes`) and whether the test is positive (i.e., the `test_result` is in the list of `covid_positive_results`)."
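, "Note that the `when` chain below is evaluated in order, so `comparison_date` takes the collection date from the first (lowest-numbered) test slot that matches a positive COVID result."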
69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "jupyter": { 76 | "outputs_hidden": false, 77 | "source_hidden": false 78 | }, 79 | "nteract": { 80 | "transient": { 81 | "deleting": false 82 | } 83 | } 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "# Add `comparison_date` column to ecr data ahead of join with mii to find positive covid tests\n", 88 | "ecr = ecr.withColumn(\"comparison_date\",\n", 89 | " when((lower(ecr.test_type_code_1).isin(covid_test_type_codes) & lower(ecr.test_result_1).isin(covid_positive_results)), ecr.specimen_collection_date_1)\n", 90 | " .when((lower(ecr.test_type_code_2).isin(covid_test_type_codes) & lower(ecr.test_result_2).isin(covid_positive_results)), ecr.specimen_collection_date_2)\n", 91 | " .when((lower(ecr.test_type_code_3).isin(covid_test_type_codes) & lower(ecr.test_result_3).isin(covid_positive_results)), ecr.specimen_collection_date_3)\n", 92 | " .when((lower(ecr.test_type_code_4).isin(covid_test_type_codes) & lower(ecr.test_result_4).isin(covid_positive_results)), ecr.specimen_collection_date_4)\n", 93 | " .when((lower(ecr.test_type_code_5).isin(covid_test_type_codes) & lower(ecr.test_result_5).isin(covid_positive_results)), ecr.specimen_collection_date_5)\n", 94 | " .when((lower(ecr.test_type_code_6).isin(covid_test_type_codes) & lower(ecr.test_result_6).isin(covid_positive_results)), ecr.specimen_collection_date_6)\n", 95 | " .when((lower(ecr.test_type_code_7).isin(covid_test_type_codes) & lower(ecr.test_result_7).isin(covid_positive_results)), ecr.specimen_collection_date_7)\n", 96 | " .when((lower(ecr.test_type_code_8).isin(covid_test_type_codes) & lower(ecr.test_result_8).isin(covid_positive_results)), ecr.specimen_collection_date_8)\n", 97 | " .when((lower(ecr.test_type_code_9).isin(covid_test_type_codes) & lower(ecr.test_result_9).isin(covid_positive_results)), ecr.specimen_collection_date_9)\n", 98 | " .when((lower(ecr.test_type_code_10).isin(covid_test_type_codes) & lower(ecr.test_result_10).isin(covid_positive_results)), ecr.specimen_collection_date_10)\n", 99 | " .when((lower(ecr.test_type_code_11).isin(covid_test_type_codes) & lower(ecr.test_result_11).isin(covid_positive_results)), ecr.specimen_collection_date_11)\n", 100 | " .when((lower(ecr.test_type_code_12).isin(covid_test_type_codes) & lower(ecr.test_result_12).isin(covid_positive_results)), ecr.specimen_collection_date_12)\n", 101 | " .when((lower(ecr.test_type_code_13).isin(covid_test_type_codes) & lower(ecr.test_result_13).isin(covid_positive_results)), ecr.specimen_collection_date_13)\n", 102 | " .when((lower(ecr.test_type_code_14).isin(covid_test_type_codes) & lower(ecr.test_result_14).isin(covid_positive_results)), ecr.specimen_collection_date_14)\n", 103 | " .when((lower(ecr.test_type_code_15).isin(covid_test_type_codes) & lower(ecr.test_result_15).isin(covid_positive_results)), ecr.specimen_collection_date_15)\n", 104 | " .when((lower(ecr.test_type_code_16).isin(covid_test_type_codes) & lower(ecr.test_result_16).isin(covid_positive_results)), ecr.specimen_collection_date_16)\n", 105 | " .when((lower(ecr.test_type_code_17).isin(covid_test_type_codes) & lower(ecr.test_result_17).isin(covid_positive_results)), ecr.specimen_collection_date_17)\n", 106 | " .when((lower(ecr.test_type_code_18).isin(covid_test_type_codes) & lower(ecr.test_result_18).isin(covid_positive_results)), ecr.specimen_collection_date_18)\n", 107 | " .when((lower(ecr.test_type_code_19).isin(covid_test_type_codes) &
lower(ecr.test_result_19).isin(covid_positive_results)), ecr.specimen_collection_date_19)\n", 108 | " .when((lower(ecr.test_type_code_20).isin(covid_test_type_codes) & lower(ecr.test_result_20).isin(covid_positive_results)), ecr.specimen_collection_date_20)\n", 109 | " .otherwise(lit(None))\n", 110 | ")" 111 | ] 112 | }, 113 | { 114 | "attachments": {}, 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "Join the MII and ECR Datastore where the IDs match and the MII specimen collection date is within 90 days of the ECR `comparison_date` selected in the previous cell to assemble the updates for the ECR datastore." 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "jupyter": { 126 | "outputs_hidden": false, 127 | "source_hidden": false 128 | }, 129 | "nteract": { 130 | "transient": { 131 | "deleting": false 132 | } 133 | } 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "# Join MII and ECR to get ecr updates (positive covid tests)\n", 138 | "ecr_updates = ecr.join(mii,((ecr.iris_id == mii.person_id_mii) & (datediff(ecr.comparison_date,mii.specimen_collection_date_mii) <= 90)),\"inner\").select(\"iris_id\",\"incident_id_mii\")\n", 139 | "ecr_updates = ecr_updates.toDF(\"iris_id\",\"incident_id_mii\")\n" 140 | ] 141 | }, 142 | { 143 | "attachments": {}, 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "Load the ECR datastore (`ecr_main`) and merge in the updates (`ecr_updates`) such that when a match is found (e.g., a new positive COVID result within 90 days), the `incident_id` column in the ECR datastore is updated." 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": { 154 | "jupyter": { 155 | "outputs_hidden": false, 156 | "source_hidden": false 157 | }, 158 | "nteract": { 159 | "transient": { 160 | "deleting": false 161 | } 162 | } 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "# Load ecr delta table\n", 167 | "ecr_main = DeltaTable.forPath(spark,ECR_DELTA_TABLE_FILE_PATH)\n", 168 | "\n", 169 | "# Merge in ecr updates such that the incident_id is updated\n", 170 | "ecr_main.alias(\"ecr\") \\\n", 171 | " .merge(\n", 172 | " ecr_updates.alias(\"ecr_updates\"),\n", 173 | " \"ecr.person_id = ecr_updates.iris_id\") \\\n", 174 | " .whenMatchedUpdate(set = {\"incident_id\": \"ecr_updates.incident_id_mii\",\"incident_id_date_added\": date_format(current_timestamp(), 'yyyy-MM-dd') }) \\\n", 175 | " .execute()\n" 176 | ] 177 | } 178 | ], 179 | "metadata": { 180 | "description": null, 181 | "kernel_info": { 182 | "name": "synapse_pyspark" 183 | }, 184 | "kernelspec": { 185 | "display_name": "Synapse PySpark", 186 | "language": "Python", 187 | "name": "synapse_pyspark" 188 | }, 189 | "language_info": { 190 | "name": "python" 191 | }, 192 | "save_output": false, 193 | "synapse_widget": { 194 | "state": {}, 195 | "version": "0.1" 196 | } 197 | }, 198 | "nbformat": 4, 199 | "nbformat_minor": 2 200 | } 201 | -------------------------------------------------------------------------------- /scripts/Synapse/updateECRDataStoreIrisID.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "### updateECRDatastoreIrisID\n", 9 | "This is the 2nd step to update the ECR datastore after receiving new MPI data from LAC, after updating the `person_id` in the `updateECRDatastorePersonID` 
notebook.\n", 10 | "\n", 11 | "This notebook joins the ECR datastore (`ecr`) and Person table from the Master Patient Index (`person`) on `person_id` to update the ECR datastore's `iris_id` with the most up-to-date `external_person_id` in the `person` table." 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "pip install --upgrade pip" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "jupyter": { 28 | "outputs_hidden": true 29 | } 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "pip install psycopg2-binary azure-identity" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "jupyter": { 41 | "outputs_hidden": false, 42 | "source_hidden": false 43 | }, 44 | "nteract": { 45 | "transient": { 46 | "deleting": false 47 | } 48 | } 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "import psycopg2\n", 53 | "from delta.tables import *\n", 54 | "from pyspark.sql.functions import *\n", 55 | "from azure.identity import DefaultAzureCredential" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "jupyter": { 63 | "outputs_hidden": false, 64 | "source_hidden": false 65 | }, 66 | "nteract": { 67 | "transient": { 68 | "deleting": false 69 | } 70 | } 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "storage_account_name = \"$STORAGE_ACCOUNT\"\n", 75 | "ECR_DELTA_TABLE_FILE_PATH = f\"abfss://delta-tables@{storage_account_name}.dfs.core.windows.net/ecr-datastore\" # changed\n", 76 | "\n", 77 | "\n", 78 | "# Set your Key Vault information\n", 79 | "vault_name = \"$KEY_VAULT\"\n", 80 | "KEY_VAULT_URL = f\"https://{vault_name}.vault.azure.net\"\n", 81 | "\n", 82 | "vault_linked_service = \"$KEY_VAULT_LINKED_SERVICE\"\n", 83 | "credential = DefaultAzureCredential()\n", 84 | "db_password = TokenLibrary.getSecret(vault_name,\"mpi-db-password\",vault_linked_service)\n", 85 | "\n", 86 | "# Database connection parameters\n", 87 | "DB_NAME = \"DibbsMpiDB\"\n", 88 | "DB_USER = \"postgres\"\n", 89 | "DB_HOST = \"$MPI_DB_HOST\"\n", 90 | "DB_PORT = \"5432\"\n", 91 | "DB_TABLE = \"person\"\n", 92 | "\n", 93 | "\n", 94 | "# Connect to the database\n", 95 | "conn = psycopg2.connect(\n", 96 | " dbname=DB_NAME,\n", 97 | " user=DB_USER,\n", 98 | " password=db_password,\n", 99 | " host=DB_HOST,\n", 100 | " port=DB_PORT\n", 101 | ")\n", 102 | "\n", 103 | "# Create a cursor\n", 104 | "cur = conn.cursor()\n", 105 | "\n", 106 | "# Execute the query to get the list of tables in the database\n", 107 | "cur.execute(f\"\"\"\n", 108 | " SELECT person_id, external_person_id\n", 109 | " FROM {DB_TABLE};\n", 110 | "\"\"\")\n", 111 | "\n", 112 | "# Fetch the results\n", 113 | "data = cur.fetchall()\n", 114 | "data\n", 115 | "\n", 116 | "# Close the cursor and connection\n", 117 | "cur.close()\n", 118 | "conn.close()\n", 119 | "\n", 120 | "\n", 121 | "# Prep the MPI data for merging with ECR data \n", 122 | "columns=['person_id','external_person_id']\n", 123 | "person = spark.createDataFrame(data = data, schema = columns)\n", 124 | "\n", 125 | "\n", 126 | "# Load ecr Delta table\n", 127 | "ecr = DeltaTable.forPath(spark,ECR_DELTA_TABLE_FILE_PATH)\n", 128 | "\n", 129 | "# Update ecr data with `external_person_id` from MPI by joining on `person_id`\n", 130 | "ecr.alias(\"ecr\") \\\n", 131 | " .merge(\n", 132 | " person.alias(\"mpi_person\"),\n", 133 | " \"ecr.person_id = mpi_person.person_id\") \\\n", 
134 | " .whenMatchedUpdate(set = { \"iris_id\": \"mpi_person.external_person_id\", \"iris_id_date_added\": date_format(current_timestamp(), 'yyyy-MM-dd') }) \\\n", 135 | " .execute()\n" 136 | ] 137 | } 138 | ], 139 | "metadata": { 140 | "description": null, 141 | "kernel_info": { 142 | "name": "synapse_pyspark" 143 | }, 144 | "kernelspec": { 145 | "display_name": "Synapse PySpark", 146 | "language": "Python", 147 | "name": "synapse_pyspark" 148 | }, 149 | "language_info": { 150 | "name": "python" 151 | }, 152 | "save_output": true, 153 | "synapse_widget": { 154 | "state": {}, 155 | "version": "0.1" 156 | } 157 | }, 158 | "nbformat": 4, 159 | "nbformat_minor": 2 160 | } 161 | -------------------------------------------------------------------------------- /scripts/Synapse/updateECRDataStorePersonID.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "### updateECRDatastorePersonID\n", 9 | "This is the 1st step to update the ECR datastore after receiving new MPI data from LAC.\n", 10 | "\n", 11 | "This notebook joins the ECR datastore (`ecr`) and Patient table from the Master Patient Index (`patient`) on `patient_id` to update the ECR datastore's `person_id` with the most up-to-date `person_id` in the `patient` table. " 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "pip install --upgrade pip" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "jupyter": { 28 | "outputs_hidden": true 29 | } 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "pip install psycopg2-binary azure-identity" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "jupyter": { 41 | "outputs_hidden": false, 42 | "source_hidden": false 43 | }, 44 | "nteract": { 45 | "transient": { 46 | "deleting": false 47 | } 48 | } 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "from azure.identity import DefaultAzureCredential\n", 53 | "import psycopg2\n", 54 | "from delta.tables import *\n", 55 | "from pyspark.sql.functions import *" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "jupyter": { 63 | "outputs_hidden": false, 64 | "source_hidden": false 65 | }, 66 | "nteract": { 67 | "transient": { 68 | "deleting": false 69 | } 70 | } 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "storage_account_name = \"$STORAGE_ACCOUNT\"\n", 75 | "ECR_DELTA_TABLE_FILE_PATH = f\"abfss://delta-tables@{storage_account_name}.dfs.core.windows.net/ecr-datastore\" \n", 76 | "\n", 77 | "# Set your Key Vault information and Key Vault linked service\n", 78 | "vault_name = \"$KEY_VAULT\"\n", 79 | "vault_linked_service = \"$KEY_VAULT_LINKED_SERVICE\"\n", 80 | "credential = DefaultAzureCredential()\n", 81 | "\n", 82 | "# Database connection parameters\n", 83 | "DB_NAME = \"DibbsMpiDB\"\n", 84 | "DB_USER = \"postgres\"\n", 85 | "DB_HOST = \"$MPI_DB_HOST\"\n", 86 | "DB_PORT = \"5432\"\n", 87 | "DB_TABLE = \"patient\"\n", 88 | "\n", 89 | "# Get the secret value (password) from the previous step\n", 90 | "db_password = TokenLibrary.getSecret(vault_name,\"mpi-db-password\",vault_linked_service)\n", 91 | "\n", 92 | "# Connect to the database\n", 93 | "conn = psycopg2.connect(\n", 94 | " dbname=DB_NAME,\n", 95 | " user=DB_USER,\n", 96 | " password=db_password,\n", 97 | 
" host=DB_HOST,\n", 98 | " port=DB_PORT\n", 99 | ")\n", 100 | "\n", 101 | "# Create a cursor\n", 102 | "cur = conn.cursor()\n", 103 | "\n", 104 | "# Execute the query to get the list of tables in the database\n", 105 | "cur.execute(f\"\"\"\n", 106 | " SELECT patient_id,person_id\n", 107 | " FROM {DB_TABLE};\n", 108 | "\"\"\")\n", 109 | "\n", 110 | "# Fetch the results\n", 111 | "data = cur.fetchall()\n", 112 | "\n", 113 | "# Close the cursor and connection\n", 114 | "cur.close()\n", 115 | "conn.close()\n", 116 | "\n", 117 | "\n", 118 | "# Prep the MPI data for merging with ECR data \n", 119 | "columns=['patient_id','person_id']\n", 120 | "patient = spark.createDataFrame(data = data, schema = columns)\n", 121 | "\n", 122 | "\n", 123 | "# Load ecr Delta table\n", 124 | "ecr = DeltaTable.forPath(spark,ECR_DELTA_TABLE_FILE_PATH)\n", 125 | "\n", 126 | "# Update ecr data with `person_id` from MPI by joining on `patient_id`\n", 127 | "ecr.alias(\"ecr\") \\\n", 128 | " .merge(\n", 129 | " patient.alias(\"mpi_patient\"),\n", 130 | " \"ecr.patient_id = mpi_patient.patient_id\") \\\n", 131 | " .whenMatchedUpdate(set = { \"person_id\": \"mpi_patient.person_id\", \"person_id_date_added\": date_format(current_timestamp(), 'yyyy-MM-dd')}) \\\n", 132 | " .execute()\n", 133 | "\n" 134 | ] 135 | } 136 | ], 137 | "metadata": { 138 | "description": null, 139 | "kernel_info": { 140 | "name": "synapse_pyspark" 141 | }, 142 | "kernelspec": { 143 | "display_name": "Synapse PySpark", 144 | "language": "Python", 145 | "name": "synapse_pyspark" 146 | }, 147 | "language_info": { 148 | "name": "python" 149 | }, 150 | "save_output": true, 151 | "synapse_widget": { 152 | "state": {}, 153 | "version": "0.1" 154 | } 155 | }, 156 | "nbformat": 4, 157 | "nbformat_minor": 2 158 | } 159 | -------------------------------------------------------------------------------- /scripts/Synapse/updateMII.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "### updateMII\n", 9 | "\n", 10 | "This notebook inserts and updates data from an uploaded parquet file (`mii_incoming_file_path`) into a Master Incident Index (MII) delta table (`mii_delta_table_path`). 
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "pip install --upgrade pip" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": { 26 | "jupyter": { 27 | "outputs_hidden": false, 28 | "source_hidden": false 29 | }, 30 | "nteract": { 31 | "transient": { 32 | "deleting": false 33 | } 34 | }, 35 | "tags": [ 36 | "parameters" 37 | ] 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "filename=\"\"" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "jupyter": { 49 | "outputs_hidden": false 50 | } 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "from pyspark.sql import SparkSession\n", 55 | "from delta.tables import DeltaTable\n", 56 | "from pyspark.sql.functions import col\n", 57 | "from notebookutils import mssparkutils\n", 58 | "\n", 59 | "spark = SparkSession.builder.getOrCreate()\n", 60 | "\n", 61 | "# Set up file client\n", 62 | "storage_account = \"$STORAGE_ACCOUNT\"\n", 63 | "mii_incoming_file_path = f\"abfss://patient-data@{storage_account}.dfs.core.windows.net/{filename}\"\n", 64 | "mii_delta_table_path = f\"abfss://delta-tables@{storage_account}.dfs.core.windows.net/MII\"\n", 65 | "\n", 66 | "\n", 67 | "def update(mii_incoming_file_path,mii_delta_table_path):\n", 68 | " mii_updates = spark.read.parquet(mii_incoming_file_path)\n", 69 | "\n", 70 | " # Check if MII Delta table exists\n", 71 | " if DeltaTable.isDeltaTable(spark, mii_delta_table_path):\n", 72 | " # If the table exists, update records\n", 73 | " mii_main = DeltaTable.forPath(spark, mii_delta_table_path)\n", 74 | "\n", 75 | " mii_main.alias(\"mii_main\") \\\n", 76 | " .merge(\n", 77 | " mii_updates.alias(\"mii_updates\"),\n", 78 | " \"mii_updates.person_id = mii_main.person_id AND mii_updates.incident_id = mii_main.incident_id\") \\\n", 79 | " .whenMatchedUpdate(set ={\n", 80 | " \"collection_date\":\"mii_updates.collection_date\",\n", 81 | " \"record_type\":\"mii_updates.record_type\",\n", 82 | " \"episode_date\": \"mii_updates.episode_date\",\n", 83 | " \"process_status\": \"mii_updates.process_status\",\n", 84 | " \"resolution_status\": \"mii_updates.resolution_status\"}) \\\n", 85 | " .whenNotMatchedInsert(values = { \"person_id\": col(\"mii_updates.person_id\"),\n", 86 | " \"incident_id\": col(\"mii_updates.incident_id\"),\n", 87 | " \"collection_date\": col(\"mii_updates.collection_date\"),\n", 88 | " \"record_type\": col(\"mii_updates.record_type\"),\n", 89 | " \"episode_date\": col(\"mii_updates.episode_date\"),\n", 90 | " \"process_status\": col(\"mii_updates.process_status\"),\n", 91 | " \"resolution_status\": col(\"mii_updates.resolution_status\")}) \\\n", 92 | " .execute()\n", 93 | " else:\n", 94 | " # If Delta table doesn't exist, create it.\n", 95 | " mii_updates.write.format(\"delta\").mode(\"append\").save(mii_delta_table_path)\n", 96 | "\n", 97 | " \n", 98 | "\n", 99 | " \n", 100 | "update(mii_incoming_file_path,mii_delta_table_path)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "jupyter": { 108 | "outputs_hidden": false, 109 | "source_hidden": false 110 | }, 111 | "nteract": { 112 | "transient": { 113 | "deleting": false 114 | } 115 | } 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "# Move file that triggered the MII update event into the archive folder\n", 120 | "destination = 
f\"abfss://patient-data@{storage_account}.dfs.core.windows.net/archive/{filename}\"\n", 121 | "mssparkutils.fs.mv(src=mii_incoming_file_path,dest=destination,create_path=True)" 122 | ] 123 | } 124 | ], 125 | "metadata": { 126 | "description": null, 127 | "kernelspec": { 128 | "display_name": "Synapse PySpark", 129 | "name": "synapse_pyspark" 130 | }, 131 | "language_info": { 132 | "name": "python" 133 | }, 134 | "save_output": true, 135 | "synapse_widget": { 136 | "state": {}, 137 | "version": "0.1" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 2 142 | } 143 | -------------------------------------------------------------------------------- /scripts/assets/test_delta_lake/.part-00000-f4dc767c-0c82-4b10-8b6b-3b1b58486a0a-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/scripts/assets/test_delta_lake/.part-00000-f4dc767c-0c82-4b10-8b6b-3b1b58486a0a-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /scripts/assets/test_delta_lake/.part-00017-4fab7ac2-2d38-4a71-8638-9b552dacc55a-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/scripts/assets/test_delta_lake/.part-00017-4fab7ac2-2d38-4a71-8638-9b552dacc55a-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /scripts/assets/test_delta_lake/.part-00023-ebe6c6a3-864e-4865-9eb5-d5001fd84555-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/scripts/assets/test_delta_lake/.part-00023-ebe6c6a3-864e-4865-9eb5-d5001fd84555-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /scripts/assets/test_delta_lake/_delta_log/.00000000000000000000.json.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/scripts/assets/test_delta_lake/_delta_log/.00000000000000000000.json.crc -------------------------------------------------------------------------------- /scripts/assets/test_delta_lake/_delta_log/00000000000000000000.json: -------------------------------------------------------------------------------- 1 | {"commitInfo":{"timestamp":1682621593938,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[]"},"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numFiles":"3","numOutputRows":"2","numOutputBytes":"29260"},"engineInfo":"Apache-Spark/3.3.2 Delta-Lake/2.3.0","txnId":"ca862d1d-ac56-4684-b92c-be4dffae1bea"}} 2 | {"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 3 | 
{"metaData":{"id":"92fb04d8-ef6c-43c5-abcf-7bda1c8a3153","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"patient_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"person_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"last_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"first_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"rr_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"status\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"conditions\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"eicr_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"eicr_version_number\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"authoring_datetime\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"provider_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"facility_id_number\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"facility_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"facility_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"encounter_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"encounter_start_date\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"encounter_end_date\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"active_problem_1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"active_problem_date_1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"active_problem_2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"active_problem_date_2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"active_problem_3\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"active_problem_date_3\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"active_problem_4\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"active_problem_date_4\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"active_problem_5\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"active_problem_date_5\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"reason_for_visit\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"test_type_1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"test_result_1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"test_result_interp_1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"specimen_type_1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"performing_lab_1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"specimen_collection_date_1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"result_date_1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"test_type_2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"test_result_2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"test_result_interp_2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"specimen_type_2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"performing_lab_2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"n
ame\":\"specimen_collection_date_2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"result_date_2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"incident_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1682621593041}} 4 | {"add":{"path":"part-00017-4fab7ac2-2d38-4a71-8638-9b552dacc55a-c000.snappy.parquet","partitionValues":{},"size":11752,"modificationTime":1682621593768,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"patient_id\":\"2c6d5fd1-4a70-11eb-99fd-ad786a82\",\"person_id\":\"a81bc81b-dead-4e5d-abff-90865d1e\",\"last_name\":\"Shepard\",\"first_name\":\"John\",\"rr_id\":\"12-34-56-78\",\"status\":\"12\",\"conditions\":\"\",\"eicr_id\":\"11111111\",\"eicr_version_number\":\"2\",\"authoring_datetime\":\"\",\"provider_id\":\"999\",\"facility_id_number\":\"1\",\"facility_name\":\"Huerta Memorial Hospital\",\"facility_type\":\"\",\"encounter_type\":\"encounter\",\"encounter_start_date\":\"2021-08-14\",\"encounter_end_date\":\"2021-08-16\",\"active_problem_1\":\"\",\"active_problem_date_1\":\"\",\"active_problem_2\":\"\",\"active_problem_date_2\":\"\",\"active_problem_3\":\"\",\"active_problem_date_3\":\"\",\"active_problem_4\":\"\",\"active_problem_date_4\":\"\",\"active_problem_5\":\"\",\"active_problem_date_5\":\"\",\"reason_for_visit\":\"physical\",\"test_type_1\":\"\",\"test_result_1\":\"\",\"test_result_interp_1\":\"\",\"specimen_type_1\":\"\"},\"maxValues\":{\"patient_id\":\"2c6d5fd1-4a70-11eb-99fd-ad786a82�\",\"person_id\":\"a81bc81b-dead-4e5d-abff-90865d1e�\",\"last_name\":\"Shepard\",\"first_name\":\"John\",\"rr_id\":\"12-34-56-78\",\"status\":\"12\",\"conditions\":\"\",\"eicr_id\":\"11111111\",\"eicr_version_number\":\"2\",\"authoring_datetime\":\"\",\"provider_id\":\"999\",\"facility_id_number\":\"1\",\"facility_name\":\"Huerta Memorial Hospital\",\"facility_type\":\"\",\"encounter_type\":\"encounter\",\"encounter_start_date\":\"2021-08-14\",\"encounter_end_date\":\"2021-08-16\",\"active_problem_1\":\"\",\"active_problem_date_1\":\"\",\"active_problem_2\":\"\",\"active_problem_date_2\":\"\",\"active_problem_3\":\"\",\"active_problem_date_3\":\"\",\"active_problem_4\":\"\",\"active_problem_date_4\":\"\",\"active_problem_5\":\"\",\"active_problem_date_5\":\"\",\"reason_for_visit\":\"physical\",\"test_type_1\":\"\",\"test_result_1\":\"\",\"test_result_interp_1\":\"\",\"specimen_type_1\":\"\"},\"nullCount\":{\"patient_id\":0,\"person_id\":0,\"last_name\":0,\"first_name\":0,\"rr_id\":0,\"status\":0,\"conditions\":0,\"eicr_id\":0,\"eicr_version_number\":0,\"authoring_datetime\":0,\"provider_id\":0,\"facility_id_number\":0,\"facility_name\":0,\"facility_type\":0,\"encounter_type\":0,\"encounter_start_date\":0,\"encounter_end_date\":0,\"active_problem_1\":0,\"active_problem_date_1\":0,\"active_problem_2\":0,\"active_problem_date_2\":0,\"active_problem_3\":0,\"active_problem_date_3\":0,\"active_problem_4\":0,\"active_problem_date_4\":0,\"active_problem_5\":0,\"active_problem_date_5\":0,\"reason_for_visit\":0,\"test_type_1\":0,\"test_result_1\":0,\"test_result_interp_1\":0,\"specimen_type_1\":0}}"}} 5 | 
{"add":{"path":"part-00023-ebe6c6a3-864e-4865-9eb5-d5001fd84555-c000.snappy.parquet","partitionValues":{},"size":12797,"modificationTime":1682621593768,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"patient_id\":\"2fdd0b8b-4a70-11eb-99fd-ad786a82\",\"person_id\":\"a81bc81b-dead-4e5d-abff-90865d1e\",\"last_name\":\"Anderson\",\"first_name\":\"David\",\"rr_id\":\"97-56-4862\",\"status\":\"24\",\"conditions\":\"\",\"eicr_id\":\"99999\",\"eicr_version_number\":\"168\",\"authoring_datetime\":\"2022-12-12\",\"provider_id\":\"6d8e9s-98w7szz\",\"facility_id_number\":\"84yfd3556d\",\"facility_name\":\"Sunset Strip\",\"facility_type\":\"outpatient\",\"encounter_type\":\"OKI\",\"encounter_start_date\":\"2022-12-02\",\"encounter_end_date\":\"2022-12-11\",\"active_problem_1\":\"arthritis\",\"active_problem_date_1\":\"2020-10-10\",\"active_problem_2\":\"\",\"active_problem_date_2\":\"\",\"active_problem_3\":\"\",\"active_problem_date_3\":\"\",\"active_problem_4\":\"\",\"active_problem_date_4\":\"\",\"active_problem_5\":\"\",\"active_problem_date_5\":\"\",\"reason_for_visit\":\"concern\",\"test_type_1\":\"degenerative disk test\",\"test_result_1\":\"positive\",\"test_result_interp_1\":\"patient has a bad back\",\"specimen_type_1\":\"vertebrae fluid\"},\"maxValues\":{\"patient_id\":\"2fdd0b8b-4a70-11eb-99fd-ad786a82�\",\"person_id\":\"a81bc81b-dead-4e5d-abff-90865d1e�\",\"last_name\":\"Anderson\",\"first_name\":\"David\",\"rr_id\":\"97-56-4862\",\"status\":\"24\",\"conditions\":\"\",\"eicr_id\":\"99999\",\"eicr_version_number\":\"168\",\"authoring_datetime\":\"2022-12-12\",\"provider_id\":\"6d8e9s-98w7szz\",\"facility_id_number\":\"84yfd3556d\",\"facility_name\":\"Sunset Strip\",\"facility_type\":\"outpatient\",\"encounter_type\":\"OKI\",\"encounter_start_date\":\"2022-12-02\",\"encounter_end_date\":\"2022-12-11\",\"active_problem_1\":\"arthritis\",\"active_problem_date_1\":\"2020-10-10\",\"active_problem_2\":\"\",\"active_problem_date_2\":\"\",\"active_problem_3\":\"\",\"active_problem_date_3\":\"\",\"active_problem_4\":\"\",\"active_problem_date_4\":\"\",\"active_problem_5\":\"\",\"active_problem_date_5\":\"\",\"reason_for_visit\":\"concern\",\"test_type_1\":\"degenerative disk test\",\"test_result_1\":\"positive\",\"test_result_interp_1\":\"patient has a bad back\",\"specimen_type_1\":\"vertebrae fluid\"},\"nullCount\":{\"patient_id\":0,\"person_id\":0,\"last_name\":0,\"first_name\":0,\"rr_id\":0,\"status\":0,\"conditions\":0,\"eicr_id\":0,\"eicr_version_number\":0,\"authoring_datetime\":0,\"provider_id\":0,\"facility_id_number\":0,\"facility_name\":0,\"facility_type\":0,\"encounter_type\":0,\"encounter_start_date\":0,\"encounter_end_date\":0,\"active_problem_1\":0,\"active_problem_date_1\":0,\"active_problem_2\":0,\"active_problem_date_2\":0,\"active_problem_3\":0,\"active_problem_date_3\":0,\"active_problem_4\":0,\"active_problem_date_4\":0,\"active_problem_5\":0,\"active_problem_date_5\":0,\"reason_for_visit\":0,\"test_type_1\":0,\"test_result_1\":0,\"test_result_interp_1\":0,\"specimen_type_1\":0}}"}} 6 | -------------------------------------------------------------------------------- /scripts/assets/test_delta_lake/part-00000-f4dc767c-0c82-4b10-8b6b-3b1b58486a0a-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/scripts/assets/test_delta_lake/part-00000-f4dc767c-0c82-4b10-8b6b-3b1b58486a0a-c000.snappy.parquet 
-------------------------------------------------------------------------------- /scripts/assets/test_delta_lake/part-00017-4fab7ac2-2d38-4a71-8638-9b552dacc55a-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/scripts/assets/test_delta_lake/part-00017-4fab7ac2-2d38-4a71-8638-9b552dacc55a-c000.snappy.parquet -------------------------------------------------------------------------------- /scripts/assets/test_delta_lake/part-00023-ebe6c6a3-864e-4865-9eb5-d5001fd84555-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CDCgov/phdi-azure/533587fc61dba5f4ecc08aed5523055ea0bdc0a0/scripts/assets/test_delta_lake/part-00023-ebe6c6a3-864e-4865-9eb5-d5001fd84555-c000.snappy.parquet -------------------------------------------------------------------------------- /scripts/get_sha.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | echo '{"sha": "'"$(git rev-parse HEAD)"'"}' -------------------------------------------------------------------------------- /serverless-functions/ReadSourceData/function.json: -------------------------------------------------------------------------------- 1 | { 2 | "scriptFile": "__init__.py", 3 | "bindings": [ 4 | { 5 | "type": "queueTrigger", 6 | "connection": "AzureStorageQueuesConnectionString", 7 | "name": "message", 8 | "queueName": "sourcedataqueue", 9 | "direction": "in" 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /serverless-functions/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "applicationInsights": { 5 | "samplingSettings": { 6 | "isEnabled": true, 7 | "excludedTypes": "Request" 8 | } 9 | } 10 | }, 11 | "extensionBundle": { 12 | "id": "Microsoft.Azure.Functions.ExtensionBundle", 13 | "version": "[3.3.0, 4.0.0)" 14 | }, 15 | "extensions": { 16 | "queues": { 17 | "maxPollingInterval": "00:00:02", 18 | "visibilityTimeout": "00:00:30", 19 | "batchSize": 16, 20 | "maxDequeueCount": 2, 21 | "newBatchThreshold": 8, 22 | "messageEncoding": "base64" 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- /serverless-functions/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-functions 2 | azure-identity 3 | azure-mgmt-datafactory 4 | azure-storage-blob 5 | azure-storage-queue 6 | phdi 7 | lxml -------------------------------------------------------------------------------- /serverless-functions/requirements_dev.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | black 3 | flake8 -------------------------------------------------------------------------------- /serverless-functions/tests/ReadSourceData/CDA_RR.xml: -------------------------------------------------------------------------------- [The XML markup of this 61-line CDA Reportability Response fixture, and of the CDA_eICR.xml fixture below, was stripped when this snapshot was rendered; only the document title, "Reportability Response", is recoverable.]
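[Since the markup is gone, for orientation: a minimal sketch of how a CDA document's title can be read with lxml, which requirements.txt above already pins. The relative path is the fixture's real location, but the snippet is illustrative and is not the repo's actual test logic:

from lxml import etree

# CDA documents live in the HL7 v3 namespace.
NS = {"hl7": "urn:hl7-org:v3"}

tree = etree.parse("serverless-functions/tests/ReadSourceData/CDA_RR.xml")
print(tree.findtext("hl7:title", namespaces=NS))  # "Reportability Response" for this fixture
]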
-------------------------------------------------------------------------------- /serverless-functions/tests/ReadSourceData/CDA_eICR.xml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /serverless-functions/tests/ReadSourceData/test_fhir_bundle.json: -------------------------------------------------------------------------------- 1 | { 2 | "resourceType": "Bundle", 3 | "identifier": { 4 | "value": "a very contrived FHIR bundle" 5 | }, 6 | "entry": [ 7 | { 8 | "resource": { 9 | "resourceType": "Organization", 10 | "id": "some-org-we-dont-care-about" 11 | } 12 | }, 13 | { 14 | "resource": { 15 | "resourceType": "Patient", 16 | "id": "some-uuid", 17 | "identifier": [ 18 | { 19 | "value": "123456", 20 | "type": { 21 | "coding": [ 22 | { 23 | "code": "MR", 24 | "system": "http://terminology.hl7.org/CodeSystem/v2-0203", 25 | "display": "Medical record number" 26 | } 27 | ] 28 | }, 29 | "system": "urn...no idea" 30 | } 31 | ], 32 | "name": [ 33 | { 34 | "family": "doe", 35 | "given": [ 36 | "John ", 37 | " Danger " 38 | ], 39 | "use": "official" 40 | } 41 | ], 42 | "birthDate": "1983-02-01", 43 | "gender": "female", 44 | "address": [ 45 | { 46 | "line": [ 47 | "123 Fake St", 48 | "Unit #F" 49 | ], 50 | "BuildingNumber": "123", 51 | "city": "Faketon", 52 | "state": "NY", 53 | "postalCode": "10001-0001", 54 | "country": "USA", 55 | "use": "home" 56 | } 57 | ], 58 | "telecom": [ 59 | { 60 | "use": "home", 61 | "system": "phone", 62 | "value": "123-456-7890" 63 | }, 64 | { 65 | "value": "johndanger@doe.net", 66 | "system": "email" 67 | } 68 | ] 69 | }, 70 | "request": { 71 | "method": "GET", 72 | "url": "testing for entry with no resource" 73 | } 74 | } 75 | ] 76 | } 77 | -------------------------------------------------------------------------------- /terraform/implementation/backend.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | azurerm = { 4 | source = "hashicorp/azurerm" 5 | version = "= 3.66.0" 6 | } 7 | azuread = { 8 | source = "hashicorp/azuread" 9 | version = "= 2.37.2" 10 | } 11 | } 12 | 13 | backend "azurerm" { 14 | container_name = "tfstate" 15 | key = "prod.terraform.tfstate" 16 | } 17 | } 18 | 19 | provider "azurerm" { 20 | use_oidc = var.use_oidc 21 | features {} 22 | } 23 | 24 | provider "azuread" { 25 | use_oidc = var.use_oidc 26 | } -------------------------------------------------------------------------------- /terraform/implementation/main.tf: -------------------------------------------------------------------------------- 1 | // Load modules here 2 | 3 | module "shared" { 4 | source = "../modules/shared" 5 | resource_group_name = var.resource_group_name 6 | location = var.location 7 | smarty_auth_id = var.smarty_auth_id 8 | smarty_auth_token = var.smarty_auth_token 9 | smarty_license_type = var.smarty_license_type 10 | client_id = var.client_id 11 | object_id = var.object_id 12 | ghcr_username = var.ghcr_username 13 | ghcr_token = var.ghcr_token 14 | log_analytics_workspace_id = module.read_source_data.log_analytics_workspace_id 15 | } 16 | 17 | 18 | module "data_factory" { 19 | source = "../modules/data_factory" 20 | resource_group_name = var.resource_group_name 21 | location = var.location 22 | fhir_converter_url = module.shared.fhir_converter_url 23 | ingestion_container_url = module.shared.ingestion_container_url 24 | validation_container_url = 
module.shared.validation_container_url 25 | message_parser_url = module.shared.message_parser_url 26 | fhir_server_url = module.shared.fhir_server_url 27 | phi_storage_account_endpoint_url = module.shared.phi_storage_account_endpoint_url 28 | record_linkage_container_url = module.shared.record_linkage_container_url 29 | pipeline_runner_id = module.shared.pipeline_runner_id 30 | pipeline_runner_principal_id = module.shared.pipeline_runner_principal_id 31 | validation_failures_container_name = module.shared.validation_failures_container_name 32 | pipeline_runner_resource_id = module.shared.pipeline_runner_resource_id 33 | fhir_upload_failures_container_name = module.shared.fhir_upload_failures_container_name 34 | fhir_conversion_failures_container_name = module.shared.fhir_conversion_failures_container_name 35 | delta_tables_container_name = module.shared.delta_tables_container_name 36 | client_id = var.client_id 37 | key_vault_name = module.shared.key_vault_name 38 | phi_storage_account_name = module.shared.phi_storage_account_name 39 | } 40 | 41 | 42 | module "read_source_data" { 43 | source = "../modules/read_source_data" 44 | resource_group_name = var.resource_group_name 45 | location = var.location 46 | phdi_data_factory_name = module.data_factory.phdi_data_factory_name 47 | ingestion_pipeline_name = module.data_factory.ingestion_pipeline_name 48 | subscription_id = var.subscription_id 49 | pipeline_runner_id = module.shared.pipeline_runner_id 50 | pipeline_runner_client_id = module.shared.pipeline_runner_client_id 51 | client_id = var.client_id 52 | wait_time = 10 53 | sleep_time = 1 54 | ingestion_container_url = module.shared.ingestion_container_url 55 | record_linkage_container_url = module.shared.record_linkage_container_url 56 | message_parser_url = module.shared.message_parser_url 57 | phi_storage_account_connection_string = module.shared.phi_storage_account_connection_string 58 | staging_queue_url = module.shared.staging_queue_url 59 | } 60 | 61 | output "record_linkage_container_url" { 62 | value = module.shared.record_linkage_container_url 63 | } 64 | -------------------------------------------------------------------------------- /terraform/implementation/variables.tf: -------------------------------------------------------------------------------- 1 | variable "subscription_id" { 2 | description = "value of the Azure Subscription ID to use" 3 | } 4 | 5 | variable "location" { 6 | description = "value of the Azure location to deploy to" 7 | default = "Central US" 8 | } 9 | 10 | variable "resource_group_name" { 11 | description = "value of the Azure resource group to deploy to" 12 | } 13 | 14 | variable "smarty_auth_id" { 15 | description = "value of the SmartyStreets Auth ID" 16 | } 17 | 18 | variable "smarty_auth_token" { 19 | description = "value of the SmartyStreets Auth Token" 20 | } 21 | 22 | variable "smarty_license_type" { 23 | type = string 24 | description = "value of the SmartyStreets license type to use" 25 | } 26 | 27 | variable "client_id" { 28 | description = "Client ID" 29 | } 30 | 31 | variable "object_id" { 32 | description = "Object ID" 33 | } 34 | 35 | variable "ghcr_username" { 36 | type = string 37 | description = "GitHub Container Registry username." 38 | } 39 | 40 | variable "ghcr_token" { 41 | type = string 42 | description = "GitHub Container Registry token." 43 | } 44 | 45 | variable "use_oidc" { 46 | type = bool 47 | description = "Use OIDC for authentication." 
48 | default = false 49 | } -------------------------------------------------------------------------------- /terraform/modules/data_factory/data.tf: -------------------------------------------------------------------------------- 1 | data "azurerm_client_config" "current" {} -------------------------------------------------------------------------------- /terraform/modules/data_factory/main.tf: -------------------------------------------------------------------------------- 1 | resource "azurerm_data_factory" "phdi_data_factory" { 2 | name = "phdi-${terraform.workspace}-data-factory-${substr(var.client_id, 0, 8)}" 3 | location = var.location 4 | resource_group_name = var.resource_group_name 5 | public_network_enabled = false 6 | managed_virtual_network_enabled = true 7 | 8 | identity { 9 | type = "UserAssigned" 10 | identity_ids = [var.pipeline_runner_id] 11 | } 12 | 13 | lifecycle { 14 | ignore_changes = [ 15 | tags 16 | ] 17 | } 18 | 19 | tags = { 20 | environment = terraform.workspace 21 | managed-by = "terraform" 22 | } 23 | } 24 | 25 | resource "azurerm_role_assignment" "data_factory_contributor" { 26 | scope = azurerm_data_factory.phdi_data_factory.id 27 | role_definition_name = "Contributor" 28 | principal_id = var.pipeline_runner_principal_id 29 | } 30 | 31 | resource "null_resource" "adf_credential" { 32 | provisioner "local-exec" { 33 | command = <<-EOT 34 | # Get an access token for Azure Management API 35 | access_token=$(az account get-access-token --query 'accessToken' -o tsv) 36 | 37 | # Define the credential JSON payload 38 | credential_payload=$(cat <<-JSON 39 | { 40 | "properties": { 41 | "type": "ManagedIdentity", 42 | "typeProperties": { 43 | "resourceId": "${var.pipeline_runner_resource_id}" 44 | } 45 | } 46 | } 47 | JSON 48 | ) 49 | 50 | # Create the credential in Azure Data Factory 51 | az rest --method put \ 52 | --uri "https://management.azure.com/subscriptions/${data.azurerm_client_config.current.subscription_id}/resourceGroups/${var.resource_group_name}/providers/Microsoft.DataFactory/factories/${azurerm_data_factory.phdi_data_factory.name}/credentials/pipeline-runner-credential?api-version=2018-06-01" \ 53 | --headers "Content-Type=application/json" \ 54 | --headers "Authorization=Bearer $access_token" \ 55 | --body "$credential_payload" 56 | EOT 57 | } 58 | 59 | depends_on = [azurerm_data_factory.phdi_data_factory, azurerm_role_assignment.data_factory_contributor] 60 | } 61 | 62 | locals { 63 | ingestion-pipeline-config = jsondecode(templatefile("../modules/data_factory/ingestion-pipeline.json", { 64 | validation_container_url = var.validation_container_url, 65 | environment = terraform.workspace, 66 | fhir_converter_url = var.fhir_converter_url, 67 | ingestion_container_url = var.ingestion_container_url, 68 | fhir_server_url = var.fhir_server_url, 69 | message_parser_url = var.message_parser_url, 70 | storage_account_url = var.phi_storage_account_endpoint_url, 71 | validation_failures_container_name = var.validation_failures_container_name, 72 | fhir_upload_failures_container_name = var.fhir_upload_failures_container_name, 73 | fhir_conversion_failures_container_name = var.fhir_conversion_failures_container_name, 74 | record_linkage_container_url = var.record_linkage_container_url, 75 | delta_tables_container_name = var.delta_tables_container_name 76 | })) 77 | pipeline-metrics-dashboard-config = jsondecode(templatefile("../modules/data_factory/pipeline-metrics-dashboard.json", { 78 | data_factory_id = azurerm_data_factory.phdi_data_factory.id, 79 | 
environment = terraform.workspace, 80 | })) 81 | } 82 | 83 | resource "azurerm_data_factory_pipeline" "phdi_ingestion" { 84 | name = "phdi-${terraform.workspace}-ingestion" 85 | data_factory_id = azurerm_data_factory.phdi_data_factory.id 86 | parameters = { 87 | "filename" : "", 88 | "message" : "", 89 | "message_type" : "", 90 | "root_template" : "", 91 | "include_error_types" : "" 92 | } 93 | 94 | activities_json = jsonencode(local.ingestion-pipeline-config.properties.activities) 95 | 96 | depends_on = [null_resource.adf_credential] 97 | } 98 | 99 | ##### Pipeline metrics dashboard ##### 100 | 101 | resource "azurerm_portal_dashboard" "pipeline_metrics" { 102 | name = "pipeline-metrics-${terraform.workspace}" 103 | resource_group_name = var.resource_group_name 104 | location = var.location 105 | tags = { 106 | source = "terraform" 107 | } 108 | 109 | dashboard_properties = jsonencode(local.pipeline-metrics-dashboard-config.properties) 110 | } 111 | -------------------------------------------------------------------------------- /terraform/modules/data_factory/outputs.tf: -------------------------------------------------------------------------------- 1 | output "phdi_data_factory_name" { 2 | value = azurerm_data_factory.phdi_data_factory.name 3 | } 4 | 5 | output "ingestion_pipeline_name" { 6 | value = azurerm_data_factory_pipeline.phdi_ingestion.name 7 | } 8 | -------------------------------------------------------------------------------- /terraform/modules/data_factory/variables.tf: -------------------------------------------------------------------------------- 1 | variable "location" { 2 | type = string 3 | description = "Function App Location" 4 | } 5 | 6 | variable "resource_group_name" { 7 | type = string 8 | description = "Resource Group Name" 9 | } 10 | 11 | variable "fhir_converter_url" { 12 | type = string 13 | description = "URL of the FHIR conversion service" 14 | } 15 | 16 | variable "ingestion_container_url" { 17 | type = string 18 | description = "URL of the ingestion container" 19 | } 20 | 21 | variable "message_parser_url" { 22 | type = string 23 | description = "URL of the message parser container" 24 | } 25 | 26 | variable "validation_container_url" { 27 | type = string 28 | description = "URL of the validation container" 29 | } 30 | 31 | variable "record_linkage_container_url" { 32 | type = string 33 | description = "URL of the record linkage container" 34 | } 35 | 36 | variable "fhir_server_url" { 37 | type = string 38 | description = "URL of the FHIR server" 39 | } 40 | 41 | variable "phi_storage_account_endpoint_url" { 42 | type = string 43 | description = "URL of the PHI storage account" 44 | } 45 | 46 | variable "pipeline_runner_id" { 47 | type = string 48 | description = "ID of the pipeline runner identity" 49 | } 50 | 51 | variable "pipeline_runner_principal_id" { 52 | type = string 53 | description = "Principal ID of the pipeline runner identity" 54 | } 55 | 56 | variable "pipeline_runner_resource_id" { 57 | type = string 58 | description = "Resource ID of the pipeline runner identity" 59 | } 60 | 61 | variable "fhir_upload_failures_container_name" { 62 | type = string 63 | description = "Container name for failed FHIR uploads" 64 | } 65 | 66 | variable "delta_tables_container_name" { 67 | type = string 68 | description = "Container name for delta table storage" 69 | } 70 | 71 | variable "phi_storage_account_name" { 72 | type = string 73 | description = "PHI storage account name" 74 | } 75 | 76 | variable "validation_failures_container_name" { 77 | type = 
string 78 | description = "Container name for failed validations" 79 | } 80 | 81 | variable "fhir_conversion_failures_container_name" { 82 | type = string 83 | description = "Container name for failed FHIR conversions" 84 | } 85 | 86 | variable "client_id" { 87 | type = string 88 | description = "Client ID" 89 | } 90 | 91 | variable "key_vault_name" { 92 | type = string 93 | description = "Key vault name" 94 | } 95 | -------------------------------------------------------------------------------- /terraform/modules/read_source_data/data.tf: -------------------------------------------------------------------------------- 1 | data "azurerm_client_config" "current" {} -------------------------------------------------------------------------------- /terraform/modules/read_source_data/main.tf: -------------------------------------------------------------------------------- 1 | resource "azurerm_storage_account" "function_app_sa" { 2 | name = "phdi${terraform.workspace}funcs${substr(var.client_id, 0, 8)}" 3 | resource_group_name = var.resource_group_name 4 | location = var.location 5 | account_tier = "Standard" 6 | account_replication_type = "LRS" 7 | } 8 | 9 | resource "azurerm_storage_container" "read_source_data" { 10 | name = "read-source-data" 11 | storage_account_name = azurerm_storage_account.function_app_sa.name 12 | } 13 | 14 | resource "azurerm_service_plan" "function_app_sp" { 15 | name = "phdi-${terraform.workspace}-azure-functions-sp" 16 | location = var.location 17 | resource_group_name = var.resource_group_name 18 | os_type = "Linux" 19 | sku_name = "Y1" 20 | } 21 | 22 | resource "azurerm_log_analytics_workspace" "log_analytics_workspace" { 23 | name = "workspace-${terraform.workspace}" 24 | location = var.location 25 | resource_group_name = var.resource_group_name 26 | retention_in_days = 30 27 | } 28 | 29 | resource "azurerm_application_insights" "insights" { 30 | name = "phdi-${terraform.workspace}-insights" 31 | location = var.location 32 | resource_group_name = var.resource_group_name 33 | application_type = "web" 34 | workspace_id = azurerm_log_analytics_workspace.log_analytics_workspace.id 35 | } 36 | 37 | resource "azurerm_linux_function_app" "read_source_data" { 38 | name = "${terraform.workspace}-read-source-data-${substr(var.client_id, 0, 8)}" 39 | location = var.location 40 | resource_group_name = var.resource_group_name 41 | service_plan_id = azurerm_service_plan.function_app_sp.id 42 | storage_account_name = azurerm_storage_account.function_app_sa.name 43 | storage_account_access_key = azurerm_storage_account.function_app_sa.primary_access_key 44 | identity { 45 | type = "UserAssigned" 46 | identity_ids = [var.pipeline_runner_id] 47 | } 48 | 49 | app_settings = { 50 | WEBSITE_ENABLE_SYNC_UPDATE_SITE = true 51 | FUNCTIONS_WORKER_RUNTIME = "python" 52 | SCM_DO_BUILD_DURING_DEPLOYMENT = 1 53 | RESOURCE_GROUP_NAME = var.resource_group_name 54 | FACTORY_NAME = var.phdi_data_factory_name 55 | PIPELINE_NAME = var.ingestion_pipeline_name 56 | AZURE_CLIENT_ID = var.pipeline_runner_client_id 57 | AZURE_TENANT_ID = data.azurerm_client_config.current.tenant_id 58 | AZURE_SUBSCRIPTION_ID = var.subscription_id 59 | WAIT_TIME = var.wait_time 60 | SLEEP_TIME = var.sleep_time 61 | INGESTION_URL = var.ingestion_container_url 62 | RECORD_LINKAGE_URL = var.record_linkage_container_url 63 | MESSAGE_PARSER_URL = var.message_parser_url 64 | AzureStorageQueuesConnectionString = var.phi_storage_account_connection_string 65 | STAGING_QUEUE_URL = var.staging_queue_url 66 | } 67 | 68 | 
lifecycle { 69 | ignore_changes = [ 70 | app_settings["WEBSITE_RUN_FROM_PACKAGE"], 71 | tags["hidden-link: /app-insights-conn-string"], 72 | tags["hidden-link: /app-insights-instrumentation-key"], 73 | tags["hidden-link: /app-insights-resource-id"], 74 | ] 75 | } 76 | 77 | site_config { 78 | application_stack { 79 | python_version = "3.9" 80 | } 81 | application_insights_key = azurerm_application_insights.insights.instrumentation_key 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /terraform/modules/read_source_data/outputs.tf: -------------------------------------------------------------------------------- 1 | output "log_analytics_workspace_id" { 2 | value = azurerm_log_analytics_workspace.log_analytics_workspace.id 3 | } 4 | -------------------------------------------------------------------------------- /terraform/modules/read_source_data/variables.tf: -------------------------------------------------------------------------------- 1 | variable "resource_group_name" { 2 | type = string 3 | description = "The name of the resource group in which to create the resources." 4 | } 5 | 6 | variable "location" { 7 | type = string 8 | description = "The Azure location where the resources should be created." 9 | } 10 | 11 | variable "phdi_data_factory_name" { 12 | type = string 13 | description = "The name of the PHDI ADF resource." 14 | } 15 | 16 | variable "ingestion_pipeline_name" { 17 | type = string 18 | description = "The name of the ingestion pipeline in ADF." 19 | } 20 | 21 | variable "subscription_id" { 22 | type = string 23 | description = "The Azure subscription ID." 24 | } 25 | 26 | variable "pipeline_runner_id" { 27 | type = string 28 | description = "ID of the pipeline runner identity" 29 | } 30 | 31 | variable "pipeline_runner_client_id" { 32 | type = string 33 | description = "Client ID of the pipeline runner identity" 34 | } 35 | 36 | variable "client_id" { 37 | type = string 38 | description = "Client ID of the app registration used to authenticate to Azure" 39 | } 40 | 41 | variable "wait_time" { 42 | type = number 43 | description = "The number of seconds to wait when polling for a resource." 44 | } 45 | 46 | variable "sleep_time" { 47 | type = number 48 | description = "The number of seconds to sleep in lookup tries for a resource." 49 | } 50 | 51 | variable "ingestion_container_url" { 52 | type = string 53 | description = "The URL of the ingestion service." 54 | } 55 | 56 | variable "record_linkage_container_url" { 57 | type = string 58 | description = "The URL of the record linkage service." 59 | } 60 | 61 | variable "phi_storage_account_connection_string" { 62 | type = string 63 | description = "The connection string for the storage account." 64 | } 65 | 66 | variable "staging_queue_url" { 67 | type = string 68 | description = "The URL of the staging queue." 
69 | } 70 | 71 | variable "message_parser_url" { 72 | type = string 73 | description = "URL of the message parser container" 74 | } -------------------------------------------------------------------------------- /terraform/modules/shared/data.tf: -------------------------------------------------------------------------------- 1 | data "azurerm_client_config" "current" {} 2 | -------------------------------------------------------------------------------- /terraform/modules/shared/outputs.tf: -------------------------------------------------------------------------------- 1 | output "phi_storage_account_endpoint_url" { 2 | value = azurerm_storage_account.phi.primary_blob_endpoint 3 | } 4 | 5 | output "phi_storage_account_key" { 6 | value = azurerm_storage_account.phi.primary_access_key 7 | } 8 | 9 | output "fhir_server_url" { 10 | value = azurerm_healthcare_fhir_service.fhir_server.authentication[0].audience 11 | } 12 | 13 | output "pipeline_runner_id" { 14 | value = azurerm_user_assigned_identity.pipeline_runner.id 15 | } 16 | 17 | output "pipeline_runner_client_id" { 18 | value = azurerm_user_assigned_identity.pipeline_runner.client_id 19 | } 20 | 21 | output "pipeline_runner_principal_id" { 22 | value = azurerm_user_assigned_identity.pipeline_runner.principal_id 23 | } 24 | 25 | output "pipeline_runner_resource_id" { 26 | value = "/subscriptions/${data.azurerm_client_config.current.subscription_id}/resourcegroups/${var.resource_group_name}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${azurerm_user_assigned_identity.pipeline_runner.name}" 27 | } 28 | 29 | output "fhir_upload_failures_container_name" { 30 | value = azurerm_storage_container.fhir_upload_failures_container_name.name 31 | } 32 | 33 | output "validation_failures_container_name" { 34 | value = azurerm_storage_container.validation_failures_container_name.name 35 | } 36 | 37 | output "fhir_conversion_failures_container_name" { 38 | value = azurerm_storage_container.fhir_conversion_failures_container_name.name 39 | } 40 | 41 | output "delta_tables_container_name" { 42 | value = azurerm_storage_data_lake_gen2_filesystem.delta-tables.name 43 | } 44 | 45 | output "phi_storage_account_name" { 46 | value = azurerm_storage_account.phi.name 47 | } 48 | 49 | output "fhir_converter_url" { 50 | value = "https://phdi-${terraform.workspace}-fhir-converter.${azurerm_container_app_environment.phdi.default_domain}" 51 | } 52 | 53 | output "ingestion_container_url" { 54 | value = "https://phdi-${terraform.workspace}-ingestion.${azurerm_container_app_environment.phdi.default_domain}" 55 | } 56 | 57 | output "message_parser_url" { 58 | value = "https://phdi-${terraform.workspace}-message-parser.${azurerm_container_app_environment.phdi.default_domain}" 59 | } 60 | 61 | output "validation_container_url" { 62 | value = "https://phdi-${terraform.workspace}-validation.${azurerm_container_app_environment.phdi.default_domain}" 63 | } 64 | 65 | output "record_linkage_container_url" { 66 | value = "https://phdi-${terraform.workspace}-record-linkage.${azurerm_container_app_environment.phdi.default_domain}" 67 | } 68 | 69 | output "key_vault_name" { 70 | value = azurerm_key_vault.phdi_key_vault.name 71 | } 72 | 73 | output "phi_storage_account_connection_string" { 74 | value = azurerm_storage_account.phi.primary_connection_string 75 | } 76 | 77 | output "staging_queue_url" { 78 | value = "https://${azurerm_storage_account.phi.name}.queue.core.windows.net/${azurerm_storage_queue.staging_queue.name}" 79 | } 80 | 
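[Usage note: the phi_storage_account_connection_string and staging_queue_url outputs above are what the ReadSourceData function app binds to. A hedged sketch of dropping a test message onto the source-data queue: the queue name comes from function.json, host.json sets messageEncoding to base64 so the client has to encode to match, and the payload shape is an assumption rather than the repo's documented contract:

from azure.storage.queue import QueueClient, TextBase64EncodePolicy

queue = QueueClient.from_connection_string(
    conn_str="<phi_storage_account_connection_string output>",  # placeholder, never commit a real one
    queue_name="sourcedataqueue",  # matches the queueTrigger binding in function.json
    message_encode_policy=TextBase64EncodePolicy(),  # host.json expects base64-encoded messages
)
queue.send_message('{"filename": "example.hl7"}')  # illustrative payload only
]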
-------------------------------------------------------------------------------- /terraform/modules/shared/variables.tf: -------------------------------------------------------------------------------- 1 | variable "resource_group_name" { 2 | type = string 3 | description = "The name of the resource group in which to create the resources." 4 | } 5 | 6 | variable "location" { 7 | type = string 8 | description = "The Azure location where the resources should be created." 9 | } 10 | 11 | variable "smarty_auth_id" { 12 | type = string 13 | description = "The SmartyStreets Auth ID." 14 | } 15 | 16 | variable "smarty_auth_token" { 17 | type = string 18 | description = "The SmartyStreets Auth Token." 19 | } 20 | 21 | variable "smarty_license_type" { 22 | type = string 23 | description = "The SmartyStreets license type to use." 24 | } 25 | 26 | variable "client_id" { 27 | type = string 28 | description = "Client ID" 29 | } 30 | 31 | variable "object_id" { 32 | type = string 33 | description = "Object ID" 34 | } 35 | 36 | variable "ghcr_username" { 37 | type = string 38 | description = "GitHub Container Registry username." 39 | } 40 | 41 | variable "ghcr_token" { 42 | type = string 43 | description = "GitHub Container Registry token." 44 | } 45 | 46 | variable "log_analytics_workspace_id" { 47 | type = string 48 | description = "Log Analytics Workspace ID." 49 | } 50 | -------------------------------------------------------------------------------- /terraform/setup/main.tf: -------------------------------------------------------------------------------- 1 | ########################################################################################### 2 | # 3 | # This file creates the bare minimum infrastructure to start storing remote state. 4 | # It can't store its own remote state, so it holds only the storage account and container for state. 5 | # 6 | # Because of that, do not apply this file more than once: with no state to consult, 7 | # it won't know it already created the resources and the apply will fail.
8 | # 9 | ########################################################################################### 10 | 11 | terraform { 12 | required_providers { 13 | azurerm = { 14 | source = "hashicorp/azurerm" 15 | version = "=3.23.0" 16 | } 17 | } 18 | } 19 | 20 | provider "azurerm" { 21 | use_oidc = true 22 | features {} 23 | } 24 | 25 | resource "azurerm_storage_account" "tfstate" { 26 | name = "phditfstate${substr(var.client_id, 0, 8)}" 27 | resource_group_name = var.resource_group_name 28 | location = var.location 29 | account_tier = "Standard" 30 | account_kind = "StorageV2" 31 | account_replication_type = "GRS" 32 | 33 | lifecycle { 34 | prevent_destroy = true 35 | } 36 | } 37 | 38 | resource "azurerm_storage_container" "tfstate" { 39 | name = "tfstate" 40 | storage_account_name = azurerm_storage_account.tfstate.name 41 | } 42 | -------------------------------------------------------------------------------- /terraform/setup/variables.tf: -------------------------------------------------------------------------------- 1 | variable "subscription_id" { 2 | description = "value of the Azure Subscription ID to use" 3 | } 4 | 5 | variable "location" { 6 | description = "value of the Azure location to deploy to" 7 | default = "Central US" 8 | } 9 | 10 | variable "resource_group_name" { 11 | description = "value of the Azure resource group to deploy to" 12 | } 13 | 14 | variable "client_id" { 15 | description = "value of the Azure App registration ID to use in the tfstate storage account name" 16 | } --------------------------------------------------------------------------------