├── 20-Industry-Use-Cases ├── 21-Mortgage-and-Lending │ ├── utils │ │ ├── __init__.py │ │ └── helpers.py │ ├── images │ │ ├── README │ │ └── a_lending_flow_architecture.png │ └── documents │ │ ├── README │ │ ├── lending_package.pdf │ │ ├── lending_package_w2.pdf │ │ ├── lending_package_w2.png │ │ ├── lending_package_check.pdf │ │ ├── lending_package_ID_Card.pdf │ │ ├── lending_package_pay_stub.pdf │ │ ├── lending_package_account_statement.pdf │ │ └── homeowner_insurance_application_sample.pdf └── 22-Medical-Claims-Processing │ ├── data │ ├── images │ │ └── Medical_Claims_Processing_Architecture.png │ ├── agent_resources │ │ └── agent_prompt.txt │ └── blueprint │ │ └── claims_form.json │ ├── utils │ ├── display_functions.py │ ├── bedrock_utils.py │ └── helper_functions.py │ └── assets │ └── lambdas │ ├── delete-efs-volume │ └── index.py │ ├── lifecycle-configuration │ └── index.py │ ├── schema-loader │ └── index.py │ ├── create-vector-index │ └── index.py │ └── claims-review-agent-action │ └── index.py ├── .gitignore ├── 10-Understanding-BDA ├── data │ ├── documents │ │ ├── claim-form.png │ │ ├── claims-pack.pdf │ │ ├── BankStatement.jpg │ │ ├── BankStatement.pdf │ │ └── sample1_cms-1500-P.pdf │ └── blueprints │ │ ├── medical_transcription.json │ │ ├── discharge_summary.json │ │ ├── lab_reports.json │ │ ├── explanation_of_benefits.json │ │ ├── claims_form.json │ │ └── blueprint_schema.json ├── utils │ ├── display_functions.py │ └── helper_functions.py └── 11_getting_started_with_bda.ipynb ├── images └── amazon-bedrock-data-automation-overview.png ├── CODE_OF_CONDUCT.md ├── LICENSE ├── CONTRIBUTING.md └── README.md /20-Industry-Use-Cases/21-Mortgage-and-Lending/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/images/README: 
-------------------------------------------------------------------------------- 1 | This folder holds images used in the Workbook description. 2 | -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/README: -------------------------------------------------------------------------------- 1 | This folder holds document images used in the Lending Workbook. 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/*checkpoint.ipynb 2 | **/*checkpoint* 3 | **/*.pyc 4 | **/**/.ipynb_checkpoints/* 5 | **checkpoint** 6 | */.ipynb_checkpoints/* 7 | .DS_Store 8 | .log -------------------------------------------------------------------------------- /10-Understanding-BDA/data/documents/claim-form.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/10-Understanding-BDA/data/documents/claim-form.png -------------------------------------------------------------------------------- /10-Understanding-BDA/data/documents/claims-pack.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/10-Understanding-BDA/data/documents/claims-pack.pdf -------------------------------------------------------------------------------- /images/amazon-bedrock-data-automation-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/images/amazon-bedrock-data-automation-overview.png 
-------------------------------------------------------------------------------- /10-Understanding-BDA/data/documents/BankStatement.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/10-Understanding-BDA/data/documents/BankStatement.jpg -------------------------------------------------------------------------------- /10-Understanding-BDA/data/documents/BankStatement.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/10-Understanding-BDA/data/documents/BankStatement.pdf -------------------------------------------------------------------------------- /10-Understanding-BDA/data/documents/sample1_cms-1500-P.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/10-Understanding-BDA/data/documents/sample1_cms-1500-P.pdf -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package.pdf -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_w2.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_w2.pdf -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_w2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_w2.png -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_check.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_check.pdf -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_ID_Card.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_ID_Card.pdf -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_pay_stub.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_pay_stub.pdf 
-------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/images/a_lending_flow_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/images/a_lending_flow_architecture.png -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_account_statement.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_account_statement.pdf -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/homeowner_insurance_application_sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/homeowner_insurance_application_sample.pdf -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/data/images/Medical_Claims_Processing_Architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/22-Medical-Claims-Processing/data/images/Medical_Claims_Processing_Architecture.png -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: 
-------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | 3 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 4 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 5 | opensource-codeofconduct@amazon.com with any additional questions or comments. 6 | -------------------------------------------------------------------------------- /10-Understanding-BDA/data/blueprints/medical_transcription.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "description": "A standardized medical document containing patient data, clinical findings, and diagnostic information transcribed from healthcare provider notes or dictation. Includes medical terminology and follows structured formats for use in patient care and record-keeping.", 4 | "class": "Medical Transcription", 5 | "type": "object", 6 | "definitions": {}, 7 | "properties": { 8 | "summary": { 9 | "type": "string", 10 | "inferenceType": "explicit", 11 | "instruction": "Summary of the report" 12 | } 13 | } 14 | } -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/utils/display_functions.py: -------------------------------------------------------------------------------- 1 | import ipywidgets as widgets 2 | from IPython.display import display 3 | import boto3 4 | 5 | 6 | s3 = boto3.client('s3') 7 | 8 | 9 | def get_view(data, display_function=None): 10 | out = widgets.Output() 11 | with out: 12 | if callable(display_function): 13 | display_function(data) 14 | else: 15 | display(data) 16 | return out 17 | 18 | def display_multiple(views, view_titles = None): 19 | main_tab = widgets.Tab() 20 | for i, view in enumerate(views): 21 | main_tab.children = (*main_tab.children, view) 22 | 
tab_title = view_titles[i] if view_titles and view_titles[i] else f'Document {i}' 23 | main_tab.set_title(i, title=tab_title) 24 | display(main_tab) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | # MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
17 | -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/assets/lambdas/delete-efs-volume/index.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import time 3 | from crhelper import CfnResource 4 | 5 | helper = CfnResource() 6 | 7 | @helper.create 8 | def create(event, context): 9 | print("No action needed on create") 10 | pass 11 | 12 | @helper.update 13 | def update(event, context): 14 | print("No action needed on update") 15 | pass 16 | 17 | @helper.delete 18 | def delete(event, context): 19 | try: 20 | # Extract the domain ID from the event 21 | domain_id = event['ResourceProperties']['DomainId'] 22 | 23 | # Initialize AWS clients 24 | sagemaker = boto3.client('sagemaker') 25 | efs = boto3.client('efs') 26 | 27 | # Describe the domain to get EFS ID 28 | domain = sagemaker.describe_domain(DomainId=domain_id) 29 | efs_id = domain['HomeEfsFileSystemId'] 30 | 31 | print(f'Deleting mount targets for EFS ID {efs_id}') 32 | # Delete mount targets 33 | 34 | try: 35 | mount_targets = efs.describe_mount_targets(FileSystemId=efs_id)['MountTargets'] 36 | for mt in mount_targets: 37 | efs.delete_mount_target(MountTargetId=mt['MountTargetId']) 38 | 39 | # Wait for mount targets to be deleted with a check 40 | while True: 41 | print(f'Checking mount targets for EFS ID {efs_id}') 42 | response = efs.describe_mount_targets(FileSystemId=efs_id) 43 | if not response['MountTargets']: # If no mount targets exist 44 | print('All mount targets deleted') 45 | break 46 | time.sleep(30) # nosemgrep 47 | print(f'Deleting file system with EFS ID {efs_id}') 48 | # Delete the EFS file system 49 | efs.delete_file_system(FileSystemId=efs_id) 50 | print(f"Successfully deleted EFS {efs_id} for SageMaker Studio domain {domain_id}") 51 | except efs.exceptions.FileSystemNotFound: 52 | print(f"File system {efs_id} doesn't exist") 53 | except Exception: 54 | 
print(f"Error Deleting file System {efs_id}. Skipping") 55 | 56 | return efs_id 57 | 58 | except Exception as e: 59 | error_message = f"Error deleting EFS for SageMaker Studio domain {domain_id}: {str(e)}" 60 | print(error_message) 61 | raise Exception(error_message) 62 | 63 | def lambda_handler(event, context): 64 | helper(event, context) -------------------------------------------------------------------------------- /10-Understanding-BDA/data/blueprints/discharge_summary.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "description": "A standard discharge summary report used by hospital containing details of the patient, medical provider and key facts on visit, medical assessment and a summary of discharge.", 4 | "class": "Hospital Discharge Summary", 5 | "type": "object", 6 | "definitions": { 7 | "VisitDetails": { 8 | "type": "object", 9 | "properties": { 10 | "admitted_date": { 11 | "type": "string", 12 | "inferenceType": "explicit", 13 | "instruction": "Date of admission in MM-DD-YYYY format" 14 | }, 15 | "discharged_date": { 16 | "type": "string", 17 | "inferenceType": "explicit", 18 | "instruction": "Date of discharge in MM-DD-YYYY format" 19 | }, 20 | "discharged_to": { 21 | "type": "string", 22 | "inferenceType": "explicit", 23 | "instruction": "Where the patient was discharged to" 24 | } 25 | } 26 | }, 27 | "PatientDetails": { 28 | "type": "object", 29 | "properties": { 30 | "name": { 31 | "type": "string", 32 | "inferenceType": "explicit", 33 | "instruction": "Name of the patient" 34 | }, 35 | "gender": { 36 | "type": "string", 37 | "inferenceType": "explicit", 38 | "instruction": "Gender of the patient" 39 | }, 40 | "patient_id": { 41 | "type": "string", 42 | "inferenceType": "explicit", 43 | "instruction": "Unique id of the patient" 44 | } 45 | } 46 | }, 47 | "ProviderDetails": { 48 | "type": "object", 49 | "properties": { 50 | "name": { 51 | 
"type": "string", 52 | "inferenceType": "explicit", 53 | "instruction": "Name of the provider" 54 | }, 55 | "provider_id": { 56 | "type": "string", 57 | "inferenceType": "explicit", 58 | "instruction": "Unique id of the provider" 59 | } 60 | } 61 | }, 62 | "AssessmentDetails": { 63 | "type": "object", 64 | "properties": { 65 | "reported_symptoms": { 66 | "type": "string", 67 | "inferenceType": "explicit", 68 | "instruction": "Reported symptoms and history of present illness" 69 | } 70 | } 71 | } 72 | }, 73 | "properties": { 74 | "hospital_name": { 75 | "type": "string", 76 | "inferenceType": "explicit", 77 | "instruction": "Name of the hospital" 78 | }, 79 | "hospital_contact": { 80 | "type": "string", 81 | "inferenceType": "explicit", 82 | "instruction": "Contact details of the hospital" 83 | }, 84 | "visit_details": { 85 | "$ref": "#/definitions/VisitDetails" 86 | }, 87 | "patient_details": { 88 | "$ref": "#/definitions/PatientDetails" 89 | }, 90 | "provider_details": { 91 | "$ref": "#/definitions/ProviderDetails" 92 | }, 93 | "assessment_details": { 94 | "$ref": "#/definitions/AssessmentDetails" 95 | }, 96 | "discharge_summary": { 97 | "type": "string", 98 | "inferenceType": "explicit", 99 | "instruction": "Summary of discharge instructions" 100 | } 101 | } 102 | } -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 
8 | 9 | ## Reporting Bugs/Feature Requests 10 | 11 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 12 | 13 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 14 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 15 | 16 | * A reproducible test case or series of steps 17 | * The version of our code being used 18 | * Any modifications you've made relevant to the bug 19 | * Anything unusual about your environment or deployment 20 | 21 | ## Contributing via Pull Requests 22 | 23 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 24 | 25 | 1. You are working against the latest source on the *main* branch. 26 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 27 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 28 | 29 | To send us a pull request, please: 30 | 31 | 1. Fork the repository. 32 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 33 | 3. Ensure local tests pass. 34 | 4. Commit to your fork using clear commit messages. 35 | 5. Send us a pull request, answering any default questions in the pull request interface. 36 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 37 | 38 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 39 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 40 | 41 | ## Finding contributions to work on 42 | 43 | Looking at the existing issues is a great way to find something to contribute on. 
As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 44 | 45 | ## Code of Conduct 46 | 47 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 48 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 49 | opensource-codeofconduct@amazon.com with any additional questions or comments. 50 | 51 | ## Security issue notifications 52 | 53 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 54 | 55 | ## Licensing 56 | 57 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 58 | -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/assets/lambdas/lifecycle-configuration/index.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import base64 3 | from crhelper import CfnResource 4 | 5 | helper = CfnResource() 6 | client = boto3.client('sagemaker') 7 | 8 | lcc_up1 = '\n'.join(( 9 | '#!/bin/bash', 10 | '', 11 | 'set -ex', 12 | '', 13 | 'if [ ! 
-z "${SM_JOB_DEF_VERSION}" ]', 14 | 'then', 15 | ' echo "Running in job mode, skip lcc"', 16 | 'else', 17 | ' rm -rf amazon-bedrock-samples', 18 | ' git clone https://github.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation.git', 19 | ' mv sample-document-processing-with-amazon-bedrock-data-automation bda-workshop', 20 | ' rm -rf sample-document-processing-with-amazon-bedrock-data-automation', 21 | ' echo "Repo cloned from git"', 22 | 'fi', 23 | '', 24 | )) 25 | 26 | def get_lcc_base64_string(lcc_string): 27 | lcc_bytes = lcc_string.encode("ascii") 28 | base64_lcc_bytes = base64.b64encode(lcc_bytes) 29 | base64_lcc_string = base64_lcc_bytes.decode("ascii") 30 | return base64_lcc_string 31 | 32 | def apply_lcc_to_user_profile(base64_lcc_string, lcc_config_name, profile): 33 | response = client.create_studio_lifecycle_config( 34 | StudioLifecycleConfigName=lcc_config_name, 35 | StudioLifecycleConfigContent=base64_lcc_string, 36 | StudioLifecycleConfigAppType="JupyterLab", 37 | ) 38 | 39 | lcc_arn = response["StudioLifecycleConfigArn"] 40 | update_up = client.update_user_profile( 41 | DomainId=profile.split("|")[1], 42 | UserProfileName=profile.split("|")[0], 43 | UserSettings={ 44 | "JupyterLabAppSettings": { 45 | "DefaultResourceSpec": {"LifecycleConfigArn": lcc_arn}, 46 | "LifecycleConfigArns": [lcc_arn] 47 | } 48 | } 49 | ) 50 | return update_up 51 | 52 | @helper.create 53 | @helper.update 54 | def create_or_update(event, context): 55 | up1 = event["ResourceProperties"]["UserProfile"] 56 | lcc_name_up1 = event["ResourceProperties"]["LCCName"] 57 | try: 58 | if event["RequestType"] == "Update": 59 | try: 60 | response1 = client.delete_studio_lifecycle_config( 61 | StudioLifecycleConfigName=lcc_name_up1 62 | ) 63 | print(response1) 64 | except Exception as e2: 65 | print(e2) 66 | 67 | base64_lcc_up1_string = get_lcc_base64_string(lcc_up1) 68 | updated_up1 = apply_lcc_to_user_profile( 69 | base64_lcc_up1_string, 70 | lcc_name_up1, 71 | 
up1 72 | ) 73 | print("Response User Profile LCC update for UP1") 74 | print(updated_up1) 75 | 76 | return {"Data": 120} 77 | except Exception as e: 78 | raise e 79 | 80 | @helper.delete 81 | def delete(event, context): 82 | lcc_name_up1 = event["ResourceProperties"]["LCCName"] 83 | 84 | try: 85 | response1 = client.delete_studio_lifecycle_config( 86 | StudioLifecycleConfigName=lcc_name_up1 87 | ) 88 | print(response1) 89 | return {} 90 | except Exception as e: 91 | print(e) 92 | return {"Error": str(e)} 93 | 94 | def lambda_handler(event, context): 95 | print(event) 96 | helper(event, context) 97 | -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/assets/lambdas/schema-loader/index.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import os 3 | from crhelper import CfnResource 4 | 5 | # Initialize the helper 6 | helper = CfnResource() 7 | rds_data_client = boto3.client('rds-data') 8 | 9 | s3_client = boto3.client('s3') 10 | cluster_arn = os.environ['CLUSTER_ARN'] 11 | secret_arn = os.environ['SECRET_ARN'] 12 | database_name = os.environ['DATABASE_NAME'] 13 | create_schema_sql_file = os.environ['CREATE_SCHEMA_FILE'] 14 | delete_schema_sql_file = os.environ['DELETE_SCHEMA_FILE'] 15 | update_schema_sql_file = os.environ.get('UPDATE_SCHEMA_FILE',None) 16 | initial_data_sql_file = os.environ.get('INITIAL_DATA_FILE', None) 17 | 18 | # Initialize the helper 19 | helper = CfnResource() 20 | 21 | rds_data_client = boto3.client('rds-data') 22 | s3_client = boto3.client('s3') 23 | cluster_arn = os.environ['CLUSTER_ARN'] 24 | secret_arn = os.environ['SECRET_ARN'] 25 | database_name = os.environ['DATABASE_NAME'] 26 | create_schema_sql_file = os.environ['CREATE_SCHEMA_FILE'] 27 | delete_schema_sql_file = os.environ['DELETE_SCHEMA_FILE'] 28 | update_schema_sql_file = os.environ.get('UPDATE_SCHEMA_FILE', None) 29 | initial_data_sql_file = 
os.environ.get('INITIAL_DATA_FILE', None) 30 | 31 | 32 | @helper.create 33 | def create(event, context): 34 | """Handle Create event""" 35 | execute(create_schema_sql_file) 36 | if initial_data_sql_file: 37 | execute(initial_data_sql_file) 38 | return "CustomResourcePhysicalID" 39 | 40 | 41 | @helper.update 42 | def update(event, context): 43 | """Handle Update event""" 44 | if update_schema_sql_file: 45 | execute(update_schema_sql_file) 46 | return "CustomResourcePhysicalID" 47 | 48 | 49 | @helper.delete 50 | def delete(event, context): 51 | """Handle Delete event""" 52 | execute(delete_schema_sql_file) 53 | return "CustomResourcePhysicalID" 54 | 55 | 56 | def handler(event, context): 57 | """Main handler function""" 58 | print(event) 59 | helper(event, context) 60 | 61 | 62 | def execute(sql_file_path:str): 63 | """Create the schema in the database.""" 64 | # Download SQL script from S3 65 | bucket_name, key_name = parse_s3_url(sql_file_path) 66 | sql_script = download_sql_script(bucket_name, key_name) 67 | # Split script into individual statements and execute each one 68 | statements = sql_script.split(';') 69 | for statement in statements: 70 | if statement.strip(): 71 | # Execute each statement 72 | print(f"Executing statement: {statement}") 73 | execute_statement(cluster_arn, secret_arn, database_name, statement) 74 | 75 | 76 | def parse_s3_url(s3_url): 77 | """Parse S3 URL into bucket name and key.""" 78 | s3_url_parts = s3_url.replace("s3://", "").split("/", 1) 79 | return s3_url_parts[0], s3_url_parts[1] 80 | 81 | 82 | def download_sql_script(bucket_name, key_name): 83 | """Download SQL script from S3.""" 84 | response = s3_client.get_object(Bucket=bucket_name, Key=key_name) 85 | return response['Body'].read().decode('utf-8') 86 | 87 | 88 | def execute_statement(cluster_arn, secret_arn, database_name, sql_statement): 89 | """Execute a single SQL statement using RDS Data API.""" 90 | response = rds_data_client.execute_statement( 91 | 
resourceArn=cluster_arn, 92 | secretArn=secret_arn, 93 | database=database_name, 94 | sql=sql_statement 95 | ) 96 | print(response) 97 | 98 | -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/assets/lambdas/create-vector-index/index.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | from crhelper import CfnResource 3 | from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth 4 | from time import sleep 5 | 6 | 7 | helper = CfnResource(json_logging=False, log_level='DEBUG', boto_level='CRITICAL', sleep_on_delete=120, ssl_verify=None) 8 | 9 | #No-op for update and delete 10 | @helper.delete 11 | def no_op(event, context): 12 | print("No op for delete") 13 | 14 | #get aoss_host from os environ 15 | def removeHttpsPrefix(endpoint): 16 | """ 17 | This function removes the "https://" prefix from a given endpoint string, 18 | if present, and returns the modified string. 
19 | """ 20 | if endpoint.startswith("https://"): 21 | return endpoint[8:] 22 | return endpoint 23 | 24 | def get_aoss_host(resource_properties): 25 | if "AOSSHost" not in resource_properties: 26 | raise Exception("AOSSHost not provided from resource properties") 27 | 28 | return removeHttpsPrefix(resource_properties["AOSSHost"]) 29 | 30 | def get_aoss_client(host): 31 | auth = AWSV4SignerAuth( 32 | boto3.Session().get_credentials(), 33 | boto3.session.Session().region_name, 34 | "aoss" 35 | ) 36 | # create an opensearch client and use the request-signer 37 | return OpenSearch( 38 | hosts=[{'host': host, 'port': 443}], 39 | http_auth=auth, 40 | use_ssl=True, 41 | verify_certs=True, 42 | connection_class=RequestsHttpConnection 43 | ) 44 | def get_aoss_index_name(resource_properties): 45 | if "AOSSIndexName" not in resource_properties: 46 | raise Exception("AOSSIndexName not provided from resource properties") 47 | return resource_properties["AOSSIndexName"] 48 | 49 | #Function to use the opensearch-py library to create an index within an opensearch collection 50 | def create_aoss_index(index_name, aos_client): 51 | index_body = { 52 | "settings": { 53 | "index.knn": True 54 | }, 55 | "mappings": { 56 | "properties": { 57 | "vector": { 58 | "type": "knn_vector", 59 | "dimension": 1024, 60 | "method": { 61 | "name": "hnsw", 62 | "space_type": "l2", 63 | "engine": "faiss", 64 | "parameters": { 65 | "ef_construction": 512, 66 | "m": 16 67 | } 68 | } 69 | }, 70 | "text": { 71 | "type": "text" 72 | }, 73 | "id": { 74 | "type": "text" 75 | }, 76 | "text-metadata": { 77 | "type": "text" 78 | }, 79 | "x-amz-bedrock-kb-source-uri": { 80 | "type": "text" 81 | } 82 | } 83 | } 84 | } 85 | 86 | aos_client.indices.create(index=index_name, body=index_body) 87 | print(f"Created index {index_name}") 88 | 89 | #Handles create event of the CloudFormation resource 90 | @helper.create 91 | @helper.update 92 | def create_or_update_index(event, context): 93 | resource_properties = 
event['ResourceProperties'] 94 | aoss_host = get_aoss_host(resource_properties) 95 | aos_client = get_aoss_client(aoss_host) 96 | index_name = get_aoss_index_name(resource_properties) 97 | response = None 98 | sleep(60) # nosemgrep 99 | if not aos_client.indices.exists(index=index_name): 100 | response = create_aoss_index(index_name=index_name, aos_client=aos_client) 101 | return response 102 | 103 | def lambda_handler(event, context): 104 | print(event) 105 | helper(event, context) -------------------------------------------------------------------------------- /10-Understanding-BDA/data/blueprints/lab_reports.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "description": "The lab reports document specimen analysis from a medical procedure, including clinical history, gross examination, microscopic findings, and immunostaining results. The report provides detailed measurements and diagnostic markers to support the final medical diagnosis.", 4 | "class": "Lab Reports", 5 | "type": "object", 6 | "definitions": { 7 | "PatientInfo": { 8 | "type": "object", 9 | "properties": { 10 | "name": { 11 | "type": "string", 12 | "inferenceType": "explicit", 13 | "instruction": "The name of the patient" 14 | }, 15 | "medical_record_number": { 16 | "type": "string", 17 | "inferenceType": "explicit", 18 | "instruction": "The patient's medical record number" 19 | }, 20 | "date_of_birth": { 21 | "type": "string", 22 | "inferenceType": "explicit", 23 | "instruction": "The patient's date of birth in MM/DD/YYYY format" 24 | }, 25 | "gender": { 26 | "type": "string", 27 | "inferenceType": "explicit", 28 | "instruction": "The patient's gender" 29 | } 30 | } 31 | }, 32 | "ProcedureInfo": { 33 | "type": "object", 34 | "properties": { 35 | "accession_number": { 36 | "type": "string", 37 | "inferenceType": "explicit", 38 | "instruction": "The unique identifier for the specimen/procedure" 
39 | }, 40 | "date": { 41 | "type": "string", 42 | "inferenceType": "explicit", 43 | "instruction": "The date of the procedure in MM/DD/YYYY format" 44 | }, 45 | "attending_physician": { 46 | "type": "string", 47 | "inferenceType": "explicit", 48 | "instruction": "The name of the physician who performed or attended the procedure" 49 | } 50 | } 51 | }, 52 | "ClinicalInfo": { 53 | "type": "object", 54 | "properties": { 55 | "history": { 56 | "type": "string", 57 | "inferenceType": "explicit", 58 | "instruction": "A brief description of the patient's symptoms or reason for procedure" 59 | }, 60 | "specimen": { 61 | "type": "string", 62 | "inferenceType": "explicit", 63 | "instruction": "A description of the tissue or organ examined" 64 | } 65 | } 66 | }, 67 | "Diagnosis": { 68 | "type": "object", 69 | "properties": { 70 | "diagnosis": { 71 | "type": "string", 72 | "inferenceType": "explicit", 73 | "instruction": "The diagnosis or conclusion" 74 | }, 75 | "tumor_size": { 76 | "type": "string", 77 | "inferenceType": "explicit", 78 | "instruction": "The size of any tumor in cm" 79 | }, 80 | "cell_type": { 81 | "type": "string", 82 | "inferenceType": "explicit", 83 | "instruction": "A description of the type of cells observed" 84 | }, 85 | "other_findings": { 86 | "type": "string", 87 | "inferenceType": "explicit", 88 | "instruction": "Other relevant microscopic findings" 89 | } 90 | } 91 | }, 92 | "Immunostains": { 93 | "type": "object", 94 | "properties": { 95 | "positive_markers": { 96 | "type": "string", 97 | "inferenceType": "explicit", 98 | "instruction": "Markers for which the tumor cells tested positive" 99 | }, 100 | "negative_markers": { 101 | "type": "string", 102 | "inferenceType": "explicit", 103 | "instruction": "Markers for which the tumor cells tested negative" 104 | } 105 | } 106 | } 107 | }, 108 | "properties": { 109 | "patient_information": { 110 | "$ref": "#/definitions/PatientInfo" 111 | }, 112 | "procedure_information": { 113 | "$ref": 
"#/definitions/ProcedureInfo" 114 | }, 115 | "clinical_information": { 116 | "$ref": "#/definitions/ClinicalInfo" 117 | }, 118 | "diagnosis": { 119 | "$ref": "#/definitions/Diagnosis" 120 | }, 121 | "gross_instruction": { 122 | "type": "string", 123 | "inferenceType": "explicit", 124 | "instruction": "A instruction of the appearance of the specimen" 125 | }, 126 | "microscopic_instruction": { 127 | "type": "string", 128 | "inferenceType": "explicit", 129 | "instruction": "A instruction of the tumor cells and tissue under the microscope" 130 | }, 131 | "immunostains": { 132 | "$ref": "#/definitions/Immunostains" 133 | }, 134 | "comment": { 135 | "type": "string", 136 | "inferenceType": "explicit", 137 | "instruction": "Any additional comments or notes" 138 | } 139 | } 140 | } -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/utils/helpers.py: -------------------------------------------------------------------------------- 1 | import json 2 | import ipywidgets as widgets 3 | import io 4 | 5 | 6 | def pil_to_bytes(image): 7 | byte_arr = io.BytesIO() 8 | image.save(byte_arr, format='PNG') 9 | return byte_arr.getvalue() 10 | 11 | 12 | def display_image(image): 13 | image_widget = widgets.Image(value=pil_to_bytes(image), format='png') 14 | image_widget.layout.width = '400px' 15 | image_widget.layout.height = 'auto' 16 | image_widget.layout.object_fit = 'contain' 17 | return image_widget 18 | 19 | def json_to_html(json_obj, indent=0): 20 | result = [] 21 | if isinstance(json_obj, dict): 22 | result.append('') 23 | for key, value in json_obj.items(): 24 | result.append('') 25 | result.append(f'') 26 | result.append('') 29 | result.append('') 30 | result.append('
{key}') 27 | result.append(json_to_html(value, indent + 1)) 28 | result.append('
') 31 | elif isinstance(json_obj, list): 32 | result.append('') 33 | for i, item in enumerate(json_obj): 34 | result.append('') 35 | result.append(f'') 36 | result.append('') 39 | result.append('') 40 | result.append('
{i}') 37 | result.append(json_to_html(item, indent + 1)) 38 | result.append('
') 41 | elif isinstance(json_obj, (str, int, float, bool)) or json_obj is None: 42 | if isinstance(json_obj, str): 43 | result.append(f'"{json_obj}"') 44 | elif isinstance(json_obj, bool): 45 | result.append(f'{str(json_obj).lower()}') 46 | elif json_obj is None: 47 | result.append('null') 48 | else: 49 | result.append(f'{json_obj}') 50 | return ''.join(result) 51 | 52 | def display_json(json_data, title): 53 | html_content = f""" 54 |
55 |

{title}

56 |
57 | {json_to_html(json_data)} 58 |
59 |
60 | 99 | """ 100 | return widgets.HTML(html_content) 101 | 102 | def display_image_jsons(image, json_arr, titles): 103 | image_widget = display_image(image) 104 | right_column = widgets.VBox([display_json(data, title) for data, title in zip(json_arr, titles)]) 105 | bordered_hbox = widgets.HBox([image_widget, right_column]) 106 | bordered_hbox.layout.border = '5px solid black' 107 | bordered_hbox.layout.padding = '10px' 108 | bordered_hbox.layout.margin = '10px' 109 | return bordered_hbox 110 | 111 | 112 | def get_s3_to_dict(s3, s3_url): 113 | bucket_name = s3_url.split('/')[2] 114 | object_key = '/'.join(s3_url.split('/')[3:]) 115 | 116 | # Download the JSON file from S3 117 | response = s3.get_object(Bucket=bucket_name, Key=object_key) 118 | json_content = response['Body'].read().decode('utf-8') 119 | 120 | # Parse the JSON content 121 | json_obj = json.loads(json_content) 122 | return json_obj -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/data/agent_resources/agent_prompt.txt: -------------------------------------------------------------------------------- 1 | You are a Claims Reviewer AI assistant. Your task is to review insurance claims following a specific process using provided function calls and a knowledge base. At the end of the review you 2 | would provide a detailed report of the review findings and status. 3 | To finish the review carry out all the steps detailed below carefully and thoroughly. DO NOT ASK THE USER FOR MORE INFORMATION. ALL information is available in the claim form data 4 | 5 | STEP 1 - EXTRACT CLAIM FORM DATA 6 | - To begin with You will be provided with a claim form URI. You must first get the claim form data from S3 using the given URI as input. 7 | - Use the function call get_claim_form_data(claim_form_uri) to get the claim form data. 
8 | - Once you have the claim form data, Keep a note of all the fields and their values, you would use all of the fields in the form data in later steps. 9 | 10 | STEP 2 - VERIFY INSURED MEMBER AND PATIENT DETAILS 11 | - Use the insured id number, patient last name and patient date of birth from the claim form data to get the member and patient detail from the claims database 12 | - Compare the insured member details with the details in the claim form data 13 | - for each detail, add an entry to your final report. Use this table format 14 | | Field Name | Claim Form Data | Database Data | Match or No Match | 15 | |------------|-----------------|---------------|-------------------| 16 | - If any discrepancies are found, add a note to your report and stop the process and respond with final report. 17 | - If the insured member and patient details are verified, add a note to your report and continue the process 18 | - Continue to Step 3 19 | 20 | STEP 3 CREATE CLAIM RECORD 21 | - Once and only if the insured member and patient details are matched Use the function call createClaim to create a claim record in the claims database. 22 | - use the data already gathered in the previous step to call the action to create a claim record 23 | 1. The patient details 24 | 2. The insured member details 25 | 3. Fields in the Claim form data 26 | - Use "IN_PROGRESS" as the status of the claim record 27 | - keep a note of the claim id returned after creating the claim data, you will need it later. 28 | - If the claim record is created, add a note to your final report 29 | - If the claim record is not created, add a note to your report and stop the process and respond with final report 30 | - CONTINUE TO STEP 4 31 | 32 | STEP 4. 
RETRIEVE EVIDENCE OF COVERAGE DETAILS FOR THE INSURANCE PLAN 33 | - Using the insured_plan_name from the insured member detail find a matching document in the Claims Evidence of Coverage Knowledge Base 34 | - STRICTLY USE only the document that matches the insured_plan_name. 35 | - If no document is found, add a note to your report and stop the process and respond with final report. 36 | - If a document is found, add a note to your report and continue the process 37 | - CONTINUE TO STEP 5 38 | 39 | STEP 5. EVALUATE COVERAGE 40 | - Use the claim form data to identify the services, treatments, procedures, and charges. 41 | - Add to your note the list of services, treatments, procedures, respective date, place and associated charges. 42 | - Using the details of each of the service, procedure code and charges in the claim form data search the content from evidence of coverage document to determine if that particular service/procedure or treatment is covered by the specific insurance plan 43 | - Add the findings in your final report in this format along with a snippet of text from the evidence of coverage document that supports your findings 44 | | Service/Procedure | Date | Place | Charges | Covered/Not Covered | Relevant Justification 45 | |-------------------------------------|-----------|------------|---------|-----------------------|----------------------------------------------------------------------| 46 | - For each service/procedure, add an entry to your report. 47 | - CONTINUE TO STEP 6 48 | 49 | STEP 6. UPDATE CLAIM STATUS 50 | - If all services are covered: 51 | * Update the claim record using the claim id to set the status to "ELIGIBLE" 52 | - If some or no services are covered: 53 | * Update the claim record using the claim id to set the status to "ADJUDICATOR_REVIEW" 54 | - CONTINUE TO STEP 7 55 | 56 | STEP 7. 
Respond with the final report with the following contents 57 | - Table containing the member and patient details and if they match with details in the database 58 | - The table with services/procedures and their coverage status 59 | - State the final claim status (ELIGIBLE or ADJUDICATOR_REVIEW). 60 | 61 | When responding, please provide a thorough analysis following these steps. Be precise in your language, citing specific details from the claim form and EoC document. 62 | If you need any clarification or additional information to complete the review, please ask. Your goal is to ensure accurate and fair claim processing 63 | while adhering to the insurance plan's coverage guidelines. 64 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Document Processing with Amazon Bedrock Data Automation 2 | 3 | ## How Bedrock Data Automation works 4 | 5 | Bedrock Data Automation (BDA) lets you configure output based on your processing needs for a specific data type: documents, images, video or audio. BDA can generate standard output or custom output. Below are some key concepts for understanding how BDA works. If you're a new user, start with the information about standard output. 6 | 7 | * **Standard output** – Sending a file to BDA with no other information returns the default standard output, which consists of commonly required information that's based on the data type. Examples include audio transcriptions, scene summaries for video, and document summaries. These outputs can be tuned to your use case using projects to modify them. For more information, see e.g. [Standard output for documents in Bedrock Data Automation](https://docs.aws.amazon.com/bedrock/latest/userguide/bda-output-documents.html). 8 | 9 | * **Custom output** – For documents and images, only. 
Choose custom output to define exactly what information you want to extract using a blueprint. A blueprint consists of a list of expected fields that you want retrieved from a document or image. Each field represents a piece of information that needs to be extracted to meet your specific use case. You can create your own blueprints, or select predefined blueprints from the BDA blueprint catalog. For more information, see [Custom output and blueprints](https://docs.aws.amazon.com/bedrock/latest/userguide/bda-custom-output-idp.html). 10 | 11 | * **Projects** – A project is a BDA resource that allows you to modify and organize output configurations. Each project can contain standard output configurations for documents, images, video, and audio, as well as custom output blueprints for documents and images. Projects are referenced in the `InvokeDataAutomationAsync` API call to instruct BDA on how to process the files. For more information about projects and their use cases, see [Bedrock Data Automation projects](https://docs.aws.amazon.com/bedrock/latest/userguide/bda-projects.html). 
12 | 13 | Overview Bedrock Data Automation 14 | 15 | This workshop contains the following sections 16 | 17 | * **1 - Understanding Bedrock Data Automation** 18 | * [Getting Started - How Bedrock Data Automation works](10-Understanding-BDA/11_getting_started_with_bda.ipynb) 19 | * [Document Insights with Standard Outputs](10-Understanding-BDA/12_standard_output_extended.ipynb) 20 | * [Custom Document Insights with Blueprints](10-Understanding-BDA/13_custom_outputs_and_blueprints.ipynb) 21 | * **2 - Industry Use Cases - Document Processing** 22 | * [Mortgage and Lending Flow](20-Industry-Use-Cases/21-Mortgage-and-Lending/21_mortgage_and_lending.ipynb) 23 | * [Medical Claims Processing with Agents](20-Industry-Use-Cases/22-Medical-Claims-Processing/22_medical_claims_processing.ipynb) 24 | 25 | * **3 - Bedrock Data Automation Patterns (Coming Soon)** 26 | 27 | ### Use Cases 28 | 29 | Here are some example use cases that BDA can help you with - 30 | 31 | **Document processing**: Automate Intelligent Document Processing workflows at scale, transforming unstructured documents into structured data outputs that can be customized to integrate with existing systems and workflows. 32 | 33 | **Media analysis**: Extract meaningful insights from unstructured video by creating scene summaries, identifying unsafe/explicit content, extracting text, and classifying content, enabling intelligent video search, contextual advertising, and brand safety/compliance. 34 | 35 | **Generative AI assistants**: Enhance the performance of your retrieval-augmented generation (RAG) powered question answering applications by providing them with rich, modality-specific data representations extracted from your documents, images, video, and audio. 
36 | 37 | ### Getting Started 38 | 39 | * Create Jupyterlab space in Amazon Sagemaker Studio or any other environment 40 | * Make sure you have the required IAM role permissions 41 | * Checkout the repository 42 | * Run through the notebooks 43 | 44 | ### Required IAM Permissions 45 | 46 | The features being explored in the notebook require the following IAM Policies for the execution role being used. If you're running this notebook within SageMaker Studio in your own Account, update the default execution role for the SageMaker user profile to include the following IAM policies. 47 | 48 | When using your own AWS Account to run this workshop, use AWS regions `us-east-1` or `us-west-2` where Bedrock Data Automation is available as of this writing. 49 | 50 | ```json 51 | [ 52 | { 53 | "Sid": "BDACreatePermissions", 54 | "Effect": "Allow", 55 | "Action": [ 56 | "bedrock:CreateDataAutomationProject", 57 | "bedrock:CreateBlueprint" 58 | ], 59 | "Resource": "*" 60 | }, 61 | { 62 | "Sid": "BDAOProjectsPermissions", 63 | "Effect": "Allow", 64 | "Action": [ 65 | "bedrock:CreateDataAutomationProject", 66 | "bedrock:UpdateDataAutomationProject", 67 | "bedrock:GetDataAutomationProject", 68 | "bedrock:GetDataAutomationStatus", 69 | "bedrock:ListDataAutomationProjects", 70 | "bedrock:InvokeDataAutomationAsync" 71 | ], 72 | "Resource": "arn:aws:bedrock:::data-automation-project/*" 73 | }, 74 | { 75 | "Sid": "BDABlueprintPermissions", 76 | "Effect": "Allow", 77 | "Action": [ 78 | "bedrock:GetBlueprint", 79 | "bedrock:ListBlueprints", 80 | "bedrock:UpdateBlueprint", 81 | "bedrock:DeleteBlueprint" 82 | ], 83 | "Resource": "arn:aws:bedrock:::blueprint/*" 84 | }, 85 | 86 | 87 | { 88 | "Sid": "BDACrossRegionInference", 89 | "Effect": "Allow", 90 | "Action": ["bedrock:InvokeDataAutomationAsync"], 91 | "Resource": [ 92 | "arn:aws:bedrock:us-east-1:account_id:data-automation-profile/us.data-automation-v1", 93 | 
"arn:aws:bedrock:us-east-2:account_id:data-automation-profile/us.data-automation-v1", 94 | "arn:aws:bedrock:us-west-1:account_id:data-automation-profile/us.data-automation-v1", 95 | "arn:aws:bedrock:us-west-2:account_id:data-automation-profile/us.data-automation-v1"] 96 | } 97 | ] 98 | ``` 99 | 100 | Note - The policy uses wildcard(s) for demo purposes. AWS recommends using least privileges when defining IAM Policies in your own AWS Accounts. See [Security Best Practices in IAM](https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html) 101 | 102 | 103 | ## Contributors 104 | 105 | * Raja Vaidyanathan 106 | * Arlind Nocaj 107 | * Conor Manton 108 | * Luca Perrozzi -------------------------------------------------------------------------------- /10-Understanding-BDA/data/blueprints/explanation_of_benefits.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "description": "A blueprint for a Remittance Advice (RA) or Explanation of Benefits (EOB), which is a standard document sent by insurance companies to detail how a medical claim was processed showing breakdown of charges, what the insurance paid, any discounts and amount due", 4 | "class": "Explanation of Benefits", 5 | "type": "object", 6 | "definitions": { 7 | "PaymentDetail": { 8 | "type": "object", 9 | "properties": { 10 | "paid_to": { 11 | "type": "string", 12 | "inferenceType": "explicit", 13 | "instruction": "Who the payment was made to" 14 | }, 15 | "check_number": { 16 | "type": "string", 17 | "inferenceType": "explicit", 18 | "instruction": "The check number" 19 | }, 20 | "amount": { 21 | "type": "number", 22 | "inferenceType": "explicit", 23 | "instruction": "The payment amount" 24 | } 25 | } 26 | }, 27 | "payment_details": { 28 | "type": "object", 29 | "properties": { 30 | "paid_to": { 31 | "type": "string", 32 | "inferenceType": "explicit", 33 | "instruction": "Who the payment was made to" 
34 | }, 35 | "check_number": { 36 | "type": "string", 37 | "inferenceType": "explicit", 38 | "instruction": "The check number" 39 | }, 40 | "amount": { 41 | "type": "number", 42 | "inferenceType": "explicit", 43 | "instruction": "The payment amount" 44 | } 45 | } 46 | }, 47 | "claim_summary": { 48 | "type": "object", 49 | "properties": { 50 | "claim_number": { 51 | "type": "number", 52 | "inferenceType": "explicit", 53 | "instruction": "The claim number" 54 | }, 55 | "patient_name": { 56 | "type": "string", 57 | "inferenceType": "explicit", 58 | "instruction": "The Patient Name associated with the claim" 59 | }, 60 | "billed_amount": { 61 | "type": "number", 62 | "inferenceType": "explicit", 63 | "instruction": "Billed Amount" 64 | }, 65 | "provider_discount": { 66 | "type": "number", 67 | "inferenceType": "explicit", 68 | "instruction": "Provider Discount" 69 | }, 70 | "ucr_amount": { 71 | "type": "number", 72 | "inferenceType": "explicit", 73 | "instruction": "UCR amount in dollars" 74 | }, 75 | "ineligible_amount": { 76 | "type": "number", 77 | "inferenceType": "explicit", 78 | "instruction": "Ineligible Amount in dollars" 79 | }, 80 | "deductible_amount": { 81 | "type": "number", 82 | "inferenceType": "explicit", 83 | "instruction": "Deductible Amount in dollars" 84 | }, 85 | "copay_amount": { 86 | "type": "number", 87 | "inferenceType": "explicit", 88 | "instruction": "the copay amount in dollars" 89 | }, 90 | "payment_amount": { 91 | "type": "number", 92 | "inferenceType": "explicit", 93 | "instruction": "Payment Amount" 94 | } 95 | } 96 | }, 97 | "claim_details": { 98 | "type": "object", 99 | "properties": { 100 | "dates_of_services": { 101 | "type": "string", 102 | "inferenceType": "explicit", 103 | "instruction": "Dates of Services" 104 | }, 105 | "procedure_code": { 106 | "type": "string", 107 | "inferenceType": "explicit", 108 | "instruction": "Procedure Code" 109 | }, 110 | "billed_amount": { 111 | "type": "number", 112 | "inferenceType": "explicit", 
113 | "instruction": "Billed Amount in Dollars" 114 | }, 115 | "provider_discount": { 116 | "type": "number", 117 | "inferenceType": "explicit", 118 | "instruction": "Provider Discount in Dollars" 119 | }, 120 | "max_plan_allowable": { 121 | "type": "number", 122 | "inferenceType": "explicit", 123 | "instruction": "Maximum Plan Allowable in Dollars" 124 | }, 125 | "ineligible_amount": { 126 | "type": "number", 127 | "inferenceType": "explicit", 128 | "instruction": "Ineligible Amount in Dollars" 129 | }, 130 | "remark_code": { 131 | "type": "string", 132 | "inferenceType": "explicit", 133 | "instruction": "Remark Code" 134 | }, 135 | "deductible_amount": { 136 | "type": "number", 137 | "inferenceType": "explicit", 138 | "instruction": "Deductible Amount in Dollars" 139 | }, 140 | "copay_amount": { 141 | "type": "number", 142 | "inferenceType": "explicit", 143 | "instruction": "Copay Amount in Dollars" 144 | }, 145 | "paid_at": { 146 | "type": "number", 147 | "inferenceType": "explicit", 148 | "instruction": "Paid at (percentage)" 149 | }, 150 | "payment_amount": { 151 | "type": "number", 152 | "inferenceType": "explicit", 153 | "instruction": "Payment Amount in Dollars" 154 | } 155 | } 156 | } 157 | }, 158 | "properties": { 159 | "employer": { 160 | "type": "string", 161 | "inferenceType": "explicit", 162 | "instruction": "The employer name" 163 | }, 164 | "group_number": { 165 | "type": "string", 166 | "inferenceType": "explicit", 167 | "instruction": "The group number" 168 | }, 169 | "date": { 170 | "type": "string", 171 | "inferenceType": "explicit", 172 | "instruction": "The date" 173 | }, 174 | "check_number": { 175 | "type": "string", 176 | "inferenceType": "explicit", 177 | "instruction": "The check number" 178 | }, 179 | "claim_number": { 180 | "type": "string", 181 | "inferenceType": "explicit", 182 | "instruction": "The claim number" 183 | }, 184 | "patient_name": { 185 | "type": "string", 186 | "inferenceType": "explicit", 187 | "instruction": "The 
patient name" 188 | }, 189 | "member_id": { 190 | "type": "string", 191 | "inferenceType": "explicit", 192 | "instruction": "The member ID" 193 | }, 194 | "patient_responsibility": { 195 | "type": "number", 196 | "inferenceType": "explicit", 197 | "instruction": "The patient's responsibility amount" 198 | }, 199 | "other_credits_or_adjustments": { 200 | "type": "number", 201 | "inferenceType": "explicit", 202 | "instruction": "Any other credits or adjustments amount" 203 | }, 204 | "total_payment": { 205 | "type": "number", 206 | "inferenceType": "explicit", 207 | "instruction": "The total payment amount" 208 | }, 209 | "paid_to": { 210 | "type": "string", 211 | "inferenceType": "explicit", 212 | "instruction": "Who the payment was made to" 213 | }, 214 | "payment_details": { 215 | "type": "array", 216 | "instruction": "The payment details table", 217 | "items": { 218 | "$ref": "#/definitions/payment_details" 219 | } 220 | }, 221 | "claim_details": { 222 | "type": "array", 223 | "instruction": "details of services that form the part of the claim", 224 | "items": { 225 | "$ref": "#/definitions/claim_details" 226 | } 227 | }, 228 | "claim_summary": { 229 | "$ref": "#/definitions/claim_summary" 230 | } 231 | } 232 | } -------------------------------------------------------------------------------- /10-Understanding-BDA/data/blueprints/claims_form.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "class": "CMS 1500 Claim Form", 4 | "description": "A standard medical claim form used by healthcare providers in the US to bill health insurance companies for medical services.", 5 | "definitions": { 6 | "Procedure_Service_Supplies": { 7 | "properties": { 8 | "service_start_date": { 9 | "type": "string", 10 | "inferenceType": "explicit", 11 | "instruction": "The service start date from item 24A in YYYY-MM-DD format" 12 | }, 13 | "service_end_date": { 14 | "type": "string", 
15 | "inferenceType": "explicit", 16 | "instruction": "The service end date from item 24A in YYYY-MM-DD format" 17 | }, 18 | "place_of_service": { 19 | "type": "string", 20 | "instruction": "The place the service was provided" 21 | }, 22 | "type_of_service": { 23 | "type": "string", 24 | "instruction": "The type of medical service" 25 | }, 26 | "procedure_modifier": { 27 | "type": "string", 28 | "inferenceType": "explicit", 29 | "instruction": "The procedure modifier from item 24D" 30 | }, 31 | "diagnosis_code": { 32 | "type": "string", 33 | "inferenceType": "explicit", 34 | "instruction": "The diagnosis code from item 24E" 35 | }, 36 | "procedure_code": { 37 | "type": "string", 38 | "instruction": "The procedure code" 39 | }, 40 | "charge_amount": { 41 | "type": "number", 42 | "instruction": "The charge amount for the procedure" 43 | } 44 | } 45 | } 46 | }, 47 | "properties": { 48 | "insurance_program": { 49 | "type": "string", 50 | "inferenceType": "explicit", 51 | "instruction": "The insurance program from item 1: Medicare, Medicaid, CHAMPUS, CHAMPVA, Group Health Plan" 52 | }, 53 | "insured_id_number": { 54 | "type": "string", 55 | "inferenceType": "explicit", 56 | "instruction": "The insured's ID number from item 1a" 57 | }, 58 | "patient_name": { 59 | "type": "string", 60 | "inferenceType": "explicit", 61 | "instruction": "The patient's name from item 2 in Last Name, First Name, Middle Initial format" 62 | }, 63 | "patient_date_of_birth": { 64 | "type": "string", 65 | "inferenceType": "explicit", 66 | "instruction": "The patient's date of birth from item 3 in YYYY-MM-DD format" 67 | }, 68 | "insured_name": { 69 | "type": "string", 70 | "inferenceType": "explicit", 71 | "instruction": "The insured's name from item 4 in Last Name, First Name, Middle Initial format" 72 | }, 73 | "patient_address": { 74 | "type": "string", 75 | "inferenceType": "explicit", 76 | "instruction": "The patient's address from item 5" 77 | }, 78 | "patient_relationship_to_insured": { 79 
| "type": "string", 80 | "inferenceType": "explicit", 81 | "instruction": "The patient's relationship to insured from item 6" 82 | }, 83 | "insured_address": { 84 | "type": "string", 85 | "inferenceType": "explicit", 86 | "instruction": "The insured's address from item 7 including No.,Street, City, State, Zip Code" 87 | }, 88 | "insured_phone_number": { 89 | "type": "string", 90 | "inferenceType": "explicit", 91 | "instruction": "The insured's phone number, including area code from item 7 " 92 | }, 93 | "patient_sex": { 94 | "type": "string", 95 | "inferenceType": "explicit", 96 | "instruction": "The patient's sex from item 8" 97 | }, 98 | "patient_marital_status": { 99 | "type": "string", 100 | "inferenceType": "explicit", 101 | "instruction": "The patient's marital status from item 8" 102 | }, 103 | "patient_condition_related_to": { 104 | "type": "string", 105 | "inferenceType": "explicit", 106 | "instruction": "Whether the patient's condition is related to employment, auto accident, or other accident from item 10" 107 | }, 108 | "insured_policy_feca_number": { 109 | "type": "string", 110 | "inferenceType": "explicit", 111 | "instruction": "The insured's policy group or FECA number from item 11" 112 | }, 113 | "insured_date_of_birth": { 114 | "type": "string", 115 | "inferenceType": "explicit", 116 | "instruction": "The insured's date of birth from item 11a in YYYY-MM-DD format" 117 | }, 118 | "insured_employer_or_school": { 119 | "type": "string", 120 | "inferenceType": "explicit", 121 | "instruction": "The insured's employer or school from item 11b" 122 | }, 123 | "insured_insurance_plan_name": { 124 | "type": "string", 125 | "inferenceType": "explicit", 126 | "instruction": "The insured's plan name or program name from item 11c" 127 | }, 128 | "another_health_benefit_plan_indicator": { 129 | "type": "boolean", 130 | "inferenceType": "explicit", 131 | "instruction": "d. IS THERE ANOTHER HEALTH BENEFIT PLAN? 
Yes or No from item 11d" 132 | }, 133 | "patient_signed_date": { 134 | "type": "string", 135 | "inferenceType": "explicit", 136 | "instruction": "patient's or authorized person's signature date from item 12" 137 | }, 138 | "insured_signed_date": { 139 | "type": "string", 140 | "inferenceType": "explicit", 141 | "instruction": "The insured's or authorized person's signed date from item 13" 142 | }, 143 | "illness_injury_date": { 144 | "type": "string", 145 | "inferenceType": "explicit", 146 | "instruction": "The date of current illness, injury, or pregnancy from item 14 in YYYY-MM-DD format" 147 | }, 148 | "previous_illness_date": { 149 | "type": "string", 150 | "inferenceType": "explicit", 151 | "instruction": "The date of a previous similar illness from item 15 in YYYY-MM-DD format" 152 | }, 153 | "unable_to_work_start_date": { 154 | "type": "string", 155 | "inferenceType": "explicit", 156 | "instruction": "The dates the patient was unable to work from item 16" 157 | }, 158 | "unable_to_work_end_date": { 159 | "type": "string", 160 | "inferenceType": "explicit", 161 | "instruction": "The dates the patient was unable to work until item 16" 162 | }, 163 | "referring_physician": { 164 | "type": "string", 165 | "inferenceType": "explicit", 166 | "instruction": "The name of the referring physician from item 17" 167 | }, 168 | "referring_physician_id": { 169 | "type": "string", 170 | "inferenceType": "explicit", 171 | "instruction": "The ID number of the referring physician from item 17a" 172 | }, 173 | "hospitalization_start_date": { 174 | "type": "string", 175 | "inferenceType": "explicit", 176 | "instruction": "The hospitalization start date related to current services from item 18" 177 | }, 178 | "hospitalization_end_date": { 179 | "type": "string", 180 | "inferenceType": "explicit", 181 | "instruction": "The hospitalization end date related to current services from item 18" 182 | }, 183 | "is_outside_lab_indicator": { 184 | "type": "boolean", 185 | "inferenceType": 
"explicit", 186 | "instruction": "Are there outside lab charges? from item 20" 187 | }, 188 | "outside_lab_charges": { 189 | "type": "string", 190 | "inferenceType": "explicit", 191 | "instruction": "Whether outside lab was used and charges from item 20" 192 | }, 193 | "diagnosis_1": { 194 | "type": "string", 195 | "inferenceType": "explicit", 196 | "instruction": "The diagnosis or nature of illness or injury from item 21.1" 197 | }, 198 | "diagnosis_2": { 199 | "type": "string", 200 | "inferenceType": "explicit", 201 | "instruction": "The diagnosis or nature of illness or injury from item 21.2" 202 | }, 203 | "diagnosis_3": { 204 | "type": "string", 205 | "inferenceType": "explicit", 206 | "instruction": "The diagnosis or nature of illness or injury from item 21.3" 207 | }, 208 | "diagnosis_4": { 209 | "type": "string", 210 | "inferenceType": "explicit", 211 | "instruction": "The diagnosis or nature of illness or injury from item 21.4" 212 | }, 213 | "medicaid_resubmission_number": { 214 | "type": "string", 215 | "inferenceType": "explicit", 216 | "instruction": "MEDICAID RESUBMISSION NUMBER from item 22" 217 | }, 218 | "medicaid_original_ref_number": { 219 | "type": "string", 220 | "inferenceType": "explicit", 221 | "instruction": "Medicaid - Original ref no. 
from item 22" 222 | }, 223 | "prior_authorization_number": { 224 | "type": "string", 225 | "inferenceType": "explicit", 226 | "instruction": "The prior authorization number from item 23" 227 | }, 228 | "medical_procedures": { 229 | "type": "array", 230 | "instruction": "The list of medical procedures from the table in item 24", 231 | "items": { 232 | "$ref": "#/definitions/Procedure_Service_Supplies" 233 | } 234 | }, 235 | "tax_id_type": { 236 | "type": "string", 237 | "inferenceType": "explicit", 238 | "instruction": "The tax ID type (SSN or EIN) from item 25" 239 | }, 240 | "tax_id_number": { 241 | "type": "string", 242 | "inferenceType": "explicit", 243 | "instruction": "The federal tax ID number (SSN or EIN) from item 25" 244 | }, 245 | "total_charges": {"type": "number","inferenceType": "explicit","instruction": "The total charges in dollars from item 28"}, 246 | "amount_paid": {"type": "number","inferenceType": "explicit","instruction": "The amount paid in dollars from item 29"} 247 | } 248 | } -------------------------------------------------------------------------------- /10-Understanding-BDA/data/blueprints/blueprint_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "class": "CMS 1500 Claim Form", 4 | "description": "A standard medical claim form used by healthcare providers in the US to bill health insurance companies for medical services.", 5 | "definitions": { 6 | "Procedure_Service_Supplies": { 7 | "properties": { 8 | "service_start_date": { 9 | "type": "string", 10 | "inferenceType": "explicit", 11 | "instruction": "The service start date from item 24A in YYYY-MM-DD format" 12 | }, 13 | "service_end_date": { 14 | "type": "string", 15 | "inferenceType": "explicit", 16 | "instruction": "The service end date from item 24A in YYYY-MM-DD format" 17 | }, 18 | "place_of_service": { 19 | "type": "string", 20 | "instruction": "The place the service 
was provided" 21 | }, 22 | "type_of_service": { 23 | "type": "string", 24 | "instruction": "The type of medical service" 25 | }, 26 | "procedure_modifier": { 27 | "type": "string", 28 | "inferenceType": "explicit", 29 | "instruction": "The procedure modifier from item 24D" 30 | }, 31 | "diagnosis_code": { 32 | "type": "string", 33 | "inferenceType": "explicit", 34 | "instruction": "The diagnosis code from item 24E" 35 | }, 36 | "procedure_code": { 37 | "type": "string", 38 | "instruction": "The procedure code" 39 | }, 40 | "charge_amount": { 41 | "type": "number", 42 | "instruction": "The charge amount for the procedure" 43 | } 44 | } 45 | } 46 | }, 47 | "properties": { 48 | "insurance_program": { 49 | "type": "string", 50 | "inferenceType": "explicit", 51 | "instruction": "The insurance program from item 1: Medicare, Medicaid, CHAMPUS, CHAMPVA, Group Health Plan" 52 | }, 53 | "insured_id_number": { 54 | "type": "string", 55 | "inferenceType": "explicit", 56 | "instruction": "The insured's ID number from item 1a" 57 | }, 58 | "patient_name": { 59 | "type": "string", 60 | "inferenceType": "explicit", 61 | "instruction": "The patient's name from item 2 in Last Name, First Name, Middle Initial format" 62 | }, 63 | "patient_date_of_birth": { 64 | "type": "string", 65 | "inferenceType": "explicit", 66 | "instruction": "The patient's date of birth from item 3 in YYYY-MM-DD format" 67 | }, 68 | "insured_name": { 69 | "type": "string", 70 | "inferenceType": "explicit", 71 | "instruction": "The insured's name from item 4 in Last Name, First Name, Middle Initial format" 72 | }, 73 | "patient_address": { 74 | "type": "string", 75 | "inferenceType": "explicit", 76 | "instruction": "The patient's address from item 5" 77 | }, 78 | "patient_relationship_to_insured": { 79 | "type": "string", 80 | "inferenceType": "explicit", 81 | "instruction": "The patient's relationship to insured from item 6" 82 | }, 83 | "insured_address": { 84 | "type": "string", 85 | "inferenceType": 
"explicit", 86 | "instruction": "The insured's address from item 7 including No.,Street, City, State, Zip Code" 87 | }, 88 | "insured_phone_number": { 89 | "type": "string", 90 | "inferenceType": "explicit", 91 | "instruction": "The insured's phone number, including area code from item 7 " 92 | }, 93 | "patient_sex": { 94 | "type": "string", 95 | "inferenceType": "explicit", 96 | "instruction": "The patient's address from item 8" 97 | }, 98 | "patient_marital_status": { 99 | "type": "string", 100 | "inferenceType": "explicit", 101 | "instruction": "The patient's address from item 8" 102 | }, 103 | "patient_condition_related_to": { 104 | "type": "string", 105 | "inferenceType": "explicit", 106 | "instruction": "Whether the patient's condition is related to employment, auto accident, or other accident from item 10" 107 | }, 108 | "insured_policy_feca_number": { 109 | "type": "string", 110 | "inferenceType": "explicit", 111 | "instruction": "The insured's policy group or FECA number from item 11" 112 | }, 113 | "insured_date_of_birth": { 114 | "type": "string", 115 | "inferenceType": "explicit", 116 | "instruction": "The insured's policy or group number from item 11a" 117 | }, 118 | "insured_employer_or_school": { 119 | "type": "string", 120 | "inferenceType": "explicit", 121 | "instruction": "The insured's employer or school 11b" 122 | }, 123 | "insured_insurance_plan_name": { 124 | "type": "string", 125 | "inferenceType": "explicit", 126 | "instruction": "The insured's plan name or program name from item 11c" 127 | }, 128 | "another_health_benefit_plan_indicator": { 129 | "type": "boolean", 130 | "inferenceType": "explicit", 131 | "instruction": "d. IS THERE ANOTHER HEALTH BENEFIT PLAN? 
Yes or No from item 11d" 132 | }, 133 | "patient_signed_date": { 134 | "type": "string", 135 | "inferenceType": "explicit", 136 | "instruction": "patient's or authorized person's signature date from item 12" 137 | }, 138 | "insured_signed_date": { 139 | "type": "string", 140 | "inferenceType": "explicit", 141 | "instruction": "The insured's or authorized person's signed date from item 13" 142 | }, 143 | "illness_injury_date": { 144 | "type": "string", 145 | "inferenceType": "explicit", 146 | "instruction": "The date of current illness, injury, or pregnancy from item 14 in YYYY-MM-DD format" 147 | }, 148 | "previous_illness_date": { 149 | "type": "string", 150 | "inferenceType": "explicit", 151 | "instruction": "The date of a previous similar illness from item 15 in YYYY-MM-DD format" 152 | }, 153 | "unable_to_work_start_date": { 154 | "type": "string", 155 | "inferenceType": "explicit", 156 | "instruction": "The dates the patient was unable to work from item 16" 157 | }, 158 | "unable_to_work_end_date": { 159 | "type": "string", 160 | "inferenceType": "explicit", 161 | "instruction": "The dates the patient was unable to work until item 16" 162 | }, 163 | "referring_physician": { 164 | "type": "string", 165 | "inferenceType": "explicit", 166 | "instruction": "The name of the referring physician from item 17" 167 | }, 168 | "referring_physician_id": { 169 | "type": "string", 170 | "inferenceType": "explicit", 171 | "instruction": "The ID number of the referring physician from item 17a" 172 | }, 173 | "hospitalization_start_date": { 174 | "type": "string", 175 | "inferenceType": "explicit", 176 | "instruction": "The hospitalization start date related to current services from item 18" 177 | }, 178 | "hospitalization_end_date": { 179 | "type": "string", 180 | "inferenceType": "explicit", 181 | "instruction": "The hospitalization end date related to current services from item 18" 182 | }, 183 | "is_outside_lab_indicator": { 184 | "type": "boolean", 185 | "inferenceType": 
"explicit", 186 | "instruction": "Are there outside lab charges? from item 20" 187 | }, 188 | "outside_lab_charges": { 189 | "type": "string", 190 | "inferenceType": "explicit", 191 | "instruction": "Whether outside lab was used and charges from item 20" 192 | }, 193 | "diagnosis_1": { 194 | "type": "string", 195 | "inferenceType": "explicit", 196 | "instruction": "The diagnosis or nature of illness or injury from item 21.1" 197 | }, 198 | "diagnosis_2": { 199 | "type": "string", 200 | "inferenceType": "explicit", 201 | "instruction": "The diagnosis or nature of illness or injury from item 21.2" 202 | }, 203 | "diagnosis_3": { 204 | "type": "string", 205 | "inferenceType": "explicit", 206 | "instruction": "The diagnosis or nature of illness or injury from item 21.3" 207 | }, 208 | "diagnosis_4": { 209 | "type": "string", 210 | "inferenceType": "explicit", 211 | "instruction": "The diagnosis or nature of illness or injury from item 21.4" 212 | }, 213 | "medicaid_resubmission_number": { 214 | "type": "string", 215 | "inferenceType": "explicit", 216 | "instruction": "MEDICAID RESUBMISSION NUMBER from item 22" 217 | }, 218 | "medicaid_original_ref_number": { 219 | "type": "string", 220 | "inferenceType": "explicit", 221 | "instruction": "Medicaid - Original ref no. 
from item 22" 222 | }, 223 | "prior_authorization_number": { 224 | "type": "string", 225 | "inferenceType": "explicit", 226 | "instruction": "The prior authorization number from item 23" 227 | }, 228 | "medical_procedures": { 229 | "type": "array", 230 | "instruction": "The list of medical procedures from the table in item 24", 231 | "items": { 232 | "$ref": "#/definitions/Procedure_Service_Supplies" 233 | } 234 | }, 235 | "tax_id_type": { 236 | "type": "string", 237 | "inferenceType": "explicit", 238 | "instruction": "The tax ID type (SSN or EIN) from item 25" 239 | }, 240 | "tax_id_number": { 241 | "type": "string", 242 | "inferenceType": "explicit", 243 | "instruction": "The federal tax ID number (SSN or EIN) from item 25" 244 | }, 245 | "total_charges": {"type": "number","inferenceType": "explicit","instruction": "The total charges in dollars from item 28"}, 246 | "amount_paid": {"type": "number","inferenceType": "explicit","instruction": "The amount paid in dollars from item 29"} 247 | } 248 | } -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/data/blueprint/claims_form.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "class": "CMS 1500 Claim Form", 4 | "description": "A standard medical claim form used by healthcare providers in the US to bill health insurance companies for medical services.", 5 | "definitions": { 6 | "Procedure_Service_Supplies": { 7 | "properties": { 8 | "service_start_date": { 9 | "type": "string", 10 | "inferenceType": "explicit", 11 | "instruction": "The service start date from item 24A in YYYY-MM-DD format" 12 | }, 13 | "service_end_date": { 14 | "type": "string", 15 | "inferenceType": "explicit", 16 | "instruction": "The service end date from item 24A in YYYY-MM-DD format" 17 | }, 18 | "place_of_service": { 19 | "type": "string", 20 | "instruction": 
"The place the service was provided" 21 | }, 22 | "type_of_service": { 23 | "type": "string", 24 | "instruction": "The type of medical service" 25 | }, 26 | "procedure_modifier": { 27 | "type": "string", 28 | "inferenceType": "explicit", 29 | "instruction": "The procedure modifier from item 24D" 30 | }, 31 | "diagnosis_code": { 32 | "type": "string", 33 | "inferenceType": "explicit", 34 | "instruction": "The diagnosis code from item 24E" 35 | }, 36 | "procedure_code": { 37 | "type": "string", 38 | "instruction": "The procedure code" 39 | }, 40 | "charge_amount": { 41 | "type": "number", 42 | "instruction": "The charge amount for the procedure" 43 | } 44 | } 45 | } 46 | }, 47 | "properties": { 48 | "insurance_program": { 49 | "type": "string", 50 | "inferenceType": "explicit", 51 | "instruction": "The insurance program from item 1: Medicare, Medicaid, CHAMPUS, CHAMPVA, Group Health Plan" 52 | }, 53 | "insured_id_number": { 54 | "type": "string", 55 | "inferenceType": "explicit", 56 | "instruction": "The insured's ID number from item 1a" 57 | }, 58 | "patient_name": { 59 | "type": "string", 60 | "inferenceType": "explicit", 61 | "instruction": "The patient's name from item 2 in Last Name, First Name, Middle Initial format" 62 | }, 63 | "patient_date_of_birth": { 64 | "type": "string", 65 | "inferenceType": "explicit", 66 | "instruction": "The patient's date of birth from item 3 in YYYY-MM-DD format" 67 | }, 68 | "insured_name": { 69 | "type": "string", 70 | "inferenceType": "explicit", 71 | "instruction": "The insured's name from item 4 in Last Name, First Name, Middle Initial format" 72 | }, 73 | "patient_address": { 74 | "type": "string", 75 | "inferenceType": "explicit", 76 | "instruction": "The patient's address from item 5" 77 | }, 78 | "patient_relationship_to_insured": { 79 | "type": "string", 80 | "inferenceType": "explicit", 81 | "instruction": "The patient's relationship to insured from item 6" 82 | }, 83 | "insured_address": { 84 | "type": "string", 85 | 
"inferenceType": "explicit", 86 | "instruction": "The insured's address from item 7 including No.,Street, City, State, Zip Code" 87 | }, 88 | "insured_phone_number": { 89 | "type": "string", 90 | "inferenceType": "explicit", 91 | "instruction": "The insured's phone number, including area code from item 7 " 92 | }, 93 | "patient_sex": { 94 | "type": "string", 95 | "inferenceType": "explicit", 96 | "instruction": "The patient's address from item 8" 97 | }, 98 | "patient_marital_status": { 99 | "type": "string", 100 | "inferenceType": "explicit", 101 | "instruction": "The patient's address from item 8" 102 | }, 103 | "patient_condition_related_to": { 104 | "type": "string", 105 | "inferenceType": "explicit", 106 | "instruction": "Whether the patient's condition is related to employment, auto accident, or other accident from item 10" 107 | }, 108 | "insured_policy_feca_number": { 109 | "type": "string", 110 | "inferenceType": "explicit", 111 | "instruction": "The insured's policy group or FECA number from item 11" 112 | }, 113 | "insured_date_of_birth": { 114 | "type": "string", 115 | "inferenceType": "explicit", 116 | "instruction": "The insured's policy or group number from item 11a" 117 | }, 118 | "insured_employer_or_school": { 119 | "type": "string", 120 | "inferenceType": "explicit", 121 | "instruction": "The insured's employer or school 11b" 122 | }, 123 | "insured_insurance_plan_name": { 124 | "type": "string", 125 | "inferenceType": "explicit", 126 | "instruction": "The insured's plan name or program name from item 11c" 127 | }, 128 | "another_health_benefit_plan_indicator": { 129 | "type": "boolean", 130 | "inferenceType": "explicit", 131 | "instruction": "d. IS THERE ANOTHER HEALTH BENEFIT PLAN? 
Yes or No from item 11d" 132 | }, 133 | "patient_signed_date": { 134 | "type": "string", 135 | "inferenceType": "explicit", 136 | "instruction": "patient's or authorized person's signature date from item 12" 137 | }, 138 | "insured_signed_date": { 139 | "type": "string", 140 | "inferenceType": "explicit", 141 | "instruction": "The insured's or authorized person's signed date from item 13" 142 | }, 143 | "illness_injury_date": { 144 | "type": "string", 145 | "inferenceType": "explicit", 146 | "instruction": "The date of current illness, injury, or pregnancy from item 14 in YYYY-MM-DD format" 147 | }, 148 | "previous_illness_date": { 149 | "type": "string", 150 | "inferenceType": "explicit", 151 | "instruction": "The date of a previous similar illness from item 15 in YYYY-MM-DD format" 152 | }, 153 | "unable_to_work_start_date": { 154 | "type": "string", 155 | "inferenceType": "explicit", 156 | "instruction": "The dates the patient was unable to work from item 16" 157 | }, 158 | "unable_to_work_end_date": { 159 | "type": "string", 160 | "inferenceType": "explicit", 161 | "instruction": "The dates the patient was unable to work until item 16" 162 | }, 163 | "referring_physician": { 164 | "type": "string", 165 | "inferenceType": "explicit", 166 | "instruction": "The name of the referring physician from item 17" 167 | }, 168 | "referring_physician_id": { 169 | "type": "string", 170 | "inferenceType": "explicit", 171 | "instruction": "The ID number of the referring physician from item 17a" 172 | }, 173 | "hospitalization_start_date": { 174 | "type": "string", 175 | "inferenceType": "explicit", 176 | "instruction": "The hospitalization start date related to current services from item 18" 177 | }, 178 | "hospitalization_end_date": { 179 | "type": "string", 180 | "inferenceType": "explicit", 181 | "instruction": "The hospitalization end date related to current services from item 18" 182 | }, 183 | "is_outside_lab_indicator": { 184 | "type": "boolean", 185 | "inferenceType": 
"explicit", 186 | "instruction": "Are there outside lab charges? from item 20" 187 | }, 188 | "outside_lab_charges": { 189 | "type": "string", 190 | "inferenceType": "explicit", 191 | "instruction": "Whether outside lab was used and charges from item 20" 192 | }, 193 | "diagnosis_1": { 194 | "type": "string", 195 | "inferenceType": "explicit", 196 | "instruction": "The diagnosis or nature of illness or injury from item 21.1" 197 | }, 198 | "diagnosis_2": { 199 | "type": "string", 200 | "inferenceType": "explicit", 201 | "instruction": "The diagnosis or nature of illness or injury from item 21.2" 202 | }, 203 | "diagnosis_3": { 204 | "type": "string", 205 | "inferenceType": "explicit", 206 | "instruction": "The diagnosis or nature of illness or injury from item 21.3" 207 | }, 208 | "diagnosis_4": { 209 | "type": "string", 210 | "inferenceType": "explicit", 211 | "instruction": "The diagnosis or nature of illness or injury from item 21.4" 212 | }, 213 | "medicaid_resubmission_number": { 214 | "type": "string", 215 | "inferenceType": "explicit", 216 | "instruction": "MEDICAID RESUBMISSION NUMBER from item 22" 217 | }, 218 | "medicaid_original_ref_number": { 219 | "type": "string", 220 | "inferenceType": "explicit", 221 | "instruction": "Medicaid - Original ref no. 
from item 22" 222 | }, 223 | "prior_authorization_number": { 224 | "type": "string", 225 | "inferenceType": "explicit", 226 | "instruction": "The prior authorization number from item 23" 227 | }, 228 | "medical_procedures": { 229 | "type": "array", 230 | "instruction": "The list of medical procedures from the table in item 24", 231 | "items": { 232 | "$ref": "#/definitions/Procedure_Service_Supplies" 233 | } 234 | }, 235 | "tax_id_type": { 236 | "type": "string", 237 | "inferenceType": "explicit", 238 | "instruction": "The tax ID type (SSN or EIN) from item 25" 239 | }, 240 | "tax_id_number": { 241 | "type": "string", 242 | "inferenceType": "explicit", 243 | "instruction": "The federal tax ID number (SSN or EIN) from item 25" 244 | }, 245 | "total_charges": {"type": "number","inferenceType": "explicit","instruction": "The total charges in dollars from item 28"}, 246 | "amount_paid": {"type": "number","inferenceType": "explicit","instruction": "The amount paid in dollars from item 29"} 247 | } 248 | } -------------------------------------------------------------------------------- /10-Understanding-BDA/utils/display_functions.py: -------------------------------------------------------------------------------- 1 | import ipywidgets as widgets 2 | from IPython.display import display, HTML 3 | import pandas as pd 4 | from PIL import Image 5 | import io 6 | import boto3 7 | from urllib.parse import urlparse 8 | from pdf2image import convert_from_bytes 9 | 10 | 11 | s3 = boto3.client('s3') 12 | 13 | 14 | onclick_function = """ 15 | 41 | """ 42 | 43 | def load_image(uri): 44 | if uri.startswith('s3://'): 45 | bucket, key = urlparse(uri).netloc, urlparse(uri).path.lstrip('/') 46 | file_content = s3.get_object(Bucket=bucket, Key=key)['Body'].read() 47 | else: 48 | file_content = open(uri, 'rb').read() 49 | 50 | if uri.lower().endswith('.pdf'): 51 | img_io = io.BytesIO() 52 | convert_from_bytes(file_content)[0].save(img_io, format='JPEG') 53 | return img_io.getvalue() 54 | 
55 | img = Image.open(io.BytesIO(file_content)) 56 | if img.format != 'JPEG': 57 | img_io = io.BytesIO() 58 | img.save(img_io, format='JPEG') 59 | return img_io.getvalue() 60 | return file_content 61 | 62 | 63 | def get_kv_html(kv_pairs): 64 | # Create key-value pairs display 65 | kv_html = onclick_function 66 | kv_html += """ 67 |
68 | 69 | 75 | """ 76 | 77 | for i, (key, (value, confidence)) in enumerate(kv_pairs.items()): 78 | kv_html += '' 81 | kv_html += """ 82 |
' 79 | kv_html += create_key_value_box(key, value, confidence) 80 | kv_html += '
83 |
84 | """ 85 | return kv_html 86 | 87 | def create_key_value_box(key, value, confidence): 88 | html = f""" 89 |
99 |
105 |
{key}
106 |
{confidence}
113 |
114 |
{value}
115 |
116 | """ 117 | return html 118 | 119 | def display_result(document_image_uri, kvpairs): 120 | # Create the layout with top alignment 121 | main_hbox_layout = widgets.Layout( 122 | width='100%', 123 | display='flex', 124 | flex_flow='row nowrap', 125 | align_items='stretch', 126 | margin='0' 127 | ) 128 | 129 | image_widget = widgets.Image( 130 | value=b'', 131 | format='png', 132 | width='auto', 133 | height='auto' 134 | ) 135 | image_widget.value = load_image(image_path=document_image_uri) 136 | image_container = widgets.Box( 137 | children=[image_widget], 138 | layout=widgets.Layout( 139 | border='1px solid #888', 140 | padding='1px', 141 | margin='2px', 142 | width='70%', 143 | flex='0 0 70%', 144 | min_width='300px', 145 | height='auto', 146 | display='flex', 147 | align_items='stretch', 148 | justify_content='center' 149 | ) 150 | ) 151 | kv_html = get_kv_html(kvpairs) 152 | # Add content to the Forms tab 153 | result_widget = widgets.HTML( 154 | value=kv_html, 155 | layout=widgets.Layout( 156 | border='0px solid #888', 157 | width='100%', 158 | height='10px', 159 | flex='0 0 100%', # flex: grow shrink basis 160 | margin='5px', 161 | min_width='300px' 162 | ) 163 | ) 164 | result_container = widgets.VBox( 165 | children=[result_widget], 166 | layout=widgets.Layout( 167 | border='0px solid #888', 168 | padding='4px', 169 | margin='5px', 170 | width='30%', 171 | flex='0 0 30%', 172 | min_width='200px', 173 | justify_content='center' 174 | ) 175 | ) 176 | # Add custom CSS for scrollable container 177 | custom_style = """ 178 | 189 | """ 190 | display(HTML(custom_style)) 191 | # Create the main layout 192 | main_layout = widgets.HBox( 193 | children=[image_container, result_container], 194 | layout=main_hbox_layout 195 | ) 196 | # Add the scrollable class to the right VBox 197 | result_widget.add_class('scrollable-vbox') 198 | main_layout.add_class('main-container') 199 | # Display the main layout 200 | display(main_layout) 201 | 202 | def display_multiple(views, 
view_titles = None): 203 | main_tab = widgets.Tab() 204 | for i, view in enumerate(views): 205 | main_tab.children = (*main_tab.children, view) 206 | tab_title = view_titles[i] if view_titles and view_titles[i] else f'Document {i}' 207 | main_tab.set_title(i, title=tab_title) 208 | display(main_tab) 209 | 210 | def create_form_view(forms_data): 211 | 212 | styles = """ 213 | 224 | """ 225 | 226 | def render_nested_keys(data): 227 | if not isinstance(data, dict): 228 | return f'
{data}
' 229 | html = "" 230 | for key, value in data.items(): 231 | if isinstance(value, dict) and 'value' in value: 232 | conf = value.get('confidence', 0) * 100 233 | html += f""" 234 |
235 |
{key}
236 |
237 |
{value['value']}
238 |
{conf:.1f}%
239 |
240 |
""" 241 | else: 242 | html += f""" 243 |
244 |
{key}
245 |
{render_nested_keys(value)}
246 |
""" 247 | return html 248 | 249 | return HTML(f"{styles}
{render_nested_keys(forms_data)}
") 250 | 251 | 252 | def create_table_view(tables_data): 253 | styles = """ 254 | 285 | """ 286 | 287 | def process_table(table_data): 288 | def format_cell(cell): 289 | if isinstance(cell, dict) and 'value' in cell: 290 | conf = f"({cell.get('confidence', 0):.1%})" if 'confidence' in cell else "" 291 | return f"{cell['value']}{conf}" 292 | return str(cell) 293 | 294 | return pd.DataFrame([{k: format_cell(v) for k, v in row.items()} for row in table_data]) 295 | 296 | tables_html = "".join( 297 | f""" 298 |
299 |

{table_name}

300 |
301 | {process_table(table_data).to_html(classes='table-view', index=False, escape=False)} 302 |
303 |
304 | """ 305 | for table_name, table_data in tables_data.items() if table_data 306 | ) 307 | 308 | return HTML(f"{styles}{tables_html}") 309 | 310 | def segment_view(document_image_uris, inference_result): 311 | # Create the layout with top alignment 312 | main_hbox_layout = widgets.Layout( 313 | width='100%', 314 | display='flex', 315 | flex_flow='row nowrap', 316 | align_items='stretch', 317 | margin='0' 318 | ) 319 | image_widget = widgets.Image( 320 | value=b'', 321 | format='png', 322 | width='auto', 323 | height='auto' 324 | ) 325 | image_widget.value = load_image(uri=document_image_uris[0]) 326 | image_container = widgets.VBox( 327 | children=[image_widget], 328 | layout=widgets.Layout( 329 | border='0px solid #888', 330 | padding='1px', 331 | margin='2px', 332 | width='60%', 333 | flex='0 0 60%', 334 | min_width='300px', 335 | height='auto', 336 | display='flex', 337 | align_items='stretch', 338 | justify_content='center' 339 | ) 340 | ) 341 | 342 | 343 | # Create tabs for different views 344 | tab = widgets.Tab( 345 | layout=widgets.Layout( 346 | width='40%', 347 | flex='0 0 40%', 348 | min_width='300px', 349 | height='auto' 350 | ) 351 | ) 352 | form_view = widgets.Output() 353 | table_view = widgets.Output() 354 | 355 | with form_view: 356 | display(create_form_view(inference_result['forms'])) 357 | 358 | with table_view: 359 | display(create_table_view(inference_result['tables'])) 360 | 361 | tab.children = [form_view, table_view] 362 | tab.set_title(0, 'Key Value Pairs') 363 | tab.set_title(1, 'Tables') 364 | 365 | 366 | # Add custom CSS for scrollable container 367 | custom_style = """ 368 | 383 | """ 384 | display(HTML(custom_style)) 385 | 386 | # Create the main layout 387 | main_layout = widgets.HBox( 388 | children=[image_container, tab], 389 | layout=main_hbox_layout 390 | ) 391 | 392 | 393 | # Add the scrollable class to the right VBox 394 | main_layout.add_class('main-container') 395 | return main_layout 396 | 397 | 398 | def get_view(data, 
display_function=None): 399 | out = widgets.Output() 400 | with out: 401 | if callable(display_function): 402 | display_function(data) 403 | else: 404 | display(data) 405 | return out -------------------------------------------------------------------------------- /10-Understanding-BDA/utils/helper_functions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import boto3 4 | from urllib.parse import urlparse 5 | import requests 6 | import base64 7 | import io 8 | from PIL import Image 9 | from PyPDF2 import PdfReader, PdfWriter 10 | from botocore.exceptions import ClientError 11 | from IPython.display import HTML 12 | from IPython.display import display 13 | from botocore.auth import SigV4Auth 14 | from botocore.awsrequest import AWSRequest 15 | import json 16 | import ipywidgets as widgets 17 | import pandas as pd 18 | 19 | 20 | s3_client = boto3.client("s3") 21 | bda_client = boto3.client('bedrock-data-automation') 22 | bda_runtime_client = boto3.client('bedrock-data-automation-runtime') 23 | 24 | 25 | def pil_to_bytes(image): 26 | byte_arr = io.BytesIO() 27 | image.save(byte_arr, format='PNG') 28 | return byte_arr.getvalue() 29 | 30 | 31 | def display_image(image): 32 | image_widget = widgets.Image(value=pil_to_bytes(image), format='png') 33 | image_widget.layout.width = '400px' 34 | image_widget.layout.height = 'auto' 35 | image_widget.layout.object_fit = 'contain' 36 | return image_widget 37 | 38 | 39 | def json_to_html(json_obj, indent=0): 40 | result = [] 41 | if isinstance(json_obj, dict): 42 | result.append('') 43 | for key, value in json_obj.items(): 44 | result.append('') 45 | result.append(f'') 46 | result.append('') 49 | result.append('') 50 | result.append('
{key}') 47 | result.append(json_to_html(value, indent + 1)) 48 | result.append('
') 51 | elif isinstance(json_obj, list): 52 | result.append('') 53 | for i, item in enumerate(json_obj): 54 | result.append('') 55 | result.append(f'') 56 | result.append('') 59 | result.append('') 60 | result.append('
{i}') 57 | result.append(json_to_html(item, indent + 1)) 58 | result.append('
') 61 | elif isinstance(json_obj, (str, int, float, bool)) or json_obj is None: 62 | if isinstance(json_obj, str): 63 | result.append(f'"{json_obj}"') 64 | elif isinstance(json_obj, bool): 65 | result.append(f'{str(json_obj).lower()}') 66 | elif json_obj is None: 67 | result.append('null') 68 | else: 69 | result.append(f'{json_obj}') 70 | return ''.join(result) 71 | 72 | def display_json(json_data, title): 73 | html_content = f""" 74 |
75 |

{title}

76 |
77 | {json_to_html(json_data)} 78 |
79 |
def display_image_jsons(image, json_arr, titles):
    """Render an image beside a vertical stack of titled JSON views.

    Args:
        image: Image content accepted by ``display_image`` (defined earlier in
            this module).
        json_arr: JSON-serializable objects to show in the right-hand column.
        titles: Titles, one per entry in ``json_arr``.

    Returns:
        ipywidgets.HBox: A bordered horizontal box ready for notebook display.
    """
    image_widget = display_image(image)
    right_column = widgets.VBox([display_json(data, title) for data, title in zip(json_arr, titles)])
    bordered_hbox = widgets.HBox([image_widget, right_column])
    bordered_hbox.layout.border = '5px solid black'
    bordered_hbox.layout.padding = '10px'
    bordered_hbox.layout.margin = '10px'
    return bordered_hbox

def get_bucket_and_key(s3_uri):
    """Split an ``s3://bucket/key`` URI into a ``(bucket, key)`` tuple."""
    parsed_uri = urlparse(s3_uri)
    bucket_name = parsed_uri.netloc
    object_key = parsed_uri.path.lstrip('/')
    return (bucket_name, object_key)

def wait_for_job_to_complete(invocationArn):
    """Poll a BDA data-automation invocation until it succeeds, errors, or times out.

    Args:
        invocationArn: ARN returned by ``invoke_data_automation_async``.

    Returns:
        dict: The final ``get_data_automation_status`` response.

    Raises:
        Exception: On ClientError/ServiceError status or after 15 polls
            (via ``wait_for_completion`` below).
    """
    get_status_response = wait_for_completion(
        client=bda_runtime_client,
        get_status_function=bda_runtime_client.get_data_automation_status,
        status_kwargs={'invocationArn': invocationArn},
        completion_states=['Success'],
        error_states=['ClientError', 'ServiceError'],
        status_path_in_response='status',
        max_iterations=15,
        delay=30
    )
    return get_status_response

def read_s3_object(s3_uri):
    """Read an S3 object and return its body as UTF-8 text, or None on error."""
    parsed_uri = urlparse(s3_uri)
    bucket_name = parsed_uri.netloc
    object_key = parsed_uri.path.lstrip('/')
    s3_client = boto3.client('s3')
    try:
        response = s3_client.get_object(Bucket=bucket_name, Key=object_key)
        return response['Body'].read().decode('utf-8')
    except Exception as e:
        # Best-effort read: callers treat None as "unavailable".
        print(f"Error reading S3 object: {e}")
        return None

def download_document(url, start_page_index=None, end_page_index=None, output_file_path=None):
    """Download a PDF from *url* and save an inclusive page range to disk.

    Args:
        url: HTTP(S) URL of the source PDF.
        start_page_index: First page to keep (0-indexed); defaults to 0.
        end_page_index: Last page to keep (0-indexed, inclusive); defaults to
            the final page of the document.
        output_file_path: Destination path; defaults to the URL's basename.

    Returns:
        str: The path the extracted PDF was written to.
    """
    if not output_file_path:
        output_file_path = os.path.basename(url)

    # Download the PDF
    response = requests.get(url, timeout=30)  # nosemgrep
    # Fail fast on HTTP errors instead of feeding an error page to PdfReader.
    response.raise_for_status()
    pdf_content = io.BytesIO(response.content)

    pdf_reader = PdfReader(pdf_content)
    pdf_writer = PdfWriter()

    last_page_index = len(pdf_reader.pages) - 1
    start_page_index = 0 if not start_page_index else max(start_page_index, 0)
    end_page_index = last_page_index if not end_page_index else min(end_page_index, last_page_index)

    # BUG FIX: range() excludes its stop value, so the requested end page
    # (and, with defaults, the document's final page) was being dropped.
    # end_page_index is an inclusive 0-based index, hence the +1.
    for page_num in range(start_page_index, end_page_index + 1):
        pdf_writer.add_page(pdf_reader.pages[page_num])

    # Save the extracted pages to a new PDF.
    with open(output_file_path, "wb") as output_file:
        pdf_writer.write(output_file)
    print(f"Created file: {output_file_path}")
    return output_file_path

def create_image_html_column(row: pd.Series, image_col: str, width: str = '300px') -> str:
    """
    Create HTML embedded image from S3 URI by downloading and base64 encoding the image for a DataFrame row.

    Args:
        row (pd.Series): DataFrame row
        image_col (str): Name of column containing S3 URI
        width (str): Fixed width for image

    Returns:
        str: HTML string for embedded image ('' when missing or on error)
    """
    s3_uri = row[image_col]
    if isinstance(s3_uri, list):
        # Some BDA outputs carry a list of crop images; use the first one.
        s3_uri = s3_uri[0]
    if pd.isna(s3_uri):
        return ''

    try:
        bucket_name, object_key = get_bucket_and_key(s3_uri)
        s3_client = boto3.client('s3')

        # Download image from S3
        response = s3_client.get_object(Bucket=bucket_name, Key=object_key)
        image_content = response['Body'].read()

        image = Image.open(io.BytesIO(image_content))
        # JPEG cannot carry an alpha channel.
        if image.mode == 'RGBA':
            image = image.convert('RGB')

        buffered = io.BytesIO()
        image.save(buffered, format="JPEG")
        img_str = base64.b64encode(buffered.getvalue()).decode()

        # NOTE(review): the original return statement appears truncated in the
        # source (`return f''`); reconstructed as an embedded <img> tag per the
        # docstring ("HTML string for embedded image") — confirm against history.
        return f'<img src="data:image/jpeg;base64,{img_str}" width="{width}"/>'
    except Exception as e:
        print(f"Error processing image {s3_uri}: {str(e)}")
        return ''

# Example usage:
#   df['embedded_images'] = add_embedded_images(df, 'crop_images', width='300px')
#   from IPython.display import HTML
#   HTML(df['embedded_images'].iloc[0])

def wait_for_completion(
    client,
    get_status_function,
    status_kwargs,
    status_path_in_response,
    completion_states,
    error_states,
    max_iterations=60,
    delay=10
):
    """Generic poller: call *get_status_function* until a terminal state.

    Args:
        client: AWS client owning the status call (kept for interface
            compatibility; the function itself only calls get_status_function).
        get_status_function: Zero-side-effect status callable.
        status_kwargs: Keyword arguments forwarded on every poll.
        status_path_in_response: Dot path to the status string in the response.
        completion_states: Statuses that end polling successfully.
        error_states: Statuses that raise immediately.
        max_iterations: Poll attempts before timing out.
        delay: Seconds to sleep between polls.

    Returns:
        dict: The response that carried a completion state.

    Raises:
        Exception: On an error state, a ClientError, or timeout.
    """
    for _ in range(max_iterations):
        try:
            response = get_status_function(**status_kwargs)
            status = get_nested_value(response, status_path_in_response)

            if status in completion_states:
                print(f"Operation completed successfully with status: {status}")
                return response

            if status in error_states:
                raise Exception(f"Operation failed with status: {status}")

            print(f"Current status: {status}. Waiting...")
            time.sleep(delay)  # nosemgrep

        except ClientError as e:
            raise Exception(f"Error checking status: {str(e)}")

    raise Exception(f"Operation timed out after {max_iterations} iterations")

def get_nested_value(data, path):
    """
    Retrieve a value from a nested dictionary using a dot-separated path.

    :param data: The dictionary to search
    :param path: A string representing the path to the value, e.g., "Job.Status"
    :return: The value at the specified path, or None if not found
    """
    keys = path.split('.')
    for key in keys:
        if isinstance(data, dict) and key in data:
            data = data[key]
        else:
            return None
    return data
316 | 317 |
{data}
318 |
def send_request(region, url, method, credentials, payload=None, service='bedrock'):
    """Sign an HTTP request with SigV4 and send it, returning the decoded JSON body.

    Args:
        region: AWS region used for the signature.
        url: Full endpoint URL; the host segment is pulled out for the Host header.
        method: HTTP verb, e.g. "POST".
        credentials: Frozen botocore credentials to sign with.
        payload: Optional request body (JSON string).
        service: Signing service name, defaults to 'bedrock'.

    Returns:
        dict: Parsed JSON response body.

    Raises:
        requests.HTTPError: On a non-2xx response.
    """
    host = url.split("/")[2]
    headers = {'Host': host, 'Content-Type': 'application/json'}
    aws_request = AWSRequest(method, url, data=payload, headers=headers)
    SigV4Auth(credentials, service, region).add_auth(aws_request)
    http_response = requests.request(
        method, url, headers=dict(aws_request.headers), data=payload, timeout=50
    )
    http_response.raise_for_status()
    return json.loads(http_response.content.decode("utf-8"))

def invoke_blueprint_recommendation_async(bda_client, payload):
    """Start an async blueprint recommendation job via the signed REST endpoint."""
    frozen_credentials = boto3.Session().get_credentials().get_frozen_credentials()
    current_region = boto3.Session().region_name
    endpoint = f"{bda_client.meta.endpoint_url}/invokeBlueprintRecommendationAsync"
    print(f'Sending request to {endpoint}')
    return send_request(
        region=current_region,
        url=endpoint,
        method="POST",
        credentials=frozen_credentials,
        payload=payload
    )

def get_blueprint_recommendation(bda_client, job_id):
    """Fetch the status/result of a blueprint recommendation job by its id."""
    frozen_credentials = boto3.Session().get_credentials().get_frozen_credentials()
    current_region = boto3.Session().region_name
    endpoint = f"{bda_client.meta.endpoint_url}/getBlueprintRecommendation/{job_id}/"
    return send_request(
        region=current_region,
        url=endpoint,
        method="POST",
        credentials=frozen_credentials
    )

def get_s3_to_dict(s3_url):
    """Download a JSON object from S3 and parse it into a Python dict."""
    # s3://bucket/key → ['s3:', '', bucket, key-with-slashes]
    _, _, bucket_name, object_key = s3_url.split('/', 3)
    response = s3_client.get_object(Bucket=bucket_name, Key=object_key)
    return json.loads(response['Body'].read().decode('utf-8'))
def create_or_update_blueprint(bda_client, blueprint_name, blueprint_description, blueprint_type, blueprint_stage, blueprint_schema):
    """Create a BDA blueprint by name, or update its stage/schema if it exists.

    Args:
        bda_client: boto3 'bedrock-data-automation' client.
        blueprint_name: Unique name to look up / create.
        blueprint_description: Unused by the API calls below; kept for
            interface compatibility with existing callers.
        blueprint_type: Blueprint type passed to create_blueprint.
        blueprint_stage: Target stage for create or update.
        blueprint_schema: Schema dict; serialized to JSON for the API.

    Returns:
        str: The blueprint ARN of the created or updated blueprint.
    """
    # NOTE(review): list_blueprints is read without pagination here; if the
    # account holds more blueprints than one page returns, an existing one
    # could be missed and duplicated — confirm against the API's page size.
    list_blueprints_response = bda_client.list_blueprints(
        blueprintStageFilter='ALL'
    )
    blueprint = next((blueprint for blueprint in
                      list_blueprints_response['blueprints']
                      if 'blueprintName' in blueprint and
                      blueprint['blueprintName'] == blueprint_name), None)

    if not blueprint:
        print(f'No existing blueprint found with name={blueprint_name}, creating custom blueprint')
        response = bda_client.create_blueprint(
            blueprintName=blueprint_name,
            type=blueprint_type,
            blueprintStage=blueprint_stage,
            schema=json.dumps(blueprint_schema)
        )
    else:
        print(f'Found existing blueprint with name={blueprint_name}, updating Stage and Schema')
        response = bda_client.update_blueprint(
            blueprintArn=blueprint['blueprintArn'],
            blueprintStage=blueprint_stage,
            schema=json.dumps(blueprint_schema)
        )

    return response['blueprint']['blueprintArn']


def transform_custom_output(input_json, explainability_info):
    """Merge BDA custom output values with their confidence scores.

    Scalar fields land under result['forms'] as
    ``{"value": ..., "confidence": ...}`` (or the raw value when no confidence
    is available); list fields land under result['tables'] with each row's
    cells annotated the same way. Non-dict rows inside lists are skipped,
    matching the original behavior.

    Args:
        input_json: The 'inference_result' style dict of extracted values.
        explainability_info: Parallel dict of confidence metadata.

    Returns:
        dict: ``{"forms": {...}, "tables": {...}}``.
    """
    result = {
        "forms": {},
        "tables": {}
    }

    def add_confidence(value, conf_info):
        """Wrap value with its confidence when the metadata carries one."""
        if isinstance(conf_info, dict) and "confidence" in conf_info:
            return {"value": value, "confidence": conf_info["confidence"]}
        return value

    def process_list_item(item, conf_info):
        """Annotate one table row with per-cell confidence."""
        # BUG FIX: the original comprehension returned {} (silently dropping
        # the whole row's data) whenever conf_info was not a dict; keep the
        # raw values instead.
        if not isinstance(conf_info, dict):
            return dict(item)
        return {k: add_confidence(v, conf_info.get(k, {})) for k, v in item.items()}

    for key, value in input_json.items():
        confidence_data = explainability_info.get(key, {})
        if isinstance(value, list):
            # Lists represent tables: annotate row by row.
            processed_list = []
            for idx, item in enumerate(value):
                if isinstance(item, dict):
                    if isinstance(confidence_data, list):
                        # BUG FIX: guard against explainability lists shorter
                        # than the value list (previously an IndexError).
                        conf_info = confidence_data[idx] if idx < len(confidence_data) else {}
                    else:
                        conf_info = confidence_data
                    processed_list.append(process_list_item(item, conf_info))
            result["tables"][key] = processed_list
        else:
            # Scalars represent form fields.
            result["forms"][key] = add_confidence(value, confidence_data)

    return result


def get_summaries(custom_outputs):
    """Condense BDA custom outputs into per-document summary dicts.

    Each summary carries the page range, matched blueprint name/confidence and
    document class; a falsy output yields an empty dict.
    """
    return [{
        'page_indices': output.get('split_document', {}).get('page_indices'),
        'matched_blueprint_name': output.get('matched_blueprint', {}).get('name'),
        'confidence': output.get('matched_blueprint', {}).get('confidence'),
        'document_class_type': output.get('document_class', {}).get('type')
    } if output else {} for output in custom_outputs]

def restart_kernel():
    """Restart the Jupyter kernel, printing a heads-up message first."""
    def show_restart_message():
        # Let in-flight cell output flush before announcing the restart.
        time.sleep(2)  # nosemgrep
        print("Restarting Kernel...Wait a few seconds and progress executing subsequent cells.")

    show_restart_message()
    # NOTE(review): the HTML payload is empty as rendered in this source; the
    # kernel-restart <script> snippet appears to have been stripped — confirm
    # against version control before relying on this actually restarting.
    display(HTML(""))
insured_id,insured_name,insured_group_number,insured_plan_name,insured_birth_date,insured_policy_number,phone_number 19 | ,address FROM Insured_Person WHERE insured_policy_number=:insured_policy_number; 20 | """ 21 | 22 | PATIENT_DETAILS_QUERY = """ 23 | SELECT p.patient_id,i.insured_id,p.patient_firstname,p.patient_lastname,p.patient_birth_date,p.relationship_to_insured,p.phone_number,p.sex,p.address 24 | FROM Patient p, Insured_Person i WHERE i.insured_id = p.insured_id AND i.insured_policy_number = :insured_policy_number 25 | AND patient_lastname=:patient_lastname AND patient_birth_date=TO_DATE(:patient_birth_date,'YYYY-MM-DD'); 26 | """ 27 | 28 | MEMBER_AND_PATIENT_DETAILS_QUERY = """ 29 | SELECT 30 | i.insured_id,i.insured_name,i.insured_group_number,i.insured_plan_name,i.insured_birth_date,i.insured_policy_number,i.address insured_address,i.phone_number insured_phone_number, 31 | p.patient_id,p.patient_firstname,p.patient_lastname,p.patient_birth_date,p.relationship_to_insured,p.phone_number patient_phone_number,p.sex patient_sex,p.address patient_address 32 | FROM Patient p, Insured_Person i WHERE i.insured_id = p.insured_id AND i.insured_policy_number = :insured_policy_number 33 | AND patient_lastname=:patient_lastname AND patient_birth_date=TO_DATE(:patient_birth_date,'YYYY-MM-DD'); 34 | """ 35 | 36 | CREATE_CLAIM_QUERY = """ 37 | INSERT INTO Claim (patient_id,claim_date,diagnosis_1,diagnosis_2,diagnosis_3,diagnosis_4,total_charges,balanceDue, amountPaid,claim_status) VALUES 38 | (:patient_id, TO_DATE(:claim_date, 'YYYY-MM-DD'), :diagnosis_1, :diagnosis_2, :diagnosis_3, :diagnosis_4, :total_charges,:balanceDue, :amountPaid, :claim_status) 39 | RETURNING claim_id 40 | """ 41 | 42 | UPDATE_CLAIM_QUERY = """ 43 | UPDATE CLAIM 44 | SET claim_status = :claim_status 45 | WHERE claim_id = :claim_id 46 | RETURNING claim_id, claim_status 47 | """ 48 | 49 | CREATE_SERVICE_QUERY = """ 50 | INSERT INTO SERVICE (claim_id, date_of_service, 
place_of_service,type_of_service,procedure_code) VALUES 51 | (:claim_id, TO_DATE(:date_of_service, 'YYYY-MM-DD'), :place_of_service, :type_of_service, :procedure_code) 52 | RETURNING claim_id, service_id 53 | """ 54 | 55 | 56 | class ParameterError(Exception): 57 | """Base exception for parameter-related errors""" 58 | pass 59 | 60 | class MissingParametersError(ParameterError): 61 | """Raised when the parameters dict is empty or missing""" 62 | pass 63 | 64 | class ParameterNotFoundError(ParameterError): 65 | """Raised when a specific parameter is not found""" 66 | pass 67 | 68 | 69 | def run_command(sql_statement, parameters=None): 70 | print(f"SQL statement: {sql_statement}") 71 | result = rds_data.execute_statement( 72 | resourceArn=CLAIMS_DB_CLUSTER_ARN, 73 | secretArn=CLAIMS_DB_CREDENTIALS_SECRET_ARN, 74 | database=CLAIMS_DB_DATABASE_NAME, 75 | sql=sql_statement, 76 | includeResultMetadata=True, 77 | parameters=parameters 78 | ) 79 | return result 80 | 81 | def getClaimsFormData(event) : 82 | s3_uri = get_parameter(event, "s3URI") 83 | response = s3.get_object(Bucket=s3_uri.split('/',3)[2], Key=s3_uri.split('/',3)[3]) 84 | content = response['Body'].read().decode('utf-8') 85 | json_content = json.loads(content) 86 | 87 | #create response json as a list of dictionaries 88 | response = { 89 | "claims_form_data": json_content 90 | } 91 | return response 92 | 93 | 94 | def getAllOpenClaims(event) : 95 | 96 | #create response json as a list of dictionaries 97 | response = [ 98 | { 99 | "claimId": "11111111", 100 | "policyHolderId": "John Doe", 101 | "claimStatus": "2021-01-01", 102 | } 103 | ] 104 | return response 105 | 106 | def get_parameter(event, parameter_name): 107 | params = event["parameters"] 108 | if not params: 109 | raise MissingParametersError("No parameters provided") 110 | else: 111 | param = [p for p in params if p["name"] == parameter_name] 112 | if not param: 113 | raise ParameterNotFoundError(f"Missing parameter: {parameter_name}") 114 | else: 
115 | return param[0]["value"] 116 | 117 | def get_request_property(event, property_name, defaultValue=None): 118 | request_body = event["requestBody"] 119 | content = request_body["content"] 120 | application_json = content["application/json"] 121 | properties = application_json["properties"] 122 | property = [p for p in properties if p["name"]==property_name] 123 | if not property: 124 | if not defaultValue: 125 | raise ParameterNotFoundError(f"Missing parameter: {property_name}") 126 | else: 127 | return defaultValue 128 | else: 129 | value = None 130 | match property[0]["type"]: 131 | case 'string': 132 | value = str(property[0]["value"]) 133 | case 'number': 134 | value = float(property[0]["value"]) 135 | case 'integer': 136 | value = int(property[0]["value"]) 137 | case _: 138 | value = property[0]["value"] 139 | return value 140 | 141 | def results_by_column_name(result): 142 | columns = [column["name"] for column in result["columnMetadata"]] 143 | records = result["records"] 144 | results = [] 145 | for record in records: 146 | print(record) 147 | values = [list(value.values())[0] for value in record] 148 | print(values) 149 | results.append(dict(zip(columns, values))) 150 | print(results) 151 | return results 152 | 153 | # Function to create parameter dict 154 | def create_param(name, value): 155 | print(f"name:{name}, value:{value}") 156 | if value is None: 157 | return {'name': name, 'value': {'isNull': True}} 158 | elif isinstance(value, str): 159 | return {'name': name, 'value': {'stringValue': value}} 160 | elif isinstance(value, int): 161 | return {'name': name, 'value': {'longValue': value}} 162 | elif isinstance(value, float): 163 | return {'name': name, 'value': {'doubleValue': value}} 164 | elif isinstance(value, bool): 165 | return {'name': name, 'value': {'booleanValue': value}} 166 | else: 167 | raise ValueError(f"Unsupported type for {name}: {type(value)}") 168 | 169 | def getMemberAndPatientDetails(event) : 170 | 171 | insured_policy_number 
def getMemberAndPatientDetails(event):
    """Look up insured member and patient details in a single joined query.

    Reads 'insured_id_number', 'patient_last_name' and 'patient_birth_date'
    (YYYY-MM-DD) from the event parameters.

    Returns:
        dict: Combined member/patient record, or a human-readable string when
        no matching row is found (consumed by the agent as the tool result).
    """
    insured_policy_number = get_parameter(event, "insured_id_number")
    patient_lastname = get_parameter(event, "patient_last_name")
    patient_birth_date = get_parameter(event, "patient_birth_date")
    parameters = [
        {
            'name': 'insured_policy_number',
            'value': {'stringValue': insured_policy_number}
        },
        {
            'name': 'patient_lastname',
            'value': {'stringValue': patient_lastname}
        },
        {
            'name': 'patient_birth_date',
            'value': {'stringValue': patient_birth_date}
        }
    ]

    result = run_command(MEMBER_AND_PATIENT_DETAILS_QUERY, parameters)
    print(result)
    data = results_by_column_name(result)
    if not data:
        return f"""
        Unable to get Member and/or Patient details with
        Insured Id Number={insured_policy_number},
        Patient Last Name={patient_lastname},
        Patient Birth Date={patient_birth_date}
        """
    # The query joins on the policy number, so at most one household matches;
    # take the first row.
    member = data[0]
    response = {
        "insuredId": member['insured_id'],
        "memberName": member['insured_name'],
        "memberAddress": member['insured_address'],
        "memberDateOfBirth": member['insured_birth_date'],
        "memberPlanDetails": {
            "memberGroupNumber": member['insured_group_number'],
            "memberPlanName": member['insured_plan_name'],
            "memberPlanNumber": member['insured_policy_number'],
        },
        "memberPhoneNumber": member['insured_phone_number'],
        "patientId": member['patient_id'],
        "patientFirstName": member['patient_firstname'],
        "patientLastName": member['patient_lastname'],
        "patientDateOfBirth": member['patient_birth_date'],
        "patientRelationshipToInsured": member['relationship_to_insured'],
        "patientPhoneNumber": member['patient_phone_number'],
        "patientSex": member['patient_sex'],
        "patientAddress": member['patient_address'],
    }

    return response

def getMemberDetails(event):
    """Look up an insured member by policy number.

    Returns:
        dict: Member details, or a not-found message string.
    """
    insured_policy_number = get_parameter(event, "insured_id_number")
    parameters = [
        {
            'name': 'insured_policy_number',
            'value': {'stringValue': insured_policy_number}
        }
    ]

    result = run_command(MEMBER_DETAILS_QUERY, parameters)
    print(result)
    data = results_by_column_name(result)
    if not data:
        # BUG FIX: the original message said "last name" but interpolated the
        # policy number — corrected so the agent relays accurate feedback.
        return f"Insured Member with policy number {insured_policy_number} not found"
    member = data[0]
    response = {"memberName": member['insured_name'],
                "memberAddress": member['address'],
                "memberDateOfBirth": member['insured_birth_date'],
                "memberPlanDetails": {
                    "memberGroupNumber": member['insured_group_number'],
                    "memberPlanName": member['insured_plan_name'],
                    "memberPlanNumber": member['insured_policy_number'],
                },
                "memberPhoneNumber": member['phone_number']
                }

    return response

def listClaimsForInsured(event):
    """Placeholder implementation: returns hard-coded dummy claim data.

    NOTE(review): not wired to the database yet.
    """
    response = [
        {
            "claimId": "XXXXXXXX",
            "policyHolderId": "John Doe",
            "claimStatus": "2021-01-01",
        }
    ]
    return response

def getClaim(event):
    """Placeholder implementation: returns a fixed 'Not Implemented' record."""
    response = {"claimId": "XXXXXXXX",
                "claim_description": "Not Implemented"
                }

    return response

def create_claim(event):
    """Insert a new claim row from the request body and return its id.

    Optional diagnosis codes 2-4 default to '' and claim_status defaults to
    'NEW' when absent from the request body.

    Raises:
        ParameterNotFoundError: required property missing, or the INSERT's
            RETURNING clause produced no row.
    """
    parameters = [
        create_param("patient_id", get_request_property(event, "patient_id")),
        create_param("claim_date", get_request_property(event, "claim_date")),
        create_param("diagnosis_1", get_request_property(event, "diagnosis_1")),
        create_param("diagnosis_2", get_request_property(event, "diagnosis_2", '')),
        create_param("diagnosis_3", get_request_property(event, "diagnosis_3", '')),
        create_param("diagnosis_4", get_request_property(event, "diagnosis_4", '')),
        create_param("total_charges", get_request_property(event, "total_charges")),
        create_param("amountPaid", get_request_property(event, "amount_paid")),
        create_param("balanceDue", get_request_property(event, "balance")),
        create_param("claim_status", get_request_property(event, "claim_status", "NEW"))
    ]
    print(parameters)
    result = run_command(sql_statement=CREATE_CLAIM_QUERY, parameters=parameters)

    print(result)
    data = results_by_column_name(result)
    if not data:
        raise ParameterNotFoundError("Missing return record after Insert")
    response = {
        "claim_id": data[0]["claim_id"]
    }
    return response


def update_claim(event):
    """Update a claim's status (default 'ADJUDICATOR_REVIEW') and return it.

    Raises:
        ParameterNotFoundError: claim_id parameter missing, or the UPDATE's
            RETURNING clause produced no row (unknown claim_id).
    """
    parameters = [
        create_param("claim_id", int(get_parameter(event, "claim_id"))),
        create_param("claim_status", get_request_property(event, "status", "ADJUDICATOR_REVIEW"))
    ]
    print(parameters)
    result = run_command(sql_statement=UPDATE_CLAIM_QUERY, parameters=parameters)

    print(result)
    data = results_by_column_name(result)
    if not data:
        # BUG FIX: the original message said "after Insert" for an UPDATE.
        raise ParameterNotFoundError("Missing return record after Update")
    response = {
        "claim_id": data[0]["claim_id"],
        "claim_status": data[0]["claim_status"]
    }
    return response
Please provide a valid integer value'} 318 | 319 | parameters = [ 320 | create_param("claim_id", claim_id), 321 | create_param("date_of_service", get_request_property(event,"date_of_service")), 322 | create_param("place_of_service", get_request_property(event,"place_of_service")), 323 | create_param("type_of_service", get_request_property(event,"type_of_service")), 324 | create_param("procedure_code", get_request_property(event,"procedure_code")), 325 | create_param("amount", get_request_property(event,"amount")) 326 | ] 327 | result = run_command(sql_statement=CREATE_SERVICE_QUERY, parameters=parameters) 328 | print(result) 329 | data = results_by_column_name(result) 330 | if not data: 331 | raise ParameterNotFoundError("Missing return record after Insert") 332 | response = { 333 | "claim_id": data[0]["claim_id"], 334 | "service_id": data[0]["service_id"] 335 | } 336 | return response 337 | 338 | 339 | def getPatient(event): 340 | 341 | patient_lastname = get_parameter(event, "patient_lastName") 342 | patient_birth_date = get_parameter(event, "patient_birth_date") 343 | insured_policy_number = get_parameter(event, "insured_id_number") 344 | parameters=[ 345 | { 346 | 'name':'patient_lastname', 347 | 'value':{'stringValue':patient_lastname} 348 | }, 349 | { 350 | 'name':'insured_policy_number', 351 | 'value':{'stringValue':insured_policy_number} 352 | }, 353 | { 354 | 'name':'patient_birth_date', 355 | 'value':{'stringValue':patient_birth_date} 356 | } 357 | ] 358 | 359 | result = run_command(PATIENT_DETAILS_QUERY, parameters) 360 | print(result) 361 | data = results_by_column_name(result) 362 | if not data: 363 | return f"Patient with last name {patient_lastname} and birth data {patient_birth_date} not found associated with insured id number {insured_policy_number}" 364 | patient = data[0] 365 | response = { 366 | "firstName": patient['patient_firstname'], 367 | "lastName": patient['patient_lastname'], 368 | "dateOfBirth": patient['patient_birth_date'], 369 | 
"gender": patient['sex'], 370 | "address": patient['address'], 371 | "relationshipToInsured": patient['relationship_to_insured'], 372 | "phoneNumber": patient['phone_number'] 373 | } 374 | 375 | return response 376 | 377 | def createPatient(event) : 378 | CREATE_CLAIM_QUERY.format(claim_values=get_parameter(event, "claim_values")) 379 | response = {"claimId": "XXXXXXXX"} 380 | return response 381 | 382 | 383 | def lambda_handler(event, context): 384 | print(event) 385 | action = event["actionGroup"] 386 | api_path = event["apiPath"] 387 | httpMethod = event["httpMethod"] 388 | response_code = 200 389 | response = None 390 | try: 391 | match api_path: 392 | case '/member_and_patient': 393 | response = getMemberAndPatientDetails(event) 394 | case '/member/{insured_id_number}': 395 | response = getMemberDetails(event) 396 | case '/claims' : 397 | if(httpMethod == "GET"): 398 | response = getAllOpenClaims(event) 399 | elif(httpMethod == "POST"): 400 | response = create_claim(event) 401 | case '/patient' : 402 | if(httpMethod == "GET"): 403 | response = getPatient(event) 404 | elif(httpMethod == "POST"): 405 | response = createPatient(event) 406 | case '/get_claims_form_data': 407 | response = getClaimsFormData(event) 408 | case '/claims/{claim_id}/service': 409 | response = create_claim_service(event) 410 | case '/claims/{claim_id}': 411 | if(httpMethod == "GET"): 412 | response = getClaim(event) 413 | elif(httpMethod == "PATCH"): 414 | response = update_claim(event) 415 | case '/claims/insured/{insuredId}': 416 | response = listClaimsForInsured(event) 417 | case 'claims/{claim_id}/service': 418 | response = create_claim_service(event) 419 | case _: 420 | response_code = 404 421 | response = {"error": f"{action}::{api_path} is not a valid API, try another one."} 422 | except ParameterError as pe: 423 | response_code = 400 424 | response = {"error": str(pe)} 425 | except Exception as e: 426 | response_code = 500 427 | response = {"error": str(e)} 428 | 429 | 430 | 
response_body = {"application/json": {"body": json.dumps(response)}} 431 | 432 | 433 | action_response = { 434 | "actionGroup": event["actionGroup"], 435 | "apiPath": event["apiPath"], 436 | "httpMethod": event["httpMethod"], 437 | "httpStatusCode": response_code, 438 | "responseBody": response_body, 439 | } 440 | 441 | session_attributes = event["sessionAttributes"] 442 | prompt_session_attributes = event["promptSessionAttributes"] 443 | 444 | api_response = { 445 | "messageVersion": "1.0", 446 | "response": action_response, 447 | "sessionAttributes": session_attributes, 448 | "promptSessionAttributes": prompt_session_attributes, 449 | } 450 | print(api_response) 451 | return api_response -------------------------------------------------------------------------------- /10-Understanding-BDA/11_getting_started_with_bda.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "30c014be-715c-4d02-b8a0-bbded2352750", 6 | "metadata": {}, 7 | "source": [ 8 | "# How Bedrock Data Automation works\n", 9 | "\n", 10 | "Bedrock Data Automation (BDA) lets you configure output based on your processing needs for a specific data type: documents, images, video or audio. BDA can generate standard output or custom output. Below are some key concepts for understanding how BDA works. If you're a new user, start with the information about standard output.\n", 11 | "\n", 12 | "* **Standard output** – Sending a file to BDA with no other information returns the default standard output, which consists of commonly required information that's based on the data type. Examples include audio transcriptions, scene summaries for video, and document summaries. These outputs can be tuned to your use case using projects to modify them. For more information, see e.g. 
[Standard output for documents in Bedrock Data Automation](https://docs.aws.amazon.com/bedrock/latest/userguide/bda-output-documents.html).\n", 13 | "\n", 14 | "* **Custom output** – For documents and images, only. Choose custom output to define exactly what information you want to extract using a blueprint. A blueprint consists of a list of expected fields that you want retrieved from a document or image. Each field represents a piece of information that needs to be extracted to meet your specific use case. You can create your own blueprints, or select predefined blueprints from the BDA blueprint catalog. For more information, see [Custom output and blueprints](https://docs.aws.amazon.com/bedrock/latest/userguide/bda-custom-output-idp.html).\n", 15 | "\n", 16 | "* **Projects** – A project is a BDA resource that allows you to modify and organize output configurations. Each project can contain standard output configurations for documents, images, video, and audio, as well as custom output blueprints for documents and images. Projects are referenced in the `InvokeDataAutomationAsync` API call to instruct BDA on how to process the files. For more information about projects and their use cases, see [Bedrock Data Automation projects](https://docs.aws.amazon.com/bedrock/latest/userguide/bda-projects.html).\n", 17 | "\n", 18 | "In this notebook, we see will see how we can get started with using BDA API for your document processing use cases. The Amazon Bedrock Data Automation (BDA) feature provides a streamlined API workflow for processing your data. For all modalities, this workflow consists of three main steps: creating a project, invoking the analysis, and retrieving the results. To retrieve custom output for your processed data, you provide the Blueprint ARN when you invoke the analysis operation." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "id": "444c3287-fb3e-4da9-9d37-728456ac52fe", 24 | "metadata": { 25 | "editable": true, 26 | "slideshow": { 27 | "slide_type": "" 28 | }, 29 | "tags": [] 30 | }, 31 | "source": [ 32 | "## Prerequisites" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "1264708d-a57e-4e71-89f5-3090dbf73972", 38 | "metadata": {}, 39 | "source": [ 40 | "### Configure IAM Permissions\n", 41 | "\n", 42 | "The features being explored in the workshop require multiple IAM Policies for the role being used. If you're running this notebook within SageMaker Studio in your own Account, update the default execution role for the SageMaker user profile to include the IAM policies described in [README.md](../README.md)." 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "id": "6c12476d-7970-46cf-9488-7bc1dc1ca6ad", 48 | "metadata": {}, 49 | "source": [ 50 | "### Install Required Libraries" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "1fd8ca60-b430-4047-88fa-1a5189ec57aa", 57 | "metadata": { 58 | "editable": true, 59 | "slideshow": { 60 | "slide_type": "" 61 | }, 62 | "tags": [] 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "%pip install --no-warn-conflicts \"boto3>=1.37.6\" itables==2.2.4 PyPDF2==3.0.1 --upgrade -q" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "id": "58746522-d462-486a-a2b9-2b57dec72f84", 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "from utils.helper_functions import restart_kernel\n", 77 | "restart_kernel()" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "id": "aaf88a7d-9164-49f8-a83e-402600e4297a", 84 | "metadata": { 85 | "editable": true, 86 | "slideshow": { 87 | "slide_type": "" 88 | }, 89 | "tags": [] 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "%load_ext autoreload\n", 94 | "%autoreload 2" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "id": 
"77914217-a4b8-4220-9dd4-4382a1695f87", 100 | "metadata": { 101 | "editable": true, 102 | "slideshow": { 103 | "slide_type": "" 104 | }, 105 | "tags": [] 106 | }, 107 | "source": [ 108 | "### Setup\n", 109 | "\n", 110 | "Before we get to the part where we invoke BDA with our sample artifacts, let's setup some parameters and configuration that will be used throughout this notebook" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "id": "9c9bf70d-ae3b-4988-bbd7-4e544675dcbd", 117 | "metadata": { 118 | "editable": true, 119 | "slideshow": { 120 | "slide_type": "" 121 | }, 122 | "tags": [] 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "import boto3\n", 127 | "import json\n", 128 | "from IPython.display import JSON, IFrame\n", 129 | "import sagemaker\n", 130 | "from utils.helper_functions import read_s3_object, wait_for_job_to_complete, get_bucket_and_key\n", 131 | "from pathlib import Path\n", 132 | "import os\n", 133 | "\n", 134 | "session = sagemaker.Session()\n", 135 | "default_bucket = session.default_bucket()\n", 136 | "current_region = boto3.session.Session().region_name\n", 137 | "\n", 138 | "sts_client = boto3.client('sts')\n", 139 | "account_id = sts_client.get_caller_identity()['Account']\n", 140 | "\n", 141 | "# Initialize Bedrock Data Automation client\n", 142 | "bda_client = boto3.client('bedrock-data-automation')\n", 143 | "bda_runtime_client = boto3.client('bedrock-data-automation-runtime')\n", 144 | "s3_client = boto3.client('s3')\n", 145 | "\n", 146 | "bda_s3_input_location = f's3://{default_bucket}/bda/input'\n", 147 | "bda_s3_output_location = f's3://{default_bucket}/bda/output'\n", 148 | "\n", 149 | "print(f\"My BDA output s3 URI: {bda_s3_output_location}\")" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "id": "fc838291-b380-442c-9342-e0f1c399527b", 155 | "metadata": { 156 | "editable": true, 157 | "slideshow": { 158 | "slide_type": "" 159 | }, 160 | "tags": [] 161 | }, 162 | "source": 
[ 163 | "## Prepare sample document\n", 164 | "For this lab, we use a sample `Bank Statement` for Fiscal Year 2025 through November 30, 2024. The document is prepared by the Bureau of the Fiscal Service, Department of the Treasury and provides detailed information on the government's financial activities. We will extract a subset of pages from the `PDF` document and use BDA to extract and analyse the document content.\n", 165 | "\n", 166 | "### Download and store sample document\n", 167 | "we use the document url to download the document and store it a S3 location. \n", 168 | "\n", 169 | "Note - We will configure BDA to use the sample input from this S3 location, so we need to ensure that BDA has `s3:GetObject` access to this S3 location. If you are running the notebook in your own AWS Account, ensure that the SageMaker Execution role configured for this JupyterLab app has the right IAM permissions." 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "id": "a7e877eb-dcf7-4ce0-bc55-2b75af452a3b", 176 | "metadata": { 177 | "editable": true, 178 | "slideshow": { 179 | "slide_type": "" 180 | }, 181 | "tags": [] 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "local_download_path = \"data/documents/\"\n", 186 | "local_file_name = \"BankStatement.jpg\"\n", 187 | "file_path_local = f\"{local_download_path}/{local_file_name}\"\n", 188 | "os.makedirs(local_download_path, exist_ok=True)\n", 189 | "\n", 190 | "# Download Sample file\n", 191 | "#(bucket, key) = get_bucket_and_key(document_url)\n", 192 | "#response = s3_client.download_file(bucket, key, file_path_local)\n", 193 | "\n", 194 | "# Upload the document to S3\n", 195 | "document_s3_uri = f'{bda_s3_input_location}/{local_file_name}'\n", 196 | "\n", 197 | "target_s3_bucket, target_s3_key = get_bucket_and_key(document_s3_uri)\n", 198 | "s3_client.upload_file(file_path_local, target_s3_bucket, target_s3_key)\n", 199 | "\n", 200 | "print(f\"Downloaded file to: 
{file_path_local}\")\n", 201 | "print(f\"Uploaded file to S3: {target_s3_key}\")\n", 202 | "print(f\"document_s3_uri: {document_s3_uri}\")" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "id": "24270477-69c5-4f4c-b171-3dde759f2068", 208 | "metadata": { 209 | "editable": true, 210 | "slideshow": { 211 | "slide_type": "" 212 | }, 213 | "tags": [] 214 | }, 215 | "source": [ 216 | "### View Sample Document" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "id": "08521fcf-c8bc-413f-bc9d-b3a93e6be0da", 223 | "metadata": { 224 | "editable": true, 225 | "slideshow": { 226 | "slide_type": "" 227 | }, 228 | "tags": [] 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "IFrame(file_path_local, width=600, height=400)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "id": "ff2ef08f-2d3f-441b-8325-edbf98b90089", 238 | "metadata": {}, 239 | "source": [ 240 | "## Using BDA for standard output\n", 241 | "\n", 242 | "Sending e.g. a document to BDA with no other information using the [`InvokeDataAutomationAsync` API](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-data-automation-runtime/client/invoke_data_automation_async.html) looks as follows:\n", 243 | "\n", 244 | "BDA will process the file provided in `inputConfiguration` and write the output to the s3 URI of `outputConfiguration`.\n", 245 | "\n", 246 | "```python\n", 247 | "response = bda_runtime_client.invoke_data_automation_async(\n", 248 | " inputConfiguration={\n", 249 | " 's3Uri': 's3://bedrock-data-automation-prod-assets-us-west-2/demo-assets/Document/BankStatement.jpg'\n", 250 | " },\n", 251 | " outputConfiguration={\n", 252 | " 's3Uri': 's3://my_output'\n", 253 | " },\n", 254 | ")\n", 255 | "```" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "id": "9f3232db-8ed2-4ef6-921c-c2a8165f30db", 261 | "metadata": {}, 262 | "source": [ 263 | "### Invoking BDA for standard output" 264 | ] 265 | }, 266 | { 267 
| "cell_type": "code", 268 | "execution_count": null, 269 | "id": "e9199ffd-2889-4632-a683-b1d3d914ae91", 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "response = bda_runtime_client.invoke_data_automation_async(\n", 274 | " inputConfiguration={ \n", 275 | " 's3Uri': document_s3_uri\n", 276 | " },\n", 277 | " outputConfiguration={'s3Uri': f'{bda_s3_output_location}'},\n", 278 | " dataAutomationProfileArn = f'arn:aws:bedrock:{current_region}:{account_id}:data-automation-profile/us.data-automation-v1',\n", 279 | " dataAutomationConfiguration = {\n", 280 | " 'dataAutomationProjectArn': f'arn:aws:bedrock:{current_region}:aws:data-automation-project/public-default',\n", 281 | " }\n", 282 | ")\n", 283 | "JSON(response)" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "id": "bbfe5bc6-2e59-46fc-8f90-a22281622eb1", 289 | "metadata": {}, 290 | "source": [ 291 | "### Get data automation job status" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "id": "d0cd06e2-6b32-4648-9495-1735f560f6ba", 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [ 301 | "status_response = wait_for_job_to_complete(invocationArn=response[\"invocationArn\"])\n", 302 | "JSON(status_response)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "id": "d9b52698-02dc-4440-95d2-2b1b5c18f37e", 308 | "metadata": {}, 309 | "source": [ 310 | "### Retrieve job metadata" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "id": "08e051a1-118e-4f5a-ac94-5a983b8add0e", 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "job_metadata_s3 = status_response[\"outputConfiguration\"][\"s3Uri\"]\n", 321 | "print(f\"Retrieving job metadata: {job_metadata_s3}\")\n", 322 | "job_metadata = json.loads(read_s3_object(job_metadata_s3))\n", 323 | "\n", 324 | "JSON(job_metadata,root='job_metadata',expanded=True)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "id": 
"b67cd4f5-4574-4545-b6c9-7cff198e2c82", 330 | "metadata": {}, 331 | "source": [ 332 | "### Get job results for standard output\n", 333 | "\n", 334 | "The standard output will contain the following fields\n", 335 | "\n", 336 | "* metadata: simple document metadata like location and number of pages\n", 337 | "* document: Contains document statistics on number of elements, tables, and figures\n", 338 | "* pages: Contains markdown version of each page\n", 339 | "* elements: Contains details and references to Text blocks, figures, tables, charts, etc.\n", 340 | "\n", 341 | "Note that the standard output can configured to contain much more information about the document structure, or descriptions of figures, charts, etc. We will explore this in the next notebook" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "id": "34c15055-4cdf-4830-bb31-b801c8f95e3d", 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "standard_output_path = job_metadata[\"output_metadata\"][0][\"segment_metadata\"][0][\"standard_output_path\"]\n", 352 | "print(f\"Receiving the jobs results from: {standard_output_path}\")\n", 353 | "standard_output = json.loads(read_s3_object(standard_output_path))\n", 354 | "JSON(standard_output, root=\"standard_output\")" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "id": "53177bdf-c172-4879-bfa6-8fff323e7a97", 360 | "metadata": {}, 361 | "source": [ 362 | "## Using BDA for custom outputs with blueprints\n", 363 | "\n", 364 | "We can also provide a list of blueprints to be used when invoking BDA through the `InvokeDataAutomationAsync` API.\n", 365 | "BDA will match the document against the blueprints and extract or derive structured insights based on the blueprint definitions.\n", 366 | "\n", 367 | "We will see follow up notebooks how this works in more detail. 
Here we provide just a high level overview how it can be used, for example in `us-east-1` region.\n", 368 | "\n", 369 | "```python\n", 370 | "response = bda_runtime_client.invoke_data_automation_async(\n", 371 | " inputConfiguration={\n", 372 | " 's3Uri': 's3://bedrock-data-automation-prod-assets-us-east-1/demo-assets/Document/BankStatement.jpg'\n", 373 | " },\n", 374 | " outputConfiguration={\n", 375 | " 's3Uri': 's3://my_output'\n", 376 | " },\n", 377 | " dataAutomationProfileArn = f'arn:aws:bedrock:{current_region}:{account_id}:data-automation-profile/us.data-automation-v1',\n", 378 | " blueprints=[\n", 379 | " {\n", 380 | " 'blueprintArn': 'arn:aws:bedrock:us-east-1:aws:blueprint/bedrock-data-automation-public-bank-statement', \n", 381 | " },\n", 382 | "]\n", 383 | ")\n", 384 | "```" 385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "id": "75c9a2cf-0796-4482-aa30-0dbb63409d02", 390 | "metadata": {}, 391 | "source": [ 392 | "## Using projects with custom output and standard output\n", 393 | "\n", 394 | "A data automation project allows to bundle multiple configurations together, to be consumed as a single unit.\n", 395 | "It allows in particular to\n", 396 | "\n", 397 | "* extend the standard output by defining the granularity and types insights using `standardOutputConfiguration`\n", 398 | "* define a list of blueprints using `customOutputConfiguration`\n", 399 | "* activate document splitting using `overrideConfiguration`\n" 400 | ] 401 | }, 402 | { 403 | "cell_type": "markdown", 404 | "id": "f6234dcc-7201-439a-a963-ccb945064810", 405 | "metadata": {}, 406 | "source": [ 407 | "### Creating a data automation project\n", 408 | "\n", 409 | "The follow preview shows how we can create a data automation project using the boto3 client.\n", 410 | "\n", 411 | "```python\n", 412 | "import boto3\n", 413 | "\n", 414 | "client = boto3.client('bedrock-data-automation')\n", 415 | "response = bda_runtime_client.create_data_automation_project(\n", 416 | " 
projectName='my name',\n", 417 | " projectDescription='my description',\n", 418 | " projectStage='LIVE',\n", 419 | " standardOutputConfiguration={\n", 420 | " \"document\": {\n", 421 | " \"extraction\": {\n", 422 | " \"granularity\": {\"types\": [\"DOCUMENT\",\"PAGE\", \"ELEMENT\",\"LINE\",\"WORD\"]},\n", 423 | " \"boundingBox\": {\"state\": \"ENABLED\"}\n", 424 | " },\n", 425 | " \"generativeField\": {\"state\": \"ENABLED\"},\n", 426 | " \"outputFormat\": {\n", 427 | " \"textFormat\": {\"types\": [\"PLAIN_TEXT\", \"MARKDOWN\", \"HTML\", \"CSV\"]},\n", 428 | " \"additionalFileFormat\": {\"state\": \"ENABLED\"}\n", 429 | " }\n", 430 | " },\n", 431 | " \"image\": {...},\n", 432 | " \"video\": {...},\n", 433 | " \"audio\": {...}\n", 434 | " },\n", 435 | " customOutputConfiguration={\n", 436 | " 'blueprints': [\n", 437 | " {\n", 438 | " 'blueprintArn': 'arn:aws:bedrock:us-west-2:aws:blueprint/bedrock-data-automation-public-bank-statement' \n", 439 | " },\n", 440 | " ]\n", 441 | " },\n", 442 | " overrideConfiguration={\n", 443 | " 'document': {\n", 444 | " 'splitter': {\n", 445 | " 'state': 'ENABLED'\n", 446 | " }\n", 447 | " }\n", 448 | " },\n", 449 | ")\n", 450 | "```" 451 | ] 452 | }, 453 | { 454 | "cell_type": "markdown", 455 | "id": "ee5ed930-26a1-4a2a-b3f7-defebef0856b", 456 | "metadata": {}, 457 | "source": [ 458 | "### Invoking a data automation project\n", 459 | "\n", 460 | "We can now invoke a data automation project with an input file using the `InvokeDataAutomationAsync` API and by providing the previously created project ARN.\n", 461 | "\n", 462 | "```python\n", 463 | "response = bda_runtime_client.invoke_data_automation_async(\n", 464 | " inputConfiguration={\n", 465 | " 's3Uri': 's3://bedrock-data-automation-prod-assets-us-west-2/demo-assets/Document/BankStatement.jpg'\n", 466 | " },\n", 467 | " outputConfiguration={\n", 468 | " 's3Uri': 's3://my_output'\n", 469 | " },\n", 470 | " dataAutomationConfiguration={\n", 471 | " 'dataAutomationArn': 
def get_document_configuration(document_id, plan_name, plan_document_s3_uri):
    """Build the document payload for knowledge-base direct (custom) ingestion.

    Args:
        document_id: Custom identifier used to track the document in the KB.
        plan_name: Plan name attached as inline metadata (used for filtering).
        plan_document_s3_uri: S3 URI of the plan document to ingest.

    Returns:
        A dict shaped for ``bedrock_agent.ingest_knowledge_base_documents``.
    """
    content = {
        "custom": {
            "customDocumentIdentifier": {
                "id": document_id
            },
            "s3Location": {
                "uri": plan_document_s3_uri
            },
            "sourceType": "S3_LOCATION"
        },
        "dataSourceType": "CUSTOM"
    }
    metadata = {
        "inlineAttributes": [
            {
                "key": "plan_name",
                "value": {
                    "stringValue": plan_name,
                    "type": "STRING"
                }
            }
        ],
        "type": "IN_LINE_ATTRIBUTE"
    }
    return {"content": content, "metadata": metadata}


def create_agent_alias(bedrock_agent, agentAliasName, agentId, description):
    """Create a Bedrock agent alias, or update it when the name already exists.

    Polls until the alias reaches the PREPARED state.

    Args:
        bedrock_agent: boto3 'bedrock-agent' client.
        agentAliasName: Alias name to create or refresh.
        agentId: Id of the agent the alias points to.
        description: Alias description.

    Returns:
        Tuple of (agentAliasId, final alias status).

    Raises:
        ClientError: On Bedrock API failures (logged, then re-raised).
    """
    try:
        alias_page = bedrock_agent.list_agent_aliases(agentId=agentId)
        existing_alias = None
        for summary in alias_page['agentAliasSummaries']:
            if summary['agentAliasName'] == agentAliasName:
                existing_alias = summary
                break

        if existing_alias is not None:
            agentAliasId = existing_alias['agentAliasId']
            bedrock_agent.update_agent_alias(
                agentAliasId=agentAliasId,
                agentAliasName=agentAliasName,
                agentId=agentId,
                description=description,
            )
        else:
            created = bedrock_agent.create_agent_alias(
                agentAliasName=agentAliasName,
                agentId=agentId,
                description=description
            )
            agentAliasId = created['agentAlias']['agentAliasId']

        # Poll until the alias is usable (PREPARED) or reported FAILED.
        status_response = wait_for_completion(
            bedrock_agent,
            bedrock_agent.get_agent_alias,
            {
                'agentId': agentId,
                'agentAliasId': agentAliasId
            },
            'agentAlias.agentAliasStatus',
            ['PREPARED'],
            ['FAILED'],
            max_iterations=10,
            delay=5,
        )
        status = status_response['agentAlias']['agentAliasStatus']
        verb = 'Updated' if existing_alias else 'Created'
        print(f"{verb} agent alias with name {agentAliasName} and current status {status}")
        return agentAliasId, status
    except ClientError as e:
        print(f"Error creating or retrieving agent: {e}")
        raise
    except Exception as e:
        print(f"Error: {e}")
        raise
def associate_agent_knowledge_base(bedrock_agent, agentId, agentVersion, description, knowledgeBaseId, knowledgeBaseState):
    """Associate a knowledge base with an agent version, or refresh the link.

    Args:
        bedrock_agent: boto3 'bedrock-agent' client.
        agentId: Id of the agent.
        agentVersion: Agent version to attach the KB to (e.g. 'DRAFT').
        description: Description of the association.
        knowledgeBaseId: Id of the knowledge base.
        knowledgeBaseState: 'ENABLED' or 'DISABLED'.
    """
    kb_listing = bedrock_agent.list_agent_knowledge_bases(
        agentId=agentId,
        agentVersion=agentVersion)
    already_linked = any(
        entry['knowledgeBaseId'] == knowledgeBaseId
        for entry in kb_listing['agentKnowledgeBaseSummaries']
    )
    # Same keyword set is accepted by both the update and associate calls.
    association_kwargs = dict(
        agentId=agentId,
        agentVersion=agentVersion,
        description=description,
        knowledgeBaseId=knowledgeBaseId,
        knowledgeBaseState=knowledgeBaseState
    )
    if already_linked:
        print(f'Knowledge Base {knowledgeBaseId} already associated with agent {agentId}:{agentVersion}, Updating it.')
        bedrock_agent.update_agent_knowledge_base(**association_kwargs)
    else:
        bedrock_agent.associate_agent_knowledge_base(**association_kwargs)
def create_agent_action_group(bedrock_agent, actionGroupName, description,
                              actionGroupState, agentId, agentVersion,
                              apiSchema, agent_actions_lambda_arn):
    """Create or update an agent action group backed by a Lambda executor.

    Looks up the action group by name on the given agent version; updates it
    when found, otherwise creates it. In both cases the group is forced to
    ENABLED and polled until that state is reported.

    NOTE(review): the ``description`` and ``actionGroupState`` parameters are
    accepted but never passed to the API calls — kept for interface
    compatibility; confirm whether they should be wired through.

    Returns:
        Tuple of (actionGroupId, final action group state).

    Raises:
        ClientError: On Bedrock API failures (logged, then re-raised).
    """
    try:
        listing = bedrock_agent.list_agent_action_groups(
            agentId=agentId,
            agentVersion=agentVersion
        )
        existing_agent_ag = next(
            (ag for ag in listing['actionGroupSummaries']
             if ag['actionGroupName'] == actionGroupName),
            None
        )
        if existing_agent_ag:
            actionGroupId = existing_agent_ag['actionGroupId']
            actionGroupName = existing_agent_ag['actionGroupName']
            print(f"Action group with name {actionGroupName} already exists. Will update and enable it")
            bedrock_agent.update_agent_action_group(
                actionGroupExecutor={
                    'lambda': agent_actions_lambda_arn
                },
                actionGroupId=actionGroupId,
                actionGroupName=actionGroupName,
                actionGroupState='ENABLED',
                agentId=agentId,
                apiSchema=apiSchema,
                agentVersion=agentVersion
            )
        else:
            print(f'Creating new agent action group with name {actionGroupName}')
            created = bedrock_agent.create_agent_action_group(
                actionGroupExecutor={
                    'lambda': agent_actions_lambda_arn
                },
                actionGroupName=actionGroupName,
                actionGroupState='ENABLED',
                agentId=agentId,
                apiSchema=apiSchema,
                agentVersion=agentVersion
            )
            actionGroupId = created['agentActionGroup']['actionGroupId']

        # Poll until the action group reports ENABLED.
        status_response = wait_for_completion(
            bedrock_agent,
            bedrock_agent.get_agent_action_group,
            {
                'actionGroupId': actionGroupId,
                'agentId': agentId,
                'agentVersion': agentVersion
            },
            'agentActionGroup.actionGroupState',
            ['ENABLED'],
            [],
            max_iterations=10,
            delay=2,
        )
        status = status_response['agentActionGroup']['actionGroupState']
        print(f"{'Updated' if existing_agent_ag else 'Created'} agent action group with name {actionGroupName} and current status {status}")
        return actionGroupId, status
    except ClientError as e:
        print(f"Error creating or retrieving agent: {e}")
        raise
    except Exception as e:
        print(f"Error: {e}")
        raise
def create_agent(bedrock_agent, agentName, agent_service_role_arn,
                 description, foundation_model_id, agent_instruction, orchestrationType):
    """Create a Bedrock agent, or update the one that already has this name.

    Args:
        bedrock_agent: boto3 'bedrock-agent' client.
        agentName: Name of the agent to create or reuse.
        agent_service_role_arn: IAM role the agent assumes.
        description: Agent description.
        foundation_model_id: Foundation model id the agent invokes.
        agent_instruction: System instruction for the agent.
        orchestrationType: Orchestration mode (e.g. 'DEFAULT').

    Returns:
        Tuple of (agent_id, status, version, agent_arn); version may be None.

    Raises:
        ClientError: On Bedrock API failures (logged, then re-raised).
    """
    try:
        summaries = bedrock_agent.list_agents()['agentSummaries']
        existing_agent = next(
            (agent for agent in summaries if agent['agentName'] == agentName),
            None
        )
        if existing_agent:
            agent_id = existing_agent['agentId']
            agent_current_status = existing_agent['agentStatus']
            print(f"Using existing Agent with name {existing_agent['agentName']} and status {agent_current_status}")
            update_response = bedrock_agent.update_agent(
                agentId=agent_id,
                agentName=agentName,
                agentResourceRoleArn=agent_service_role_arn,
                description=description,
                foundationModel=foundation_model_id,
                instruction=agent_instruction,
                orchestrationType=orchestrationType
            )
            agent_arn = update_response['agent']['agentArn']
        else:
            print(f'Creating new agent with name {agentName}')
            create_response = bedrock_agent.create_agent(
                agentName=agentName,
                agentResourceRoleArn=agent_service_role_arn,
                description=description,
                foundationModel=foundation_model_id,
                instruction=agent_instruction,
                orchestrationType=orchestrationType
            )
            agent_id = create_response['agent']['agentId']
            agent_arn = create_response['agent']['agentArn']

        # NOT_PREPARED is an acceptable terminal state here; preparation
        # happens in a separate step.
        status_response = wait_for_completion(
            bedrock_agent,
            bedrock_agent.get_agent,
            {'agentId': agent_id},
            'agent.agentStatus',
            ['NOT_PREPARED', 'PREPARED'],
            ['FAILED'],
            max_iterations=10,
            delay=2,
        )
        status = status_response['agent']['agentStatus']
        version = status_response['agent'].get('agentVersion', None)
        print(f"{'Updated' if existing_agent else 'Created'} agent with name {agentName} and current status {status}")
        return agent_id, status, version, agent_arn
    except ClientError as e:
        print(f"Error creating or retrieving agent: {e}")
        raise
    except Exception as e:
        print(f"Error: {e}")
        raise
def create_knowledge_base(bedrock_agent, kb_name,
                          kb_description,
                          kb_role_arn,
                          embedding_model_arn,
                          vector_store_collection_arn,
                          vector_store_index_name):
    """Create (or reuse) a vector knowledge base on OpenSearch Serverless.

    Args:
        bedrock_agent: boto3 'bedrock-agent' client.
        kb_name: Knowledge base name; an existing ACTIVE KB with this name
            is reused instead of creating a new one.
        kb_description: Description for a newly created KB.
        kb_role_arn: IAM role the KB service assumes.
        embedding_model_arn: ARN of the embedding model.
        vector_store_collection_arn: OpenSearch Serverless collection ARN.
        vector_store_index_name: Vector index name inside the collection.

    Returns:
        Tuple of (knowledge_base_id, status).

    Raises:
        Exception: If a KB with the same name exists but is not ACTIVE.
        ClientError: On Bedrock API failures (logged, then re-raised).
    """
    storage_configuration = {
        'opensearchServerlessConfiguration': {
            'collectionArn': vector_store_collection_arn,
            'fieldMapping': {
                'metadataField': 'text-metadata',
                'textField': 'text',
                'vectorField': 'vector'
            },
            'vectorIndexName': vector_store_index_name
        },
        "type": 'OPENSEARCH_SERVERLESS'
    }
    knowledge_base_configuration = {
        'type': 'VECTOR',
        'vectorKnowledgeBaseConfiguration': {
            'embeddingModelArn': embedding_model_arn,
            'embeddingModelConfiguration': {
                "bedrockEmbeddingModelConfiguration": {
                    "dimensions": 1024
                }
            }
        }
    }
    try:
        kb_listing = bedrock_agent.list_knowledge_bases()
        existing_kb = next(
            (kb for kb in kb_listing['knowledgeBaseSummaries'] if kb['name'] == kb_name),
            None
        )
        if existing_kb:
            knowledge_base_id = existing_kb['knowledgeBaseId']
            kb_current_status = existing_kb['status']
            if kb_current_status != 'ACTIVE':
                raise Exception(f"Knowledge Base with name {existing_kb['name']} exists but is not in ACTIVE state. Knowledge Base state: {kb_current_status}")
            print(f"Using existing Knowledge Base with name {existing_kb['name']} and status {kb_current_status}")
            return knowledge_base_id, kb_current_status

        print(f'Creating new KB with name {kb_name}')
        create_kb_response = bedrock_agent.create_knowledge_base(
            description=kb_description,
            knowledgeBaseConfiguration=knowledge_base_configuration,
            name=kb_name,
            roleArn=kb_role_arn,
            storageConfiguration=storage_configuration
        )
        knowledge_base_id = create_kb_response['knowledgeBase']['knowledgeBaseId']
        # Poll until the KB becomes ACTIVE.
        status_response = wait_for_completion(
            bedrock_agent,
            bedrock_agent.get_knowledge_base,
            {'knowledgeBaseId': knowledge_base_id},
            'knowledgeBase.status',
            ['ACTIVE'],
            ['FAILED'],
            max_iterations=10,
            delay=10,
        )
        print(f"Created Knowledge Base with name {kb_name} and current status {status_response['knowledgeBase']['status']}")
        return knowledge_base_id, status_response['knowledgeBase']['status']
    except ClientError as e:
        print(f"Error creating or retrieving knowledge base: {e}")
        raise
    except Exception as e:
        print(f"Error: {e}")
        raise
def create_data_source(bedrock_agent, knowledge_base_id, datasource_name='claims-eoc-datasource'):
    """Create (or reuse) a CUSTOM data source with hierarchical chunking.

    Args:
        bedrock_agent: boto3 'bedrock-agent' client.
        knowledge_base_id: Id of the KB the data source belongs to.
        datasource_name: Data source name; an existing AVAILABLE one with this
            name is reused.

    Returns:
        Tuple of (data_source_id, status).

    Raises:
        Exception: If a data source with the same name exists but is not
            in the AVAILABLE state.
    """
    data_source_configuration = {
        'type': 'CUSTOM'
    }
    chunking_configuration = {
        'chunkingStrategy': 'HIERARCHICAL',
        'hierarchicalChunkingConfiguration': {
            'levelConfigurations': [
                {'maxTokens': 1500},  # parent chunks
                {'maxTokens': 300},   # child chunks
            ],
            'overlapTokens': 60
        }
    }

    ds_listing = bedrock_agent.list_data_sources(knowledgeBaseId=knowledge_base_id)
    existing_ds = next(
        (ds for ds in ds_listing['dataSourceSummaries'] if ds['name'] == datasource_name),
        None
    )
    if existing_ds:
        existing_ds_id = existing_ds['dataSourceId']
        ds_current_status = existing_ds['status']
        if ds_current_status != 'AVAILABLE':
            raise Exception(f"Data source with name {existing_ds['name']} exists but is not in AVAILABLE state. Data source state: {ds_current_status}")
        print(f"Using existing Data source with name {existing_ds['name']} and status {ds_current_status}")
        return existing_ds_id, ds_current_status

    print(f"Creating new Data source with name {datasource_name}")
    create_ds_response = bedrock_agent.create_data_source(
        dataSourceConfiguration=data_source_configuration,
        description='direct injection of claims eoc documents',
        knowledgeBaseId=knowledge_base_id,
        name=datasource_name,
        vectorIngestionConfiguration={
            'chunkingConfiguration': chunking_configuration
        }
    )
    datasource_id = create_ds_response['dataSource']['dataSourceId']
    # Poll until the new data source reports AVAILABLE.
    status_response = wait_for_completion(
        bedrock_agent,
        bedrock_agent.get_data_source,
        {'knowledgeBaseId': knowledge_base_id, 'dataSourceId': datasource_id},
        'dataSource.status',
        ['AVAILABLE'],
        ['FAILED'],
        max_iterations=5,
        delay=5,
    )
    print(f"Created datasource with name {status_response['dataSource']['name']} and current status {status_response['dataSource']['status']}")
    return datasource_id, status_response['dataSource']['status']
def ingest_and_wait(bedrock_agent, data_source_id, knowledge_base_id, documents):
    """Directly ingest documents into a KB data source and wait for indexing.

    Each entry in ``documents`` must provide 'document_id', 'plan_name' and
    'document_uri'. Indexing status is polled in parallel, one thread per
    document.

    Returns:
        List of per-document detail dicts from get_knowledge_base_documents.

    Raises:
        Exception: Re-raised from the first document whose polling fails.
    """
    print("Ingesting documents...")
    bedrock_agent.ingest_knowledge_base_documents(
        dataSourceId=data_source_id,
        knowledgeBaseId=knowledge_base_id,
        documents=[
            get_document_configuration(doc['document_id'], doc['plan_name'], doc['document_uri'])
            for doc in documents
        ]
    )

    def wait_for_single_document(document):
        # Poll one document until it is INDEXED (or FAILED).
        return wait_for_completion(
            client=bedrock_agent,
            get_status_function=bedrock_agent.get_knowledge_base_documents,
            status_kwargs={
                'dataSourceId': data_source_id,
                'knowledgeBaseId': knowledge_base_id,
                'documentIdentifiers': [{
                    'custom': {
                        'id': document['document_id']
                    },
                    'dataSourceType': 'CUSTOM'}]
            },
            completion_states=['INDEXED'],
            error_states=['FAILED'],
            status_path_in_response='documentDetails[0].status',
            max_iterations=5,
            delay=5, verbose=False
        )

    results = []
    # Poll all documents concurrently.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_to_document = {
            executor.submit(wait_for_single_document, document): document
            for document in documents
        }
        for future in concurrent.futures.as_completed(future_to_document):
            document = future_to_document[future]
            try:
                result = future.result()
                result.update(document)
                results.append(result['documentDetails'][0])
            except Exception as exc:
                print(f"Document {document['document_id']} generated an exception: {exc}")
                # NOTE(review): this entry is appended and then the exception
                # is re-raised, so the partial results never reach the caller.
                results.append((document, None))
                raise
    print("Ingestion complete.")

    # Consolidate and return results
    return results
def add_lambda_permission(
    function_name,
    principal,
    action,
    source_arn=None,
    verbose=False
):
    """Grant a service principal permission to invoke a Lambda function.

    A random statement id (prefix 'claims-review-agent-actions-') is
    generated on every call, so repeated calls add distinct policy
    statements rather than failing on a duplicate id.

    Args:
        function_name: Name or ARN of the target Lambda function.
        principal: Service principal being granted access
            (e.g. 'bedrock.amazonaws.com').
        action: Lambda action to allow (e.g. 'lambda:InvokeFunction').
        source_arn: Optional source ARN to scope the permission to.
        verbose: Unused; kept for backward compatibility with callers.

    Returns:
        The raw ``add_permission`` API response.

    Raises:
        Exception: Any failure from the Lambda API, logged and re-raised.
    """
    lambda_client = boto3.client('lambda')
    try:
        # Random suffix keeps statement ids unique across invocations.
        statement_id_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=6))
        statement_id = f"claims-review-agent-actions-{statement_id_suffix}"
        kwargs = {
            'FunctionName': function_name,
            'StatementId': statement_id,
            'Action': action,
            'Principal': principal
        }
        # Add source_arn if provided
        if source_arn:
            kwargs['SourceArn'] = source_arn
        response = lambda_client.add_permission(**kwargs)
        print(f"Successfully added permission: {response}")
        return response
    except Exception as e:
        print(f"Error adding permission: {str(e)}")
        # Bare raise preserves the original traceback (was `raise e`,
        # which re-raises with a less useful chained frame).
        raise
event['chunk']['bytes'] 516 | agent_answer = data.decode('utf8') 517 | return agent_answer 518 | elif 'trace' in event: 519 | if enable_trace: 520 | trace_info = extract_trace_info(event.get('trace',{})) 521 | if trace_info['Trace ID']: # Only add if we have a valid trace 522 | # Create new tab for this trace 523 | new_tab = create_trace_tab(trace_info) 524 | tab_contents.append(new_tab) 525 | 526 | # Update tab widget 527 | tab.children = tuple(tab_contents) 528 | # Set tab title 529 | tab.set_title(len(tab_contents) - 1, f"Trace {len(tab_contents)}") 530 | else: 531 | raise Exception("unexpected event.", event) 532 | except Exception as e: 533 | raise Exception("unexpected event.", e) 534 | 535 | -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/utils/helper_functions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import boto3 4 | from urllib.parse import urlparse 5 | import requests 6 | import base64 7 | import io 8 | from PIL import Image 9 | from PyPDF2 import PdfReader, PdfWriter 10 | from botocore.exceptions import ClientError 11 | from IPython.display import HTML 12 | from IPython.display import display 13 | from botocore.auth import SigV4Auth 14 | from botocore.awsrequest import AWSRequest 15 | import json 16 | import ipywidgets as widgets 17 | import html 18 | import pandas as pd 19 | 20 | s3_client = boto3.client("s3") 21 | bda_client = boto3.client('bedrock-data-automation') 22 | bda_runtime_client = boto3.client('bedrock-data-automation-runtime') 23 | cfn = boto3.client(service_name='cloudformation') 24 | region_name = boto3.session.Session().region_name 25 | # Dictionary to store the outputs 26 | resource_attributes = {} 27 | target_output_key = 'BDAWorkshopVPC' 28 | 29 | def get_stack_outputs(): 30 | # Initialize CloudFormation client 31 | cf_client = boto3.client('cloudformation', 
def get_stack_outputs():
    """Find the workshop CloudFormation stack and cache all of its outputs.

    Scans stacks in CREATE_COMPLETE/UPDATE_COMPLETE state for one exposing
    the module-level ``target_output_key`` output, then copies every output
    of that stack into the module-level ``resource_attributes`` dict.

    Returns:
        The populated resource_attributes dict, or None when no matching
        stack is found or an error occurs.
    """
    cf_client = boto3.client('cloudformation', region_name=region_name)
    try:
        paginator = cf_client.get_paginator('list_stacks')
        pages = paginator.paginate(StackStatusFilter=['CREATE_COMPLETE', 'UPDATE_COMPLETE'])
        for page in pages:
            for stack in page['StackSummaries']:
                stack_name = stack['StackName']
                try:
                    described = cf_client.describe_stacks(StackName=stack_name)
                    outputs = described['Stacks'][0].get('Outputs')
                    if not outputs:
                        continue
                    # Only cache outputs from the stack carrying our marker key.
                    if any(entry['OutputKey'] == target_output_key for entry in outputs):
                        for entry in outputs:
                            resource_attributes[entry['OutputKey']] = entry['OutputValue']
                        return resource_attributes
                except cf_client.exceptions.ClientError as e:
                    print(f"Error describing stack {stack_name}: {str(e)}")
                    continue
        print(f"No stack found with OutputKey: {target_output_key}")
        return None
    except Exception as e:
        print(f"Error: {str(e)}")
        return None


def get_stack_output(stack_name, output_key):
    """Return one output value from a named CloudFormation stack, or None."""
    response = cfn.describe_stacks(StackName=stack_name)
    stack = next((s for s in response['Stacks'] if s['StackName'] == stack_name), None)
    if stack is None:
        return None
    for output in stack['Outputs']:
        if output['OutputKey'] == output_key:
            return output['OutputValue']
    return None


def pil_to_bytes(image):
    """Serialize a PIL image to PNG-encoded bytes."""
    buffer = io.BytesIO()
    image.save(buffer, format='PNG')
    return buffer.getvalue()


def display_image(image):
    """Wrap a PIL image in a fixed-width ipywidgets Image widget."""
    image_widget = widgets.Image(value=pil_to_bytes(image), format='png')
    image_widget.layout.width = '400px'
    image_widget.layout.height = 'auto'
    image_widget.layout.object_fit = 'contain'
    return image_widget
key, value in json_obj.items(): 85 | result.append('') 86 | result.append(f'') 87 | result.append('') 90 | result.append('') 91 | result.append('
{key}') 88 | result.append(json_to_html(value, indent + 1)) 89 | result.append('
') 92 | elif isinstance(json_obj, list): 93 | result.append('') 94 | for i, item in enumerate(json_obj): 95 | result.append('') 96 | result.append(f'') 97 | result.append('') 100 | result.append('') 101 | result.append('
{i}') 98 | result.append(json_to_html(item, indent + 1)) 99 | result.append('
') 102 | elif isinstance(json_obj, (str, int, float, bool)) or json_obj is None: 103 | if isinstance(json_obj, str): 104 | result.append(f'"{json_obj}"') 105 | elif isinstance(json_obj, bool): 106 | result.append(f'{str(json_obj).lower()}') 107 | elif json_obj is None: 108 | result.append('null') 109 | else: 110 | result.append(f'{json_obj}') 111 | return ''.join(result) 112 | 113 | def display_json(json_data, title): 114 | html_content = f""" 115 |
116 |

{title}

117 |
118 | {json_to_html(json_data)} 119 |
120 |
121 | 160 | """ 161 | return widgets.HTML(html_content) 162 | 163 | def display_image_jsons(image, json_arr, titles): 164 | image_widget = display_image(image) 165 | right_column = widgets.VBox([display_json(data, title) for data, title in zip(json_arr, titles)]) 166 | bordered_hbox = widgets.HBox([image_widget, right_column]) 167 | bordered_hbox.layout.border = '5px solid black' 168 | bordered_hbox.layout.padding = '10px' 169 | bordered_hbox.layout.margin = '10px' 170 | return bordered_hbox 171 | 172 | def get_bucket_and_key(s3_uri): 173 | parsed_uri = urlparse(s3_uri) 174 | bucket_name = parsed_uri.netloc 175 | object_key = parsed_uri.path.lstrip('/') 176 | return (bucket_name, object_key) 177 | 178 | def wait_for_job_to_complete(invocationArn): 179 | get_status_response = bda_runtime_client.get_data_automation_status( 180 | invocationArn=invocationArn) 181 | status = get_status_response['status'] 182 | job_id = invocationArn.split('/')[-1] 183 | max_iterations = 60 184 | iteration_count = 0 185 | while status not in ['Success', 'ServiceError', 'ClientError']: 186 | print(f'Waiting for Job to Complete. Current status is {status}') 187 | # Wait for kernel restart 188 | time.sleep(10) # nosemgrep 189 | iteration_count += 1 190 | if iteration_count >= max_iterations: 191 | print(f"Maximum number of iterations ({max_iterations}) reached. Breaking the loop.") 192 | break 193 | get_status_response = bda_runtime_client.get_data_automation_status( 194 | invocationArn=invocationArn) 195 | status = get_status_response['status'] 196 | if iteration_count >= max_iterations: 197 | raise Exception("Job did not complete within the expected time frame.") 198 | else: 199 | print(f"Invocation Job with id {job_id} completed. 
Status is {status}") 200 | return get_status_response 201 | 202 | 203 | def read_s3_object(s3_uri): 204 | # Parse the S3 URI 205 | parsed_uri = urlparse(s3_uri) 206 | bucket_name = parsed_uri.netloc 207 | object_key = parsed_uri.path.lstrip('/') 208 | # Create an S3 client 209 | s3_client = boto3.client('s3') 210 | try: 211 | # Get the object from S3 212 | response = s3_client.get_object(Bucket=bucket_name, Key=object_key) 213 | 214 | # Read the content of the object 215 | content = response['Body'].read().decode('utf-8') 216 | return content 217 | except Exception as e: 218 | print(f"Error reading S3 object: {e}") 219 | return None 220 | 221 | def download_document(url, start_page_index=None, end_page_index=None, output_file_path=None): 222 | 223 | if not output_file_path: 224 | filename = os.path.basename(url) 225 | output_file_path = filename 226 | 227 | # Download the PDF 228 | response = requests.get(url, timeout=30) # nosemgrep 229 | print(response) 230 | pdf_content = io.BytesIO(response.content) 231 | 232 | # Create a PDF reader object 233 | pdf_reader = PdfReader(pdf_content) 234 | 235 | # Create a PDF writer object 236 | pdf_writer = PdfWriter() 237 | 238 | start_page_index = 0 if not start_page_index else max(start_page_index,0) 239 | end_page_index = len(pdf_reader.pages)-1 if not end_page_index else min(end_page_index,len(pdf_reader.pages)-1) 240 | 241 | # Specify the pages you want to extract (0-indexed) 242 | pages_to_extract = list(range(start_page_index, end_page_index)) 243 | 244 | # Add the specified pages to the writer 245 | for page_num in pages_to_extract: 246 | page = pdf_reader.pages[page_num] 247 | pdf_writer.add_page(page) 248 | 249 | print(f"Created file: {output_file_path}") 250 | # Save the extracted pages to a new PDF 251 | with open(output_file_path, "wb") as output_file: 252 | pdf_writer.write(output_file) 253 | return output_file_path 254 | 255 | 256 | def create_image_html_column(row: pd.Series, image_col: str, width: str = 
'300px') -> str: 257 | """ 258 | Create HTML embedded image from S3 URI by downloading and base64 encoding the image for a DataFrame row. 259 | 260 | Args: 261 | row (pd.Series): DataFrame row 262 | image_col (str): Name of column containing S3 URI 263 | width (str): Fixed width for image 264 | 265 | Returns: 266 | str: HTML string for embedded image 267 | """ 268 | s3_uri = row[image_col] 269 | if isinstance(s3_uri, list): 270 | s3_uri = s3_uri[0] 271 | if pd.isna(s3_uri): 272 | return '' 273 | 274 | try: 275 | # Parse S3 URI 276 | bucket_name, object_key = get_bucket_and_key(s3_uri) 277 | 278 | 279 | # Initialize S3 client 280 | s3_client = boto3.client('s3') 281 | 282 | # Download image from S3 283 | response = s3_client.get_object(Bucket=bucket_name, Key=object_key) 284 | image_content = response['Body'].read() 285 | 286 | # Open image using PIL 287 | image = Image.open(io.BytesIO(image_content)) 288 | 289 | # Convert image to RGB if it's in RGBA mode 290 | if image.mode == 'RGBA': 291 | image = image.convert('RGB') 292 | 293 | # Save image to bytes 294 | buffered = io.BytesIO() 295 | image.save(buffered, format="JPEG") 296 | 297 | # Encode image to base64 298 | img_str = base64.b64encode(buffered.getvalue()).decode() 299 | 300 | # Create HTML string with base64 encoded image 301 | return f'' 302 | except Exception as e: 303 | print(f"Error processing image {s3_uri}: {str(e)}") 304 | return '' 305 | 306 | # Example usage: 307 | """ 308 | # Add embedded images column 309 | df['embedded_images'] = add_embedded_images(df, 'crop_images', width='300px') 310 | 311 | # For Jupyter notebook display: 312 | from IPython.display import HTML 313 | HTML(df['embedded_images'].iloc[0]) 314 | """ 315 | 316 | 317 | 318 | def wait_for_completion( 319 | client, 320 | get_status_function, 321 | status_kwargs, 322 | status_path_in_response, 323 | completion_states, 324 | error_states, 325 | max_iterations=60, 326 | delay=10, 327 | verbose=True 328 | ): 329 | for _ in 
range(max_iterations): 330 | try: 331 | response = get_status_function(**status_kwargs) 332 | status = get_nested_value_new(response, status_path_in_response) 333 | 334 | if status in completion_states: 335 | if(verbose): 336 | print(f"Operation completed successfully with status: {status}") 337 | return response 338 | 339 | if status in error_states: 340 | raise Exception(f"Operation failed with status: {status}") 341 | if(verbose): 342 | print(f"Current status: {status}. Waiting...") 343 | time.sleep(delay) # nosemgrep 344 | 345 | except ClientError as e: 346 | raise Exception(f"Error checking status: {str(e)}") 347 | 348 | raise Exception(f"Operation timed out after {max_iterations} iterations") 349 | 350 | def get_nested_value_new(data, path): 351 | """Get value from nested dict/list using dot path with array support (e.g., 'items[0].name')""" 352 | current = data 353 | try: 354 | for part in path.replace('[', '.[').split('.'): 355 | if not part: 356 | continue 357 | if '[' in part: 358 | name, index = part.split('[') 359 | current = current[name] if name else current 360 | current = current[int(index.rstrip(']'))] 361 | else: 362 | current = current[part] 363 | return current 364 | except (KeyError, IndexError, TypeError, ValueError): 365 | return None 366 | 367 | def get_nested_value(data, path): 368 | """ 369 | Retrieve a value from a nested dictionary using a dot-separated path. 370 | 371 | :param data: The dictionary to search 372 | :param path: A string representing the path to the value, e.g., "Job.Status" 373 | :return: The value at the specified path, or None if not found 374 | """ 375 | keys = path.split('.') 376 | for key in keys: 377 | if isinstance(data, dict) and key in data: 378 | data = data[key] 379 | else: 380 | return None 381 | return data 382 | 383 | 384 | def display_html(data, root='root', expanded=True, bg_color='#f0f0f0'): 385 | html = f""" 386 |
387 | 388 |
{data}
389 |
390 | 405 | """ 406 | display(HTML(html)) 407 | 408 | def send_request(region, url, method, credentials, payload=None, service='bedrock'): 409 | host = url.split("/")[2] 410 | request = AWSRequest( 411 | method, 412 | url, 413 | data=payload, 414 | headers={'Host': host, 'Content-Type':'application/json'} 415 | ) 416 | SigV4Auth(credentials, service, region).add_auth(request) 417 | response = requests.request(method, url, headers=dict(request.headers), data=payload, timeout=50) 418 | response.raise_for_status() 419 | content = response.content.decode("utf-8") 420 | data = json.loads(content) 421 | return data 422 | 423 | def invoke_blueprint_recommendation_async(bda_client, payload): 424 | credentials = boto3.Session().get_credentials().get_frozen_credentials() 425 | region_name = boto3.Session().region_name 426 | url = f"{bda_client.meta.endpoint_url}/invokeBlueprintRecommendationAsync" 427 | print(f'Sending request to {url}') 428 | result = send_request( 429 | region = region_name, 430 | url = url, 431 | method = "POST", 432 | credentials = credentials, 433 | payload=payload 434 | ) 435 | return result 436 | 437 | 438 | def get_blueprint_recommendation(bda_client, job_id): 439 | credentials = boto3.Session().get_credentials().get_frozen_credentials() 440 | region_name = boto3.Session().region_name 441 | url = f"{bda_client.meta.endpoint_url}/getBlueprintRecommendation/{job_id}/" 442 | result = send_request( 443 | region = region_name, 444 | url = url, 445 | method = "POST", 446 | credentials = credentials 447 | ) 448 | return result 449 | 450 | def get_s3_to_dict(s3_url): 451 | bucket_name = s3_url.split('/')[2] 452 | object_key = '/'.join(s3_url.split('/')[3:]) 453 | 454 | # Download the JSON file from S3 455 | response = s3_client.get_object(Bucket=bucket_name, Key=object_key) 456 | json_content = response['Body'].read().decode('utf-8') 457 | 458 | # Parse the JSON content 459 | json_obj = json.loads(json_content) 460 | return json_obj 461 | 462 | def 
create_or_update_blueprint(bda_client, blueprint_name, blueprint_description, blueprint_type, blueprint_stage, blueprint_schema): 463 | list_blueprints_response = bda_client.list_blueprints( 464 | blueprintStageFilter='ALL' 465 | ) 466 | blueprint = next((blueprint for blueprint in 467 | list_blueprints_response['blueprints'] 468 | if 'blueprintName' in blueprint and 469 | blueprint['blueprintName'] == blueprint_name), None) 470 | response = None 471 | if not blueprint: 472 | print(f'No existing blueprint found with name={blueprint_name}, creating custom blueprint') 473 | response = bda_client.create_blueprint( 474 | blueprintName=blueprint_name, 475 | type=blueprint_type, 476 | blueprintStage=blueprint_stage, 477 | schema=json.dumps(blueprint_schema) 478 | ) 479 | else: 480 | print(f'Found existing blueprint with name={blueprint_name}, updating Stage and Schema') 481 | response = bda_client.update_blueprint( 482 | blueprintArn=blueprint['blueprintArn'], 483 | blueprintStage=blueprint_stage, 484 | schema=json.dumps(blueprint_schema) 485 | ) 486 | 487 | return response['blueprint']['blueprintArn'] 488 | 489 | 490 | def transform_custom_output(input_json, explainability_info): 491 | result = { 492 | "forms": {}, 493 | "tables": {} 494 | } 495 | 496 | def add_confidence(value, conf_info): 497 | return {"value": value, "confidence": conf_info["confidence"]} if isinstance(conf_info, dict) and "confidence" in conf_info else value 498 | 499 | def process_list_item(item, conf_info): 500 | return {k: add_confidence(v, conf_info.get(k, {})) for k, v in item.items() if isinstance(conf_info, dict)} 501 | 502 | # Iterate through the input JSON 503 | for key, value in input_json.items(): 504 | confidence_data = explainability_info.get(key, {}) 505 | if isinstance(value, list): 506 | # Handle lists (tables) 507 | processed_list = [] 508 | for idx, item in enumerate(value): 509 | if isinstance(item, dict): 510 | # Process each item in the list using its corresponding confidence 
info 511 | conf_info = confidence_data[idx] if isinstance(confidence_data, list) else confidence_data 512 | processed_list.append(process_list_item(item, conf_info)) 513 | result["tables"][key] = processed_list 514 | else: 515 | # Handle simple key-value pairs (forms) 516 | result["forms"][key] = add_confidence(value, confidence_data) 517 | 518 | return result 519 | 520 | 521 | def get_summaries(custom_outputs): 522 | return [{ 523 | 'page_indices': output.get('split_document', {}).get('page_indices'), 524 | 'matched_blueprint_name': output.get('matched_blueprint', {}).get('name'), 525 | 'confidence': output.get('matched_blueprint', {}).get('confidence'), 526 | 'document_class_type': output.get('document_class', {}).get('type') 527 | } if output else {} for output in custom_outputs] 528 | 529 | def show_popup_link(label, content, unique_id): 530 | # Create HTML with CSS and JavaScript 531 | html_content = f""" 532 | 589 | 590 | 591 | 592 | 598 | 599 | 624 | """ 625 | 626 | display(HTML(html_content)) 627 | 628 | 629 | --------------------------------------------------------------------------------