├── 20-Industry-Use-Cases ├── 21-Mortgage-and-Lending │ ├── utils │ │ ├── __init__.py │ │ └── helpers.py │ ├── images │ │ ├── README │ │ └── a_lending_flow_architecture.png │ └── documents │ │ ├── README │ │ ├── lending_package.pdf │ │ ├── lending_package_w2.pdf │ │ ├── lending_package_w2.png │ │ ├── lending_package_check.pdf │ │ ├── lending_package_ID_Card.pdf │ │ ├── lending_package_pay_stub.pdf │ │ ├── lending_package_account_statement.pdf │ │ └── homeowner_insurance_application_sample.pdf └── 22-Medical-Claims-Processing │ ├── data │ ├── images │ │ └── Medical_Claims_Processing_Architecture.png │ ├── agent_resources │ │ └── agent_prompt.txt │ └── blueprint │ │ └── claims_form.json │ ├── utils │ ├── display_functions.py │ ├── bedrock_utils.py │ └── helper_functions.py │ └── assets │ └── lambdas │ ├── delete-efs-volume │ └── index.py │ ├── lifecycle-configuration │ └── index.py │ ├── schema-loader │ └── index.py │ ├── create-vector-index │ └── index.py │ └── claims-review-agent-action │ └── index.py ├── .gitignore ├── 10-Understanding-BDA ├── data │ ├── documents │ │ ├── claim-form.png │ │ ├── claims-pack.pdf │ │ ├── BankStatement.jpg │ │ ├── BankStatement.pdf │ │ └── sample1_cms-1500-P.pdf │ └── blueprints │ │ ├── medical_transcription.json │ │ ├── discharge_summary.json │ │ ├── lab_reports.json │ │ ├── explanation_of_benefits.json │ │ ├── claims_form.json │ │ └── blueprint_schema.json ├── utils │ ├── display_functions.py │ └── helper_functions.py └── 11_getting_started_with_bda.ipynb ├── images └── amazon-bedrock-data-automation-overview.png ├── CODE_OF_CONDUCT.md ├── LICENSE ├── CONTRIBUTING.md └── README.md /20-Industry-Use-Cases/21-Mortgage-and-Lending/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/images/README: 
-------------------------------------------------------------------------------- 1 | This folder holds images used in the Workbook description. 2 | -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/README: -------------------------------------------------------------------------------- 1 | This folder holds document images used in the Lending Workbook. 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/*checkpoint.ipynb 2 | **/*checkpoint* 3 | **/*.pyc 4 | **/**/.ipynb_checkpoints/* 5 | **checkpoint** 6 | */.ipynb_checkpoints/* 7 | .DS_Store 8 | .log -------------------------------------------------------------------------------- /10-Understanding-BDA/data/documents/claim-form.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/10-Understanding-BDA/data/documents/claim-form.png -------------------------------------------------------------------------------- /10-Understanding-BDA/data/documents/claims-pack.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/10-Understanding-BDA/data/documents/claims-pack.pdf -------------------------------------------------------------------------------- /images/amazon-bedrock-data-automation-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/images/amazon-bedrock-data-automation-overview.png 
-------------------------------------------------------------------------------- /10-Understanding-BDA/data/documents/BankStatement.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/10-Understanding-BDA/data/documents/BankStatement.jpg -------------------------------------------------------------------------------- /10-Understanding-BDA/data/documents/BankStatement.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/10-Understanding-BDA/data/documents/BankStatement.pdf -------------------------------------------------------------------------------- /10-Understanding-BDA/data/documents/sample1_cms-1500-P.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/10-Understanding-BDA/data/documents/sample1_cms-1500-P.pdf -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package.pdf -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_w2.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_w2.pdf -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_w2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_w2.png -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_check.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_check.pdf -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_ID_Card.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_ID_Card.pdf -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_pay_stub.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_pay_stub.pdf 
-------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/images/a_lending_flow_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/images/a_lending_flow_architecture.png -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_account_statement.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/lending_package_account_statement.pdf -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/homeowner_insurance_application_sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/21-Mortgage-and-Lending/documents/homeowner_insurance_application_sample.pdf -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/data/images/Medical_Claims_Processing_Architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation/HEAD/20-Industry-Use-Cases/22-Medical-Claims-Processing/data/images/Medical_Claims_Processing_Architecture.png -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: 
-------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | 3 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 4 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 5 | opensource-codeofconduct@amazon.com with any additional questions or comments. 6 | -------------------------------------------------------------------------------- /10-Understanding-BDA/data/blueprints/medical_transcription.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "description": "A standardized medical document containing patient data, clinical findings, and diagnostic information transcribed from healthcare provider notes or dictation. Includes medical terminology and follows structured formats for use in patient care and record-keeping.", 4 | "class": "Medical Transcription", 5 | "type": "object", 6 | "definitions": {}, 7 | "properties": { 8 | "summary": { 9 | "type": "string", 10 | "inferenceType": "explicit", 11 | "instruction": "Summary of the report" 12 | } 13 | } 14 | } -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/utils/display_functions.py: -------------------------------------------------------------------------------- 1 | import ipywidgets as widgets 2 | from IPython.display import display 3 | import boto3 4 | 5 | 6 | s3 = boto3.client('s3') 7 | 8 | 9 | def get_view(data, display_function=None): 10 | out = widgets.Output() 11 | with out: 12 | if callable(display_function): 13 | display_function(data) 14 | else: 15 | display(data) 16 | return out 17 | 18 | def display_multiple(views, view_titles = None): 19 | main_tab = widgets.Tab() 20 | for i, view in enumerate(views): 21 | main_tab.children = (*main_tab.children, view) 22 | 
tab_title = view_titles[i] if view_titles and view_titles[i] else f'Document {i}' 23 | main_tab.set_title(i, title=tab_title) 24 | display(main_tab) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | # MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
17 | -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/assets/lambdas/delete-efs-volume/index.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import time 3 | from crhelper import CfnResource 4 | 5 | helper = CfnResource() 6 | 7 | @helper.create 8 | def create(event, context): 9 | print("No action needed on create") 10 | pass 11 | 12 | @helper.update 13 | def update(event, context): 14 | print("No action needed on update") 15 | pass 16 | 17 | @helper.delete 18 | def delete(event, context): 19 | try: 20 | # Extract the domain ID from the event 21 | domain_id = event['ResourceProperties']['DomainId'] 22 | 23 | # Initialize AWS clients 24 | sagemaker = boto3.client('sagemaker') 25 | efs = boto3.client('efs') 26 | 27 | # Describe the domain to get EFS ID 28 | domain = sagemaker.describe_domain(DomainId=domain_id) 29 | efs_id = domain['HomeEfsFileSystemId'] 30 | 31 | print(f'Deleting mount targets for EFS ID {efs_id}') 32 | # Delete mount targets 33 | 34 | try: 35 | mount_targets = efs.describe_mount_targets(FileSystemId=efs_id)['MountTargets'] 36 | for mt in mount_targets: 37 | efs.delete_mount_target(MountTargetId=mt['MountTargetId']) 38 | 39 | # Wait for mount targets to be deleted with a check 40 | while True: 41 | print(f'Checking mount targets for EFS ID {efs_id}') 42 | response = efs.describe_mount_targets(FileSystemId=efs_id) 43 | if not response['MountTargets']: # If no mount targets exist 44 | print('All mount targets deleted') 45 | break 46 | time.sleep(30) # nosemgrep 47 | print(f'Deleting file system with EFS ID {efs_id}') 48 | # Delete the EFS file system 49 | efs.delete_file_system(FileSystemId=efs_id) 50 | print(f"Successfully deleted EFS {efs_id} for SageMaker Studio domain {domain_id}") 51 | except efs.exceptions.FileSystemNotFound: 52 | print(f"File system {efs_id} doesn't exist") 53 | except Exception: 54 | 
print(f"Error Deleting file System {efs_id}. Skipping") 55 | 56 | return efs_id 57 | 58 | except Exception as e: 59 | error_message = f"Error deleting EFS for SageMaker Studio domain {domain_id}: {str(e)}" 60 | print(error_message) 61 | raise Exception(error_message) 62 | 63 | def lambda_handler(event, context): 64 | helper(event, context) -------------------------------------------------------------------------------- /10-Understanding-BDA/data/blueprints/discharge_summary.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "description": "A standard discharge summary report used by hospital containing details of the patient, medical provider and key facts on visit, medical assessment and a summary of discharge.", 4 | "class": "Hospital Discharge Summary", 5 | "type": "object", 6 | "definitions": { 7 | "VisitDetails": { 8 | "type": "object", 9 | "properties": { 10 | "admitted_date": { 11 | "type": "string", 12 | "inferenceType": "explicit", 13 | "instruction": "Date of admission in MM-DD-YYYY format" 14 | }, 15 | "discharged_date": { 16 | "type": "string", 17 | "inferenceType": "explicit", 18 | "instruction": "Date of discharge in MM-DD-YYYY format" 19 | }, 20 | "discharged_to": { 21 | "type": "string", 22 | "inferenceType": "explicit", 23 | "instruction": "Where the patient was discharged to" 24 | } 25 | } 26 | }, 27 | "PatientDetails": { 28 | "type": "object", 29 | "properties": { 30 | "name": { 31 | "type": "string", 32 | "inferenceType": "explicit", 33 | "instruction": "Name of the patient" 34 | }, 35 | "gender": { 36 | "type": "string", 37 | "inferenceType": "explicit", 38 | "instruction": "Gender of the patient" 39 | }, 40 | "patient_id": { 41 | "type": "string", 42 | "inferenceType": "explicit", 43 | "instruction": "Unique id of the patient" 44 | } 45 | } 46 | }, 47 | "ProviderDetails": { 48 | "type": "object", 49 | "properties": { 50 | "name": { 51 | 
"type": "string", 52 | "inferenceType": "explicit", 53 | "instruction": "Name of the provider" 54 | }, 55 | "provider_id": { 56 | "type": "string", 57 | "inferenceType": "explicit", 58 | "instruction": "Unique id of the provider" 59 | } 60 | } 61 | }, 62 | "AssessmentDetails": { 63 | "type": "object", 64 | "properties": { 65 | "reported_symptoms": { 66 | "type": "string", 67 | "inferenceType": "explicit", 68 | "instruction": "Reported symptoms and history of present illness" 69 | } 70 | } 71 | } 72 | }, 73 | "properties": { 74 | "hospital_name": { 75 | "type": "string", 76 | "inferenceType": "explicit", 77 | "instruction": "Name of the hospital" 78 | }, 79 | "hospital_contact": { 80 | "type": "string", 81 | "inferenceType": "explicit", 82 | "instruction": "Contact details of the hospital" 83 | }, 84 | "visit_details": { 85 | "$ref": "#/definitions/VisitDetails" 86 | }, 87 | "patient_details": { 88 | "$ref": "#/definitions/PatientDetails" 89 | }, 90 | "provider_details": { 91 | "$ref": "#/definitions/ProviderDetails" 92 | }, 93 | "assessment_details": { 94 | "$ref": "#/definitions/AssessmentDetails" 95 | }, 96 | "discharge_summary": { 97 | "type": "string", 98 | "inferenceType": "explicit", 99 | "instruction": "Summary of discharge instructions" 100 | } 101 | } 102 | } -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 
8 | 9 | ## Reporting Bugs/Feature Requests 10 | 11 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 12 | 13 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 14 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 15 | 16 | * A reproducible test case or series of steps 17 | * The version of our code being used 18 | * Any modifications you've made relevant to the bug 19 | * Anything unusual about your environment or deployment 20 | 21 | ## Contributing via Pull Requests 22 | 23 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 24 | 25 | 1. You are working against the latest source on the *main* branch. 26 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 27 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 28 | 29 | To send us a pull request, please: 30 | 31 | 1. Fork the repository. 32 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 33 | 3. Ensure local tests pass. 34 | 4. Commit to your fork using clear commit messages. 35 | 5. Send us a pull request, answering any default questions in the pull request interface. 36 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 37 | 38 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 39 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 40 | 41 | ## Finding contributions to work on 42 | 43 | Looking at the existing issues is a great way to find something to contribute on. 
As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 44 | 45 | ## Code of Conduct 46 | 47 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 48 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 49 | opensource-codeofconduct@amazon.com with any additional questions or comments. 50 | 51 | ## Security issue notifications 52 | 53 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 54 | 55 | ## Licensing 56 | 57 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 58 | -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/assets/lambdas/lifecycle-configuration/index.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import base64 3 | from crhelper import CfnResource 4 | 5 | helper = CfnResource() 6 | client = boto3.client('sagemaker') 7 | 8 | lcc_up1 = '\n'.join(( 9 | '#!/bin/bash', 10 | '', 11 | 'set -ex', 12 | '', 13 | 'if [ ! 
-z "${SM_JOB_DEF_VERSION}" ]', 14 | 'then', 15 | ' echo "Running in job mode, skip lcc"', 16 | 'else', 17 | ' rm -rf amazon-bedrock-samples', 18 | ' git clone https://github.com/aws-samples/sample-document-processing-with-amazon-bedrock-data-automation.git', 19 | ' mv sample-document-processing-with-amazon-bedrock-data-automation bda-workshop', 20 | ' rm -rf sample-document-processing-with-amazon-bedrock-data-automation', 21 | ' echo "Repo cloned from git"', 22 | 'fi', 23 | '', 24 | )) 25 | 26 | def get_lcc_base64_string(lcc_string): 27 | lcc_bytes = lcc_string.encode("ascii") 28 | base64_lcc_bytes = base64.b64encode(lcc_bytes) 29 | base64_lcc_string = base64_lcc_bytes.decode("ascii") 30 | return base64_lcc_string 31 | 32 | def apply_lcc_to_user_profile(base64_lcc_string, lcc_config_name, profile): 33 | response = client.create_studio_lifecycle_config( 34 | StudioLifecycleConfigName=lcc_config_name, 35 | StudioLifecycleConfigContent=base64_lcc_string, 36 | StudioLifecycleConfigAppType="JupyterLab", 37 | ) 38 | 39 | lcc_arn = response["StudioLifecycleConfigArn"] 40 | update_up = client.update_user_profile( 41 | DomainId=profile.split("|")[1], 42 | UserProfileName=profile.split("|")[0], 43 | UserSettings={ 44 | "JupyterLabAppSettings": { 45 | "DefaultResourceSpec": {"LifecycleConfigArn": lcc_arn}, 46 | "LifecycleConfigArns": [lcc_arn] 47 | } 48 | } 49 | ) 50 | return update_up 51 | 52 | @helper.create 53 | @helper.update 54 | def create_or_update(event, context): 55 | up1 = event["ResourceProperties"]["UserProfile"] 56 | lcc_name_up1 = event["ResourceProperties"]["LCCName"] 57 | try: 58 | if event["RequestType"] == "Update": 59 | try: 60 | response1 = client.delete_studio_lifecycle_config( 61 | StudioLifecycleConfigName=lcc_name_up1 62 | ) 63 | print(response1) 64 | except Exception as e2: 65 | print(e2) 66 | 67 | base64_lcc_up1_string = get_lcc_base64_string(lcc_up1) 68 | updated_up1 = apply_lcc_to_user_profile( 69 | base64_lcc_up1_string, 70 | lcc_name_up1, 71 | 
up1 72 | ) 73 | print("Response User Profile LCC update for UP1") 74 | print(updated_up1) 75 | 76 | return {"Data": 120} 77 | except Exception as e: 78 | raise e 79 | 80 | @helper.delete 81 | def delete(event, context): 82 | lcc_name_up1 = event["ResourceProperties"]["LCCName"] 83 | 84 | try: 85 | response1 = client.delete_studio_lifecycle_config( 86 | StudioLifecycleConfigName=lcc_name_up1 87 | ) 88 | print(response1) 89 | return {} 90 | except Exception as e: 91 | print(e) 92 | return {"Error": str(e)} 93 | 94 | def lambda_handler(event, context): 95 | print(event) 96 | helper(event, context) 97 | -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/assets/lambdas/schema-loader/index.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import os 3 | from crhelper import CfnResource 4 | 5 | # Initialize the helper 6 | helper = CfnResource() 7 | rds_data_client = boto3.client('rds-data') 8 | 9 | s3_client = boto3.client('s3') 10 | cluster_arn = os.environ['CLUSTER_ARN'] 11 | secret_arn = os.environ['SECRET_ARN'] 12 | database_name = os.environ['DATABASE_NAME'] 13 | create_schema_sql_file = os.environ['CREATE_SCHEMA_FILE'] 14 | delete_schema_sql_file = os.environ['DELETE_SCHEMA_FILE'] 15 | update_schema_sql_file = os.environ.get('UPDATE_SCHEMA_FILE',None) 16 | initial_data_sql_file = os.environ.get('INITIAL_DATA_FILE', None) 17 | 18 | # Initialize the helper 19 | helper = CfnResource() 20 | 21 | rds_data_client = boto3.client('rds-data') 22 | s3_client = boto3.client('s3') 23 | cluster_arn = os.environ['CLUSTER_ARN'] 24 | secret_arn = os.environ['SECRET_ARN'] 25 | database_name = os.environ['DATABASE_NAME'] 26 | create_schema_sql_file = os.environ['CREATE_SCHEMA_FILE'] 27 | delete_schema_sql_file = os.environ['DELETE_SCHEMA_FILE'] 28 | update_schema_sql_file = os.environ.get('UPDATE_SCHEMA_FILE', None) 29 | initial_data_sql_file = 
os.environ.get('INITIAL_DATA_FILE', None) 30 | 31 | 32 | @helper.create 33 | def create(event, context): 34 | """Handle Create event""" 35 | execute(create_schema_sql_file) 36 | if initial_data_sql_file: 37 | execute(initial_data_sql_file) 38 | return "CustomResourcePhysicalID" 39 | 40 | 41 | @helper.update 42 | def update(event, context): 43 | """Handle Update event""" 44 | if update_schema_sql_file: 45 | execute(update_schema_sql_file) 46 | return "CustomResourcePhysicalID" 47 | 48 | 49 | @helper.delete 50 | def delete(event, context): 51 | """Handle Delete event""" 52 | execute(delete_schema_sql_file) 53 | return "CustomResourcePhysicalID" 54 | 55 | 56 | def handler(event, context): 57 | """Main handler function""" 58 | print(event) 59 | helper(event, context) 60 | 61 | 62 | def execute(sql_file_path:str): 63 | """Create the schema in the database.""" 64 | # Download SQL script from S3 65 | bucket_name, key_name = parse_s3_url(sql_file_path) 66 | sql_script = download_sql_script(bucket_name, key_name) 67 | # Split script into individual statements and execute each one 68 | statements = sql_script.split(';') 69 | for statement in statements: 70 | if statement.strip(): 71 | # Execute each statement 72 | print(f"Executing statement: {statement}") 73 | execute_statement(cluster_arn, secret_arn, database_name, statement) 74 | 75 | 76 | def parse_s3_url(s3_url): 77 | """Parse S3 URL into bucket name and key.""" 78 | s3_url_parts = s3_url.replace("s3://", "").split("/", 1) 79 | return s3_url_parts[0], s3_url_parts[1] 80 | 81 | 82 | def download_sql_script(bucket_name, key_name): 83 | """Download SQL script from S3.""" 84 | response = s3_client.get_object(Bucket=bucket_name, Key=key_name) 85 | return response['Body'].read().decode('utf-8') 86 | 87 | 88 | def execute_statement(cluster_arn, secret_arn, database_name, sql_statement): 89 | """Execute a single SQL statement using RDS Data API.""" 90 | response = rds_data_client.execute_statement( 91 | 
resourceArn=cluster_arn, 92 | secretArn=secret_arn, 93 | database=database_name, 94 | sql=sql_statement 95 | ) 96 | print(response) 97 | 98 | -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/assets/lambdas/create-vector-index/index.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | from crhelper import CfnResource 3 | from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth 4 | from time import sleep 5 | 6 | 7 | helper = CfnResource(json_logging=False, log_level='DEBUG', boto_level='CRITICAL', sleep_on_delete=120, ssl_verify=None) 8 | 9 | #No-op for update and delete 10 | @helper.delete 11 | def no_op(event, context): 12 | print("No op for delete") 13 | 14 | #get aoss_host from os environ 15 | def removeHttpsPrefix(endpoint): 16 | """ 17 | This function removes the "https://" prefix from a given endpoint string, 18 | if present, and returns the modified string. 
19 | """ 20 | if endpoint.startswith("https://"): 21 | return endpoint[8:] 22 | return endpoint 23 | 24 | def get_aoss_host(resource_properties): 25 | if "AOSSHost" not in resource_properties: 26 | raise Exception("AOSSHost not provided from resource properties") 27 | 28 | return removeHttpsPrefix(resource_properties["AOSSHost"]) 29 | 30 | def get_aoss_client(host): 31 | auth = AWSV4SignerAuth( 32 | boto3.Session().get_credentials(), 33 | boto3.session.Session().region_name, 34 | "aoss" 35 | ) 36 | # create an opensearch client and use the request-signer 37 | return OpenSearch( 38 | hosts=[{'host': host, 'port': 443}], 39 | http_auth=auth, 40 | use_ssl=True, 41 | verify_certs=True, 42 | connection_class=RequestsHttpConnection 43 | ) 44 | def get_aoss_index_name(resource_properties): 45 | if "AOSSIndexName" not in resource_properties: 46 | raise Exception("AOSSIndexName not provided from resource properties") 47 | return resource_properties["AOSSIndexName"] 48 | 49 | #Function to use the opensearch-py library to create an index within an opensearch collection 50 | def create_aoss_index(index_name, aos_client): 51 | index_body = { 52 | "settings": { 53 | "index.knn": True 54 | }, 55 | "mappings": { 56 | "properties": { 57 | "vector": { 58 | "type": "knn_vector", 59 | "dimension": 1024, 60 | "method": { 61 | "name": "hnsw", 62 | "space_type": "l2", 63 | "engine": "faiss", 64 | "parameters": { 65 | "ef_construction": 512, 66 | "m": 16 67 | } 68 | } 69 | }, 70 | "text": { 71 | "type": "text" 72 | }, 73 | "id": { 74 | "type": "text" 75 | }, 76 | "text-metadata": { 77 | "type": "text" 78 | }, 79 | "x-amz-bedrock-kb-source-uri": { 80 | "type": "text" 81 | } 82 | } 83 | } 84 | } 85 | 86 | aos_client.indices.create(index=index_name, body=index_body) 87 | print(f"Created index {index_name}") 88 | 89 | #Handles create event of the CloudFormation resource 90 | @helper.create 91 | @helper.update 92 | def create_or_update_index(event, context): 93 | resource_properties = 
event['ResourceProperties'] 94 | aoss_host = get_aoss_host(resource_properties) 95 | aos_client = get_aoss_client(aoss_host) 96 | index_name = get_aoss_index_name(resource_properties) 97 | response = None 98 | sleep(60) # nosemgrep 99 | if not aos_client.indices.exists(index=index_name): 100 | response = create_aoss_index(index_name=index_name, aos_client=aos_client) 101 | return response 102 | 103 | def lambda_handler(event, context): 104 | print(event) 105 | helper(event, context) -------------------------------------------------------------------------------- /10-Understanding-BDA/data/blueprints/lab_reports.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "description": "The lab reports document specimen analysis from a medical procedure, including clinical history, gross examination, microscopic findings, and immunostaining results. The report provides detailed measurements and diagnostic markers to support the final medical diagnosis.", 4 | "class": "Lab Reports", 5 | "type": "object", 6 | "definitions": { 7 | "PatientInfo": { 8 | "type": "object", 9 | "properties": { 10 | "name": { 11 | "type": "string", 12 | "inferenceType": "explicit", 13 | "instruction": "The name of the patient" 14 | }, 15 | "medical_record_number": { 16 | "type": "string", 17 | "inferenceType": "explicit", 18 | "instruction": "The patient's medical record number" 19 | }, 20 | "date_of_birth": { 21 | "type": "string", 22 | "inferenceType": "explicit", 23 | "instruction": "The patient's date of birth in MM/DD/YYYY format" 24 | }, 25 | "gender": { 26 | "type": "string", 27 | "inferenceType": "explicit", 28 | "instruction": "The patient's gender" 29 | } 30 | } 31 | }, 32 | "ProcedureInfo": { 33 | "type": "object", 34 | "properties": { 35 | "accession_number": { 36 | "type": "string", 37 | "inferenceType": "explicit", 38 | "instruction": "The unique identifier for the specimen/procedure" 
39 | }, 40 | "date": { 41 | "type": "string", 42 | "inferenceType": "explicit", 43 | "instruction": "The date of the procedure in MM/DD/YYYY format" 44 | }, 45 | "attending_physician": { 46 | "type": "string", 47 | "inferenceType": "explicit", 48 | "instruction": "The name of the physician who performed or attended the procedure" 49 | } 50 | } 51 | }, 52 | "ClinicalInfo": { 53 | "type": "object", 54 | "properties": { 55 | "history": { 56 | "type": "string", 57 | "inferenceType": "explicit", 58 | "instruction": "A brief description of the patient's symptoms or reason for procedure" 59 | }, 60 | "specimen": { 61 | "type": "string", 62 | "inferenceType": "explicit", 63 | "instruction": "A description of the tissue or organ examined" 64 | } 65 | } 66 | }, 67 | "Diagnosis": { 68 | "type": "object", 69 | "properties": { 70 | "diagnosis": { 71 | "type": "string", 72 | "inferenceType": "explicit", 73 | "instruction": "The diagnosis or conclusion" 74 | }, 75 | "tumor_size": { 76 | "type": "string", 77 | "inferenceType": "explicit", 78 | "instruction": "The size of any tumor in cm" 79 | }, 80 | "cell_type": { 81 | "type": "string", 82 | "inferenceType": "explicit", 83 | "instruction": "A description of the type of cells observed" 84 | }, 85 | "other_findings": { 86 | "type": "string", 87 | "inferenceType": "explicit", 88 | "instruction": "Other relevant microscopic findings" 89 | } 90 | } 91 | }, 92 | "Immunostains": { 93 | "type": "object", 94 | "properties": { 95 | "positive_markers": { 96 | "type": "string", 97 | "inferenceType": "explicit", 98 | "instruction": "Markers for which the tumor cells tested positive" 99 | }, 100 | "negative_markers": { 101 | "type": "string", 102 | "inferenceType": "explicit", 103 | "instruction": "Markers for which the tumor cells tested negative" 104 | } 105 | } 106 | } 107 | }, 108 | "properties": { 109 | "patient_information": { 110 | "$ref": "#/definitions/PatientInfo" 111 | }, 112 | "procedure_information": { 113 | "$ref": 
"#/definitions/ProcedureInfo" 114 | }, 115 | "clinical_information": { 116 | "$ref": "#/definitions/ClinicalInfo" 117 | }, 118 | "diagnosis": { 119 | "$ref": "#/definitions/Diagnosis" 120 | }, 121 | "gross_instruction": { 122 | "type": "string", 123 | "inferenceType": "explicit", 124 | "instruction": "A instruction of the appearance of the specimen" 125 | }, 126 | "microscopic_instruction": { 127 | "type": "string", 128 | "inferenceType": "explicit", 129 | "instruction": "A instruction of the tumor cells and tissue under the microscope" 130 | }, 131 | "immunostains": { 132 | "$ref": "#/definitions/Immunostains" 133 | }, 134 | "comment": { 135 | "type": "string", 136 | "inferenceType": "explicit", 137 | "instruction": "Any additional comments or notes" 138 | } 139 | } 140 | } -------------------------------------------------------------------------------- /20-Industry-Use-Cases/21-Mortgage-and-Lending/utils/helpers.py: -------------------------------------------------------------------------------- 1 | import json 2 | import ipywidgets as widgets 3 | import io 4 | 5 | 6 | def pil_to_bytes(image): 7 | byte_arr = io.BytesIO() 8 | image.save(byte_arr, format='PNG') 9 | return byte_arr.getvalue() 10 | 11 | 12 | def display_image(image): 13 | image_widget = widgets.Image(value=pil_to_bytes(image), format='png') 14 | image_widget.layout.width = '400px' 15 | image_widget.layout.height = 'auto' 16 | image_widget.layout.object_fit = 'contain' 17 | return image_widget 18 | 19 | def json_to_html(json_obj, indent=0): 20 | result = [] 21 | if isinstance(json_obj, dict): 22 | result.append('') 23 | for key, value in json_obj.items(): 24 | result.append('') 25 | result.append(f'') 26 | result.append('') 29 | result.append('') 30 | result.append('
{key}') 27 | result.append(json_to_html(value, indent + 1)) 28 | result.append('
') 31 | elif isinstance(json_obj, list): 32 | result.append('') 33 | for i, item in enumerate(json_obj): 34 | result.append('') 35 | result.append(f'') 36 | result.append('') 39 | result.append('') 40 | result.append('
{i}') 37 | result.append(json_to_html(item, indent + 1)) 38 | result.append('
') 41 | elif isinstance(json_obj, (str, int, float, bool)) or json_obj is None: 42 | if isinstance(json_obj, str): 43 | result.append(f'"{json_obj}"') 44 | elif isinstance(json_obj, bool): 45 | result.append(f'{str(json_obj).lower()}') 46 | elif json_obj is None: 47 | result.append('null') 48 | else: 49 | result.append(f'{json_obj}') 50 | return ''.join(result) 51 | 52 | def display_json(json_data, title): 53 | html_content = f""" 54 |
55 |

{title}

56 |
57 | {json_to_html(json_data)} 58 |
59 |
60 | 99 | """ 100 | return widgets.HTML(html_content) 101 | 102 | def display_image_jsons(image, json_arr, titles): 103 | image_widget = display_image(image) 104 | right_column = widgets.VBox([display_json(data, title) for data, title in zip(json_arr, titles)]) 105 | bordered_hbox = widgets.HBox([image_widget, right_column]) 106 | bordered_hbox.layout.border = '5px solid black' 107 | bordered_hbox.layout.padding = '10px' 108 | bordered_hbox.layout.margin = '10px' 109 | return bordered_hbox 110 | 111 | 112 | def get_s3_to_dict(s3, s3_url): 113 | bucket_name = s3_url.split('/')[2] 114 | object_key = '/'.join(s3_url.split('/')[3:]) 115 | 116 | # Download the JSON file from S3 117 | response = s3.get_object(Bucket=bucket_name, Key=object_key) 118 | json_content = response['Body'].read().decode('utf-8') 119 | 120 | # Parse the JSON content 121 | json_obj = json.loads(json_content) 122 | return json_obj -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/data/agent_resources/agent_prompt.txt: -------------------------------------------------------------------------------- 1 | You are a Claims Reviewer AI assistant. Your task is to review insurance claims following a specific process using provided function calls and a knowledge base. At the end of the review you 2 | would provide a detailed report of the review findings and status. 3 | To finish the review carry out all the steps detailed below carefully and thoroughly. DO NOT ASK THE USER FOR MORE INFORMATION. ALL information is available in the claim form data 4 | 5 | STEP 1 - EXTRACT CLAIM FORM DATA 6 | - To begin with You will be provided with a claim form URI. You must first get the claim form data from S3 using the given URI as input. 7 | - Use the function call get_claim_form_data(claim_form_uri) to get the claim form data. 
8 | - Once you have the claim form data, Keep a note of all the fields and their values, you would use all of the fields in the form data in later steps. 9 | 10 | STEP 2 - VERIFY INSURED MEMBER AND PATIENT DETAILS 11 | - Use the insured id number, patient last name and patient date of birth from the claim form data to get the member and patient detail from the claims database 12 | - Compare the insured member details with the details in the claim form data 13 | - for each detail, add an entry to your final report. Use this table format 14 | | Field Name | Claim Form Data | Database Data | Match or No Match | 15 | |------------|-----------------|---------------|-------------------| 16 | - If any discrepancies are found, add a note to your report and stop the process and respond with final report. 17 | - If the insured member and patient details are verified, add a note to your report and continue the process 18 | - Continue to Step 3 19 | 20 | STEP 3 CREATE CLAIM RECORD 21 | - Once and only if the insured member and patient details are matched Use the function call createClaim to create a claim record in the claims database. 22 | - use the data already gathered in the previous step to call the action to create a claim record 23 | 1. The patient details 24 | 2. The insured member details 25 | 3. Fields in the Claim form data 26 | - Use "IN_PROGRESS" as the status of the claim record 27 | - keep a note of the claim id returned after creating the claim data, you will need it later. 28 | - If the claim record is created, add a note to your final report 29 | - If the claim record is not created, add a note to your report and stop the process and respond with final report 30 | - CONTINUE TO STEP 4 31 | 32 | STEP 4. 
RETRIEVE EVIDENCE OF COVERAGE DETAILS FOR THE INSURANCE PLAN 33 | - Using the insured_plan_name from the insured member detail find a matching document in the Claims Evidence of Coverage Knowledge Base 34 | - STRICTLY USE only the document that matches the insured_plan_name. 35 | - If no document is found, add a note to your report and stop the process and respond with final report. 36 | - If a document is found, add a note to your report and continue the process 37 | - CONTINUE TO STEP 5 38 | 39 | STEP 5. EVALUATE COVERAGE 40 | - Use the claim form data to identify the services, treatments, procedures, and charges. 41 | - Add to your note the list of services, treatments, procedures, respective date, place and associated charges. 42 | - Using the details of each of the service, procedure code and charges in the claim form data search the content from evidence of coverage document to determine if that particular service/procedure or treatment is covered by the specific insurance plan 43 | - Add the findings in your final report in this format along with a snippet of text from the evidence of coverage document that supports your findings 44 | | Service/Procedure | Date | Place | Charges | Covered/Not Covered | Relevant Justification 45 | |-------------------------------------|-----------|------------|---------|-----------------------|----------------------------------------------------------------------| 46 | - For each service/procedure, add an entry to your report. 47 | - CONTINUE TO STEP 6 48 | 49 | STEP 6. UPDATE CLAIM STATUS 50 | - If all services are covered: 51 | * Update the claim record using the claim id to set the status to "ELIGIBLE" 52 | - If some or no services are covered: 53 | * Update the claim record using the claim id to set the status to "ADJUDICATOR_REVIEW" 54 | - CONTINUE TO STEP 7 55 | 56 | STEP 7. 
Respond with the final report with the following contents 57 | - Table containing the member and patient details and if they match with details in the database 58 | - The table with services/procedures and their coverage status 59 | - State the final claim status (ELIGIBLE or ADJUDICATOR_REVIEW). 60 | 61 | When responding, please provide a thorough analysis following these steps. Be precise in your language, citing specific details from the claim form and EoC document. 62 | If you need any clarification or additional information to complete the review, please ask. Your goal is to ensure accurate and fair claim processing 63 | while adhering to the insurance plan's coverage guidelines. 64 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Document Processing with Amazon Bedrock Data Automation 2 | 3 | ## How Bedrock Data Automation works 4 | 5 | Bedrock Data Automation (BDA) lets you configure output based on your processing needs for a specific data type: documents, images, video or audio. BDA can generate standard output or custom output. Below are some key concepts for understanding how BDA works. If you're a new user, start with the information about standard output. 6 | 7 | * **Standard output** – Sending a file to BDA with no other information returns the default standard output, which consists of commonly required information that's based on the data type. Examples include audio transcriptions, scene summaries for video, and document summaries. These outputs can be tuned to your use case using projects to modify them. For more information, see e.g. [Standard output for documents in Bedrock Data Automation](https://docs.aws.amazon.com/bedrock/latest/userguide/bda-output-documents.html). 8 | 9 | * **Custom output** – For documents and images, only. 
Choose custom output to define exactly what information you want to extract using a blueprint. A blueprint consists of a list of expected fields that you want retrieved from a document or image. Each field represents a piece of information that needs to be extracted to meet your specific use case. You can create your own blueprints, or select predefined blueprints from the BDA blueprint catalog. For more information, see [Custom output and blueprints](https://docs.aws.amazon.com/bedrock/latest/userguide/bda-custom-output-idp.html). 10 | 11 | * **Projects** – A project is a BDA resource that allows you to modify and organize output configurations. Each project can contain standard output configurations for documents, images, video, and audio, as well as custom output blueprints for documents and images. Projects are referenced in the `InvokeDataAutomationAsync` API call to instruct BDA on how to process the files. For more information about projects and their use cases, see [Bedrock Data Automation projects](https://docs.aws.amazon.com/bedrock/latest/userguide/bda-projects.html). 
12 | 13 | Overview Bedrock Data Automation 14 | 15 | This workshop contains the following sections 16 | 17 | * **1 - Understanding Bedrock Data Automation** 18 | * [Getting Started - How Bedrock Data Automation works](10-Understanding-BDA/11_getting_started_with_bda.ipynb) 19 | * [Document Insights with Standard Outputs](10-Understanding-BDA/12_standard_output_extended.ipynb) 20 | * [Custom Document Insights with Blueprints](10-Understanding-BDA/13_custom_outputs_and_blueprints.ipynb) 21 | * **2 - Industry Use Cases - Document Processing** 22 | * [Mortgage and Lending Flow](20-Industry-Use-Cases/21-Mortgage-and-Lending/21_mortgage_and_lending.ipynb) 23 | * [Medical Claims Processing with Agents](20-Industry-Use-Cases/22-Medical-Claims-Processing/22_medical_claims_processing.ipynb) 24 | 25 | * **3 - Bedrock Data Automation Patterns (Coming Soon)** 26 | 27 | ### Use Cases 28 | 29 | Here are some example use cases that BDA can help you with - 30 | 31 | **Document processing**: Automate Intelligent Document Processing workflows at scale, transforming unstructured documents into structured data outputs that can be customized to integrate with existing systems and workflows. 32 | 33 | **Media analysis**: Extract meaningful insights from unstructured video by creating scene summaries, identifying unsafe/explicit content, extracting text, and classifying content, enabling intelligent video search, contextual advertising, and brand safety/compliance. 34 | 35 | **Generative AI assistants**: Enhance the performance of your retrieval-augmented generation (RAG) powered question answering applications by providing them with rich, modality-specific data representations extracted from your documents, images, video, and audio. 
36 | 37 | ### Getting Started 38 | 39 | * Create Jupyterlab space in Amazon Sagemaker Studio or any other environment 40 | * Make sure you have the required IAM role permissions 41 | * Checkout the repository 42 | * Run through the notebooks 43 | 44 | ### Required IAM Permissions 45 | 46 | The features being explored in the notebook require the following IAM Policies for the execution role being used. If you're running this notebook within SageMaker Studio in your own Account, update the default execution role for the SageMaker user profile to include the following IAM policies. 47 | 48 | When using your own AWS Account to run this workshop, use AWS regions `us-east-1` or `us-west-2` where Bedrock Data Automation is available as of this writing. 49 | 50 | ```json 51 | [ 52 | { 53 | "Sid": "BDACreatePermissions", 54 | "Effect": "Allow", 55 | "Action": [ 56 | "bedrock:CreateDataAutomationProject", 57 | "bedrock:CreateBlueprint" 58 | ], 59 | "Resource": "*" 60 | }, 61 | { 62 | "Sid": "BDAOProjectsPermissions", 63 | "Effect": "Allow", 64 | "Action": [ 65 | "bedrock:CreateDataAutomationProject", 66 | "bedrock:UpdateDataAutomationProject", 67 | "bedrock:GetDataAutomationProject", 68 | "bedrock:GetDataAutomationStatus", 69 | "bedrock:ListDataAutomationProjects", 70 | "bedrock:InvokeDataAutomationAsync" 71 | ], 72 | "Resource": "arn:aws:bedrock:::data-automation-project/*" 73 | }, 74 | { 75 | "Sid": "BDABlueprintPermissions", 76 | "Effect": "Allow", 77 | "Action": [ 78 | "bedrock:GetBlueprint", 79 | "bedrock:ListBlueprints", 80 | "bedrock:UpdateBlueprint", 81 | "bedrock:DeleteBlueprint" 82 | ], 83 | "Resource": "arn:aws:bedrock:::blueprint/*" 84 | }, 85 | 86 | 87 | { 88 | "Sid": "BDACrossRegionInference", 89 | "Effect": "Allow", 90 | "Action": ["bedrock:InvokeDataAutomationAsync"], 91 | "Resource": [ 92 | "arn:aws:bedrock:us-east-1:account_id:data-automation-profile/us.data-automation-v1", 93 | 
"arn:aws:bedrock:us-east-2:account_id:data-automation-profile/us.data-automation-v1", 94 | "arn:aws:bedrock:us-west-1:account_id:data-automation-profile/us.data-automation-v1", 95 | "arn:aws:bedrock:us-west-2:account_id:data-automation-profile/us.data-automation-v1"] 96 | } 97 | ] 98 | ``` 99 | 100 | Note - The policy uses wildcard(s) for demo purposes. AWS recommends using least privileges when defining IAM Policies in your own AWS Accounts. See [Security Best Practices in IAM](https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html) 101 | 102 | 103 | ## Contributors 104 | 105 | * Raja Vaidyanathan 106 | * Arlind Nocaj 107 | * Conor Manton 108 | * Luca Perrozzi -------------------------------------------------------------------------------- /10-Understanding-BDA/data/blueprints/explanation_of_benefits.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "description": "A blueprint for a Remittance Advice (RA) or Explanation of Benefits (EOB), which is a standard document sent by insurance companies to detail how a medical claim was processed showing breakdown of charges, what the insurance paid, any discounts and amount due", 4 | "class": "Explanation of Benefits", 5 | "type": "object", 6 | "definitions": { 7 | "PaymentDetail": { 8 | "type": "object", 9 | "properties": { 10 | "paid_to": { 11 | "type": "string", 12 | "inferenceType": "explicit", 13 | "instruction": "Who the payment was made to" 14 | }, 15 | "check_number": { 16 | "type": "string", 17 | "inferenceType": "explicit", 18 | "instruction": "The check number" 19 | }, 20 | "amount": { 21 | "type": "number", 22 | "inferenceType": "explicit", 23 | "instruction": "The payment amount" 24 | } 25 | } 26 | }, 27 | "payment_details": { 28 | "type": "object", 29 | "properties": { 30 | "paid_to": { 31 | "type": "string", 32 | "inferenceType": "explicit", 33 | "instruction": "Who the payment was made to" 
34 | }, 35 | "check_number": { 36 | "type": "string", 37 | "inferenceType": "explicit", 38 | "instruction": "The check number" 39 | }, 40 | "amount": { 41 | "type": "number", 42 | "inferenceType": "explicit", 43 | "instruction": "The payment amount" 44 | } 45 | } 46 | }, 47 | "claim_summary": { 48 | "type": "object", 49 | "properties": { 50 | "claim_number": { 51 | "type": "number", 52 | "inferenceType": "explicit", 53 | "instruction": "The claim number" 54 | }, 55 | "patient_name": { 56 | "type": "string", 57 | "inferenceType": "explicit", 58 | "instruction": "The Patient Name associated with the claim" 59 | }, 60 | "billed_amount": { 61 | "type": "number", 62 | "inferenceType": "explicit", 63 | "instruction": "Billed Amount" 64 | }, 65 | "provider_discount": { 66 | "type": "number", 67 | "inferenceType": "explicit", 68 | "instruction": "Provider Discount" 69 | }, 70 | "ucr_amount": { 71 | "type": "number", 72 | "inferenceType": "explicit", 73 | "instruction": "UCR amount in dollars" 74 | }, 75 | "ineligible_amount": { 76 | "type": "number", 77 | "inferenceType": "explicit", 78 | "instruction": "Ineligible Amount in dollars" 79 | }, 80 | "deductible_amount": { 81 | "type": "number", 82 | "inferenceType": "explicit", 83 | "instruction": "Deductible Amount in dollars" 84 | }, 85 | "copay_amount": { 86 | "type": "number", 87 | "inferenceType": "explicit", 88 | "instruction": "the copay amount in dollars" 89 | }, 90 | "payment_amount": { 91 | "type": "number", 92 | "inferenceType": "explicit", 93 | "instruction": "Payment Amount" 94 | } 95 | } 96 | }, 97 | "claim_details": { 98 | "type": "object", 99 | "properties": { 100 | "dates_of_services": { 101 | "type": "string", 102 | "inferenceType": "explicit", 103 | "instruction": "Dates of Services" 104 | }, 105 | "procedure_code": { 106 | "type": "string", 107 | "inferenceType": "explicit", 108 | "instruction": "Procedure Code" 109 | }, 110 | "billed_amount": { 111 | "type": "number", 112 | "inferenceType": "explicit", 
113 | "instruction": "Billed Amount in Dollars" 114 | }, 115 | "provider_discount": { 116 | "type": "number", 117 | "inferenceType": "explicit", 118 | "instruction": "Provider Discount in Dollars" 119 | }, 120 | "max_plan_allowable": { 121 | "type": "number", 122 | "inferenceType": "explicit", 123 | "instruction": "Maximum Plan Allowable in Dollars" 124 | }, 125 | "ineligible_amount": { 126 | "type": "number", 127 | "inferenceType": "explicit", 128 | "instruction": "Ineligible Amount in Dollars" 129 | }, 130 | "remark_code": { 131 | "type": "string", 132 | "inferenceType": "explicit", 133 | "instruction": "Remark Code" 134 | }, 135 | "deductible_amount": { 136 | "type": "number", 137 | "inferenceType": "explicit", 138 | "instruction": "Deductible Amount in Dollars" 139 | }, 140 | "copay_amount": { 141 | "type": "number", 142 | "inferenceType": "explicit", 143 | "instruction": "Copay Amount in Dollars" 144 | }, 145 | "paid_at": { 146 | "type": "number", 147 | "inferenceType": "explicit", 148 | "instruction": "Paid at (percentage)" 149 | }, 150 | "payment_amount": { 151 | "type": "number", 152 | "inferenceType": "explicit", 153 | "instruction": "Payment Amount in Dollars" 154 | } 155 | } 156 | } 157 | }, 158 | "properties": { 159 | "employer": { 160 | "type": "string", 161 | "inferenceType": "explicit", 162 | "instruction": "The employer name" 163 | }, 164 | "group_number": { 165 | "type": "string", 166 | "inferenceType": "explicit", 167 | "instruction": "The group number" 168 | }, 169 | "date": { 170 | "type": "string", 171 | "inferenceType": "explicit", 172 | "instruction": "The date" 173 | }, 174 | "check_number": { 175 | "type": "string", 176 | "inferenceType": "explicit", 177 | "instruction": "The check number" 178 | }, 179 | "claim_number": { 180 | "type": "string", 181 | "inferenceType": "explicit", 182 | "instruction": "The claim number" 183 | }, 184 | "patient_name": { 185 | "type": "string", 186 | "inferenceType": "explicit", 187 | "instruction": "The 
patient name" 188 | }, 189 | "member_id": { 190 | "type": "string", 191 | "inferenceType": "explicit", 192 | "instruction": "The member ID" 193 | }, 194 | "patient_responsibility": { 195 | "type": "number", 196 | "inferenceType": "explicit", 197 | "instruction": "The patient's responsibility amount" 198 | }, 199 | "other_credits_or_adjustments": { 200 | "type": "number", 201 | "inferenceType": "explicit", 202 | "instruction": "Any other credits or adjustments amount" 203 | }, 204 | "total_payment": { 205 | "type": "number", 206 | "inferenceType": "explicit", 207 | "instruction": "The total payment amount" 208 | }, 209 | "paid_to": { 210 | "type": "string", 211 | "inferenceType": "explicit", 212 | "instruction": "Who the payment was made to" 213 | }, 214 | "payment_details": { 215 | "type": "array", 216 | "instruction": "The payment details table", 217 | "items": { 218 | "$ref": "#/definitions/payment_details" 219 | } 220 | }, 221 | "claim_details": { 222 | "type": "array", 223 | "instruction": "details of services that form the part of the claim", 224 | "items": { 225 | "$ref": "#/definitions/claim_details" 226 | } 227 | }, 228 | "claim_summary": { 229 | "$ref": "#/definitions/claim_summary" 230 | } 231 | } 232 | } -------------------------------------------------------------------------------- /10-Understanding-BDA/data/blueprints/claims_form.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "class": "CMS 1500 Claim Form", 4 | "description": "A standard medical claim form used by healthcare providers in the US to bill health insurance companies for medical services.", 5 | "definitions": { 6 | "Procedure_Service_Supplies": { 7 | "properties": { 8 | "service_start_date": { 9 | "type": "string", 10 | "inferenceType": "explicit", 11 | "instruction": "The service start date from item 24A in YYYY-MM-DD format" 12 | }, 13 | "service_end_date": { 14 | "type": "string", 
15 | "inferenceType": "explicit", 16 | "instruction": "The service end date from item 24A in YYYY-MM-DD format" 17 | }, 18 | "place_of_service": { 19 | "type": "string", 20 | "instruction": "The place the service was provided" 21 | }, 22 | "type_of_service": { 23 | "type": "string", 24 | "instruction": "The type of medical service" 25 | }, 26 | "procedure_modifier": { 27 | "type": "string", 28 | "inferenceType": "explicit", 29 | "instruction": "The procedure modifier from item 24D" 30 | }, 31 | "diagnosis_code": { 32 | "type": "string", 33 | "inferenceType": "explicit", 34 | "instruction": "The diagnosis code from item 24E" 35 | }, 36 | "procedure_code": { 37 | "type": "string", 38 | "instruction": "The procedure code" 39 | }, 40 | "charge_amount": { 41 | "type": "number", 42 | "instruction": "The charge amount for the procedure" 43 | } 44 | } 45 | } 46 | }, 47 | "properties": { 48 | "insurance_program": { 49 | "type": "string", 50 | "inferenceType": "explicit", 51 | "instruction": "The insurance program from item 1: Medicare, Medicaid, CHAMPUS, CHAMPVA, Group Health Plan" 52 | }, 53 | "insured_id_number": { 54 | "type": "string", 55 | "inferenceType": "explicit", 56 | "instruction": "The insured's ID number from item 1a" 57 | }, 58 | "patient_name": { 59 | "type": "string", 60 | "inferenceType": "explicit", 61 | "instruction": "The patient's name from item 2 in Last Name, First Name, Middle Initial format" 62 | }, 63 | "patient_date_of_birth": { 64 | "type": "string", 65 | "inferenceType": "explicit", 66 | "instruction": "The patient's date of birth from item 3 in YYYY-MM-DD format" 67 | }, 68 | "insured_name": { 69 | "type": "string", 70 | "inferenceType": "explicit", 71 | "instruction": "The insured's name from item 4 in Last Name, First Name, Middle Initial format" 72 | }, 73 | "patient_address": { 74 | "type": "string", 75 | "inferenceType": "explicit", 76 | "instruction": "The patient's address from item 5" 77 | }, 78 | "patient_relationship_to_insured": { 79 
| "type": "string", 80 | "inferenceType": "explicit", 81 | "instruction": "The patient's relationship to insured from item 6" 82 | }, 83 | "insured_address": { 84 | "type": "string", 85 | "inferenceType": "explicit", 86 | "instruction": "The insured's address from item 7 including No.,Street, City, State, Zip Code" 87 | }, 88 | "insured_phone_number": { 89 | "type": "string", 90 | "inferenceType": "explicit", 91 | "instruction": "The insured's phone number, including area code from item 7 " 92 | }, 93 | "patient_sex": { 94 | "type": "string", 95 | "inferenceType": "explicit", 96 | "instruction": "The patient's sex from item 8" 97 | }, 98 | "patient_marital_status": { 99 | "type": "string", 100 | "inferenceType": "explicit", 101 | "instruction": "The patient's marital status from item 8" 102 | }, 103 | "patient_condition_related_to": { 104 | "type": "string", 105 | "inferenceType": "explicit", 106 | "instruction": "Whether the patient's condition is related to employment, auto accident, or other accident from item 10" 107 | }, 108 | "insured_policy_feca_number": { 109 | "type": "string", 110 | "inferenceType": "explicit", 111 | "instruction": "The insured's policy group or FECA number from item 11" 112 | }, 113 | "insured_date_of_birth": { 114 | "type": "string", 115 | "inferenceType": "explicit", 116 | "instruction": "The insured's date of birth from item 11a in YYYY-MM-DD format" 117 | }, 118 | "insured_employer_or_school": { 119 | "type": "string", 120 | "inferenceType": "explicit", 121 | "instruction": "The insured's employer or school from item 11b" 122 | }, 123 | "insured_insurance_plan_name": { 124 | "type": "string", 125 | "inferenceType": "explicit", 126 | "instruction": "The insured's plan name or program name from item 11c" 127 | }, 128 | "another_health_benefit_plan_indicator": { 129 | "type": "boolean", 130 | "inferenceType": "explicit", 131 | "instruction": "d. IS THERE ANOTHER HEALTH BENEFIT PLAN? 
Yes or No from item 11d" 132 | }, 133 | "patient_signed_date": { 134 | "type": "string", 135 | "inferenceType": "explicit", 136 | "instruction": "patient's or authorized person's signature date from item 12" 137 | }, 138 | "insured_signed_date": { 139 | "type": "string", 140 | "inferenceType": "explicit", 141 | "instruction": "The insured's or authorized person's signed date from item 13" 142 | }, 143 | "illness_injury_date": { 144 | "type": "string", 145 | "inferenceType": "explicit", 146 | "instruction": "The date of current illness, injury, or pregnancy from item 14 in YYYY-MM-DD format" 147 | }, 148 | "previous_illness_date": { 149 | "type": "string", 150 | "inferenceType": "explicit", 151 | "instruction": "The date of a previous similar illness from item 15 in YYYY-MM-DD format" 152 | }, 153 | "unable_to_work_start_date": { 154 | "type": "string", 155 | "inferenceType": "explicit", 156 | "instruction": "The dates the patient was unable to work from item 16" 157 | }, 158 | "unable_to_work_end_date": { 159 | "type": "string", 160 | "inferenceType": "explicit", 161 | "instruction": "The dates the patient was unable to work until item 16" 162 | }, 163 | "referring_physician": { 164 | "type": "string", 165 | "inferenceType": "explicit", 166 | "instruction": "The name of the referring physician from item 17" 167 | }, 168 | "referring_physician_id": { 169 | "type": "string", 170 | "inferenceType": "explicit", 171 | "instruction": "The ID number of the referring physician from item 17a" 172 | }, 173 | "hospitalization_start_date": { 174 | "type": "string", 175 | "inferenceType": "explicit", 176 | "instruction": "The hospitalization start date related to current services from item 18" 177 | }, 178 | "hospitalization_end_date": { 179 | "type": "string", 180 | "inferenceType": "explicit", 181 | "instruction": "The hospitalization end date related to current services from item 18" 182 | }, 183 | "is_outside_lab_indicator": { 184 | "type": "boolean", 185 | "inferenceType": 
"explicit", 186 | "instruction": "Are there outside lab charges? from item 20" 187 | }, 188 | "outside_lab_charges": { 189 | "type": "string", 190 | "inferenceType": "explicit", 191 | "instruction": "Whether outside lab was used and charges from item 20" 192 | }, 193 | "diagnosis_1": { 194 | "type": "string", 195 | "inferenceType": "explicit", 196 | "instruction": "The diagnosis or nature of illness or injury from item 21.1" 197 | }, 198 | "diagnosis_2": { 199 | "type": "string", 200 | "inferenceType": "explicit", 201 | "instruction": "The diagnosis or nature of illness or injury from item 21.2" 202 | }, 203 | "diagnosis_3": { 204 | "type": "string", 205 | "inferenceType": "explicit", 206 | "instruction": "The diagnosis or nature of illness or injury from item 21.3" 207 | }, 208 | "diagnosis_4": { 209 | "type": "string", 210 | "inferenceType": "explicit", 211 | "instruction": "The diagnosis or nature of illness or injury from item 21.4" 212 | }, 213 | "medicaid_resubmission_number": { 214 | "type": "string", 215 | "inferenceType": "explicit", 216 | "instruction": "MEDICAID RESUBMISSION NUMBER from item 22" 217 | }, 218 | "medicaid_original_ref_number": { 219 | "type": "string", 220 | "inferenceType": "explicit", 221 | "instruction": "Medicaid - Original ref no. 
from item 22" 222 | }, 223 | "prior_authorization_number": { 224 | "type": "string", 225 | "inferenceType": "explicit", 226 | "instruction": "The prior authorization number from item 23" 227 | }, 228 | "medical_procedures": { 229 | "type": "array", 230 | "instruction": "The list of medical procedures from the table in item 24", 231 | "items": { 232 | "$ref": "#/definitions/Procedure_Service_Supplies" 233 | } 234 | }, 235 | "tax_id_type": { 236 | "type": "string", 237 | "inferenceType": "explicit", 238 | "instruction": "The tax ID type (SSN or EIN) from item 25" 239 | }, 240 | "tax_id_number": { 241 | "type": "string", 242 | "inferenceType": "explicit", 243 | "instruction": "The federal tax ID number (SSN or EIN) from item 25" 244 | }, 245 | "total_charges": {"type": "number","inferenceType": "explicit","instruction": "The total charges in dollars from item 28"}, 246 | "amount_paid": {"type": "number","inferenceType": "explicit","instruction": "The amount paid in dollars from item 29"} 247 | } 248 | } -------------------------------------------------------------------------------- /10-Understanding-BDA/data/blueprints/blueprint_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "class": "CMS 1500 Claim Form", 4 | "description": "A standard medical claim form used by healthcare providers in the US to bill health insurance companies for medical services.", 5 | "definitions": { 6 | "Procedure_Service_Supplies": { 7 | "properties": { 8 | "service_start_date": { 9 | "type": "string", 10 | "inferenceType": "explicit", 11 | "instruction": "The service start date from item 24A in YYYY-MM-DD format" 12 | }, 13 | "service_end_date": { 14 | "type": "string", 15 | "inferenceType": "explicit", 16 | "instruction": "The service end date from item 24A in YYYY-MM-DD format" 17 | }, 18 | "place_of_service": { 19 | "type": "string", 20 | "instruction": "The place the service 
was provided" 21 | }, 22 | "type_of_service": { 23 | "type": "string", 24 | "instruction": "The type of medical service" 25 | }, 26 | "procedure_modifier": { 27 | "type": "string", 28 | "inferenceType": "explicit", 29 | "instruction": "The procedure modifier from item 24D" 30 | }, 31 | "diagnosis_code": { 32 | "type": "string", 33 | "inferenceType": "explicit", 34 | "instruction": "The diagnosis code from item 24E" 35 | }, 36 | "procedure_code": { 37 | "type": "string", 38 | "instruction": "The procedure code" 39 | }, 40 | "charge_amount": { 41 | "type": "number", 42 | "instruction": "The charge amount for the procedure" 43 | } 44 | } 45 | } 46 | }, 47 | "properties": { 48 | "insurance_program": { 49 | "type": "string", 50 | "inferenceType": "explicit", 51 | "instruction": "The insurance program from item 1: Medicare, Medicaid, CHAMPUS, CHAMPVA, Group Health Plan" 52 | }, 53 | "insured_id_number": { 54 | "type": "string", 55 | "inferenceType": "explicit", 56 | "instruction": "The insured's ID number from item 1a" 57 | }, 58 | "patient_name": { 59 | "type": "string", 60 | "inferenceType": "explicit", 61 | "instruction": "The patient's name from item 2 in Last Name, First Name, Middle Initial format" 62 | }, 63 | "patient_date_of_birth": { 64 | "type": "string", 65 | "inferenceType": "explicit", 66 | "instruction": "The patient's date of birth from item 3 in YYYY-MM-DD format" 67 | }, 68 | "insured_name": { 69 | "type": "string", 70 | "inferenceType": "explicit", 71 | "instruction": "The insured's name from item 4 in Last Name, First Name, Middle Initial format" 72 | }, 73 | "patient_address": { 74 | "type": "string", 75 | "inferenceType": "explicit", 76 | "instruction": "The patient's address from item 5" 77 | }, 78 | "patient_relationship_to_insured": { 79 | "type": "string", 80 | "inferenceType": "explicit", 81 | "instruction": "The patient's relationship to insured from item 6" 82 | }, 83 | "insured_address": { 84 | "type": "string", 85 | "inferenceType": 
"explicit", 86 | "instruction": "The insured's address from item 7 including No.,Street, City, State, Zip Code" 87 | }, 88 | "insured_phone_number": { 89 | "type": "string", 90 | "inferenceType": "explicit", 91 | "instruction": "The insured's phone number, including area code from item 7 " 92 | }, 93 | "patient_sex": { 94 | "type": "string", 95 | "inferenceType": "explicit", 96 | "instruction": "The patient's address from item 8" 97 | }, 98 | "patient_marital_status": { 99 | "type": "string", 100 | "inferenceType": "explicit", 101 | "instruction": "The patient's address from item 8" 102 | }, 103 | "patient_condition_related_to": { 104 | "type": "string", 105 | "inferenceType": "explicit", 106 | "instruction": "Whether the patient's condition is related to employment, auto accident, or other accident from item 10" 107 | }, 108 | "insured_policy_feca_number": { 109 | "type": "string", 110 | "inferenceType": "explicit", 111 | "instruction": "The insured's policy group or FECA number from item 11" 112 | }, 113 | "insured_date_of_birth": { 114 | "type": "string", 115 | "inferenceType": "explicit", 116 | "instruction": "The insured's policy or group number from item 11a" 117 | }, 118 | "insured_employer_or_school": { 119 | "type": "string", 120 | "inferenceType": "explicit", 121 | "instruction": "The insured's employer or school 11b" 122 | }, 123 | "insured_insurance_plan_name": { 124 | "type": "string", 125 | "inferenceType": "explicit", 126 | "instruction": "The insured's plan name or program name from item 11c" 127 | }, 128 | "another_health_benefit_plan_indicator": { 129 | "type": "boolean", 130 | "inferenceType": "explicit", 131 | "instruction": "d. IS THERE ANOTHER HEALTH BENEFIT PLAN? 
Yes or No from item 11d" 132 | }, 133 | "patient_signed_date": { 134 | "type": "string", 135 | "inferenceType": "explicit", 136 | "instruction": "patient's or authorized person's signature date from item 12" 137 | }, 138 | "insured_signed_date": { 139 | "type": "string", 140 | "inferenceType": "explicit", 141 | "instruction": "The insured's or authorized person's signed date from item 13" 142 | }, 143 | "illness_injury_date": { 144 | "type": "string", 145 | "inferenceType": "explicit", 146 | "instruction": "The date of current illness, injury, or pregnancy from item 14 in YYYY-MM-DD format" 147 | }, 148 | "previous_illness_date": { 149 | "type": "string", 150 | "inferenceType": "explicit", 151 | "instruction": "The date of a previous similar illness from item 15 in YYYY-MM-DD format" 152 | }, 153 | "unable_to_work_start_date": { 154 | "type": "string", 155 | "inferenceType": "explicit", 156 | "instruction": "The dates the patient was unable to work from item 16" 157 | }, 158 | "unable_to_work_end_date": { 159 | "type": "string", 160 | "inferenceType": "explicit", 161 | "instruction": "The dates the patient was unable to work until item 16" 162 | }, 163 | "referring_physician": { 164 | "type": "string", 165 | "inferenceType": "explicit", 166 | "instruction": "The name of the referring physician from item 17" 167 | }, 168 | "referring_physician_id": { 169 | "type": "string", 170 | "inferenceType": "explicit", 171 | "instruction": "The ID number of the referring physician from item 17a" 172 | }, 173 | "hospitalization_start_date": { 174 | "type": "string", 175 | "inferenceType": "explicit", 176 | "instruction": "The hospitalization start date related to current services from item 18" 177 | }, 178 | "hospitalization_end_date": { 179 | "type": "string", 180 | "inferenceType": "explicit", 181 | "instruction": "The hospitalization end date related to current services from item 18" 182 | }, 183 | "is_outside_lab_indicator": { 184 | "type": "boolean", 185 | "inferenceType": 
"explicit", 186 | "instruction": "Are there outside lab charges? from item 20" 187 | }, 188 | "outside_lab_charges": { 189 | "type": "string", 190 | "inferenceType": "explicit", 191 | "instruction": "Whether outside lab was used and charges from item 20" 192 | }, 193 | "diagnosis_1": { 194 | "type": "string", 195 | "inferenceType": "explicit", 196 | "instruction": "The diagnosis or nature of illness or injury from item 21.1" 197 | }, 198 | "diagnosis_2": { 199 | "type": "string", 200 | "inferenceType": "explicit", 201 | "instruction": "The diagnosis or nature of illness or injury from item 21.2" 202 | }, 203 | "diagnosis_3": { 204 | "type": "string", 205 | "inferenceType": "explicit", 206 | "instruction": "The diagnosis or nature of illness or injury from item 21.3" 207 | }, 208 | "diagnosis_4": { 209 | "type": "string", 210 | "inferenceType": "explicit", 211 | "instruction": "The diagnosis or nature of illness or injury from item 21.4" 212 | }, 213 | "medicaid_resubmission_number": { 214 | "type": "string", 215 | "inferenceType": "explicit", 216 | "instruction": "MEDICAID RESUBMISSION NUMBER from item 22" 217 | }, 218 | "medicaid_original_ref_number": { 219 | "type": "string", 220 | "inferenceType": "explicit", 221 | "instruction": "Medicaid - Original ref no. 
from item 22" 222 | }, 223 | "prior_authorization_number": { 224 | "type": "string", 225 | "inferenceType": "explicit", 226 | "instruction": "The prior authorization number from item 23" 227 | }, 228 | "medical_procedures": { 229 | "type": "array", 230 | "instruction": "The list of medical procedures from the table in item 24", 231 | "items": { 232 | "$ref": "#/definitions/Procedure_Service_Supplies" 233 | } 234 | }, 235 | "tax_id_type": { 236 | "type": "string", 237 | "inferenceType": "explicit", 238 | "instruction": "The tax ID type (SSN or EIN) from item 25" 239 | }, 240 | "tax_id_number": { 241 | "type": "string", 242 | "inferenceType": "explicit", 243 | "instruction": "The federal tax ID number (SSN or EIN) from item 25" 244 | }, 245 | "total_charges": {"type": "number","inferenceType": "explicit","instruction": "The total charges in dollars from item 28"}, 246 | "amount_paid": {"type": "number","inferenceType": "explicit","instruction": "The amount paid in dollars from item 29"} 247 | } 248 | } -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/data/blueprint/claims_form.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "class": "CMS 1500 Claim Form", 4 | "description": "A standard medical claim form used by healthcare providers in the US to bill health insurance companies for medical services.", 5 | "definitions": { 6 | "Procedure_Service_Supplies": { 7 | "properties": { 8 | "service_start_date": { 9 | "type": "string", 10 | "inferenceType": "explicit", 11 | "instruction": "The service start date from item 24A in YYYY-MM-DD format" 12 | }, 13 | "service_end_date": { 14 | "type": "string", 15 | "inferenceType": "explicit", 16 | "instruction": "The service end date from item 24A in YYYY-MM-DD format" 17 | }, 18 | "place_of_service": { 19 | "type": "string", 20 | "instruction": 
"The place the service was provided" 21 | }, 22 | "type_of_service": { 23 | "type": "string", 24 | "instruction": "The type of medical service" 25 | }, 26 | "procedure_modifier": { 27 | "type": "string", 28 | "inferenceType": "explicit", 29 | "instruction": "The procedure modifier from item 24D" 30 | }, 31 | "diagnosis_code": { 32 | "type": "string", 33 | "inferenceType": "explicit", 34 | "instruction": "The diagnosis code from item 24E" 35 | }, 36 | "procedure_code": { 37 | "type": "string", 38 | "instruction": "The procedure code" 39 | }, 40 | "charge_amount": { 41 | "type": "number", 42 | "instruction": "The charge amount for the procedure" 43 | } 44 | } 45 | } 46 | }, 47 | "properties": { 48 | "insurance_program": { 49 | "type": "string", 50 | "inferenceType": "explicit", 51 | "instruction": "The insurance program from item 1: Medicare, Medicaid, CHAMPUS, CHAMPVA, Group Health Plan" 52 | }, 53 | "insured_id_number": { 54 | "type": "string", 55 | "inferenceType": "explicit", 56 | "instruction": "The insured's ID number from item 1a" 57 | }, 58 | "patient_name": { 59 | "type": "string", 60 | "inferenceType": "explicit", 61 | "instruction": "The patient's name from item 2 in Last Name, First Name, Middle Initial format" 62 | }, 63 | "patient_date_of_birth": { 64 | "type": "string", 65 | "inferenceType": "explicit", 66 | "instruction": "The patient's date of birth from item 3 in YYYY-MM-DD format" 67 | }, 68 | "insured_name": { 69 | "type": "string", 70 | "inferenceType": "explicit", 71 | "instruction": "The insured's name from item 4 in Last Name, First Name, Middle Initial format" 72 | }, 73 | "patient_address": { 74 | "type": "string", 75 | "inferenceType": "explicit", 76 | "instruction": "The patient's address from item 5" 77 | }, 78 | "patient_relationship_to_insured": { 79 | "type": "string", 80 | "inferenceType": "explicit", 81 | "instruction": "The patient's relationship to insured from item 6" 82 | }, 83 | "insured_address": { 84 | "type": "string", 85 | 
"inferenceType": "explicit", 86 | "instruction": "The insured's address from item 7 including No.,Street, City, State, Zip Code" 87 | }, 88 | "insured_phone_number": { 89 | "type": "string", 90 | "inferenceType": "explicit", 91 | "instruction": "The insured's phone number, including area code from item 7 " 92 | }, 93 | "patient_sex": { 94 | "type": "string", 95 | "inferenceType": "explicit", 96 | "instruction": "The patient's address from item 8" 97 | }, 98 | "patient_marital_status": { 99 | "type": "string", 100 | "inferenceType": "explicit", 101 | "instruction": "The patient's address from item 8" 102 | }, 103 | "patient_condition_related_to": { 104 | "type": "string", 105 | "inferenceType": "explicit", 106 | "instruction": "Whether the patient's condition is related to employment, auto accident, or other accident from item 10" 107 | }, 108 | "insured_policy_feca_number": { 109 | "type": "string", 110 | "inferenceType": "explicit", 111 | "instruction": "The insured's policy group or FECA number from item 11" 112 | }, 113 | "insured_date_of_birth": { 114 | "type": "string", 115 | "inferenceType": "explicit", 116 | "instruction": "The insured's policy or group number from item 11a" 117 | }, 118 | "insured_employer_or_school": { 119 | "type": "string", 120 | "inferenceType": "explicit", 121 | "instruction": "The insured's employer or school 11b" 122 | }, 123 | "insured_insurance_plan_name": { 124 | "type": "string", 125 | "inferenceType": "explicit", 126 | "instruction": "The insured's plan name or program name from item 11c" 127 | }, 128 | "another_health_benefit_plan_indicator": { 129 | "type": "boolean", 130 | "inferenceType": "explicit", 131 | "instruction": "d. IS THERE ANOTHER HEALTH BENEFIT PLAN? 
Yes or No from item 11d" 132 | }, 133 | "patient_signed_date": { 134 | "type": "string", 135 | "inferenceType": "explicit", 136 | "instruction": "patient's or authorized person's signature date from item 12" 137 | }, 138 | "insured_signed_date": { 139 | "type": "string", 140 | "inferenceType": "explicit", 141 | "instruction": "The insured's or authorized person's signed date from item 13" 142 | }, 143 | "illness_injury_date": { 144 | "type": "string", 145 | "inferenceType": "explicit", 146 | "instruction": "The date of current illness, injury, or pregnancy from item 14 in YYYY-MM-DD format" 147 | }, 148 | "previous_illness_date": { 149 | "type": "string", 150 | "inferenceType": "explicit", 151 | "instruction": "The date of a previous similar illness from item 15 in YYYY-MM-DD format" 152 | }, 153 | "unable_to_work_start_date": { 154 | "type": "string", 155 | "inferenceType": "explicit", 156 | "instruction": "The dates the patient was unable to work from item 16" 157 | }, 158 | "unable_to_work_end_date": { 159 | "type": "string", 160 | "inferenceType": "explicit", 161 | "instruction": "The dates the patient was unable to work until item 16" 162 | }, 163 | "referring_physician": { 164 | "type": "string", 165 | "inferenceType": "explicit", 166 | "instruction": "The name of the referring physician from item 17" 167 | }, 168 | "referring_physician_id": { 169 | "type": "string", 170 | "inferenceType": "explicit", 171 | "instruction": "The ID number of the referring physician from item 17a" 172 | }, 173 | "hospitalization_start_date": { 174 | "type": "string", 175 | "inferenceType": "explicit", 176 | "instruction": "The hospitalization start date related to current services from item 18" 177 | }, 178 | "hospitalization_end_date": { 179 | "type": "string", 180 | "inferenceType": "explicit", 181 | "instruction": "The hospitalization end date related to current services from item 18" 182 | }, 183 | "is_outside_lab_indicator": { 184 | "type": "boolean", 185 | "inferenceType": 
"explicit", 186 | "instruction": "Are there outside lab charges? from item 20" 187 | }, 188 | "outside_lab_charges": { 189 | "type": "string", 190 | "inferenceType": "explicit", 191 | "instruction": "Whether outside lab was used and charges from item 20" 192 | }, 193 | "diagnosis_1": { 194 | "type": "string", 195 | "inferenceType": "explicit", 196 | "instruction": "The diagnosis or nature of illness or injury from item 21.1" 197 | }, 198 | "diagnosis_2": { 199 | "type": "string", 200 | "inferenceType": "explicit", 201 | "instruction": "The diagnosis or nature of illness or injury from item 21.2" 202 | }, 203 | "diagnosis_3": { 204 | "type": "string", 205 | "inferenceType": "explicit", 206 | "instruction": "The diagnosis or nature of illness or injury from item 21.3" 207 | }, 208 | "diagnosis_4": { 209 | "type": "string", 210 | "inferenceType": "explicit", 211 | "instruction": "The diagnosis or nature of illness or injury from item 21.4" 212 | }, 213 | "medicaid_resubmission_number": { 214 | "type": "string", 215 | "inferenceType": "explicit", 216 | "instruction": "MEDICAID RESUBMISSION NUMBER from item 22" 217 | }, 218 | "medicaid_original_ref_number": { 219 | "type": "string", 220 | "inferenceType": "explicit", 221 | "instruction": "Medicaid - Original ref no. 
from item 22" 222 | }, 223 | "prior_authorization_number": { 224 | "type": "string", 225 | "inferenceType": "explicit", 226 | "instruction": "The prior authorization number from item 23" 227 | }, 228 | "medical_procedures": { 229 | "type": "array", 230 | "instruction": "The list of medical procedures from the table in item 24", 231 | "items": { 232 | "$ref": "#/definitions/Procedure_Service_Supplies" 233 | } 234 | }, 235 | "tax_id_type": { 236 | "type": "string", 237 | "inferenceType": "explicit", 238 | "instruction": "The tax ID type (SSN or EIN) from item 25" 239 | }, 240 | "tax_id_number": { 241 | "type": "string", 242 | "inferenceType": "explicit", 243 | "instruction": "The federal tax ID number (SSN or EIN) from item 25" 244 | }, 245 | "total_charges": {"type": "number","inferenceType": "explicit","instruction": "The total charges in dollars from item 28"}, 246 | "amount_paid": {"type": "number","inferenceType": "explicit","instruction": "The amount paid in dollars from item 29"} 247 | } 248 | } -------------------------------------------------------------------------------- /10-Understanding-BDA/utils/display_functions.py: -------------------------------------------------------------------------------- 1 | import ipywidgets as widgets 2 | from IPython.display import display, HTML 3 | import pandas as pd 4 | from PIL import Image 5 | import io 6 | import boto3 7 | from urllib.parse import urlparse 8 | from pdf2image import convert_from_bytes 9 | 10 | 11 | s3 = boto3.client('s3') 12 | 13 | 14 | onclick_function = """ 15 | 41 | """ 42 | 43 | def load_image(uri): 44 | if uri.startswith('s3://'): 45 | bucket, key = urlparse(uri).netloc, urlparse(uri).path.lstrip('/') 46 | file_content = s3.get_object(Bucket=bucket, Key=key)['Body'].read() 47 | else: 48 | file_content = open(uri, 'rb').read() 49 | 50 | if uri.lower().endswith('.pdf'): 51 | img_io = io.BytesIO() 52 | convert_from_bytes(file_content)[0].save(img_io, format='JPEG') 53 | return img_io.getvalue() 54 | 
55 | img = Image.open(io.BytesIO(file_content)) 56 | if img.format != 'JPEG': 57 | img_io = io.BytesIO() 58 | img.save(img_io, format='JPEG') 59 | return img_io.getvalue() 60 | return file_content 61 | 62 | 63 | def get_kv_html(kv_pairs): 64 | # Create key-value pairs display 65 | kv_html = onclick_function 66 | kv_html += """ 67 |
68 | 69 | 75 | """ 76 | 77 | for i, (key, (value, confidence)) in enumerate(kv_pairs.items()): 78 | kv_html += '' 81 | kv_html += """ 82 |
' 79 | kv_html += create_key_value_box(key, value, confidence) 80 | kv_html += '
83 |
84 | """ 85 | return kv_html 86 | 87 | def create_key_value_box(key, value, confidence): 88 | html = f""" 89 |
99 |
105 |
{key}
106 |
{confidence}
113 |
114 |
{value}
115 |
116 | """ 117 | return html 118 | 119 | def display_result(document_image_uri, kvpairs): 120 | # Create the layout with top alignment 121 | main_hbox_layout = widgets.Layout( 122 | width='100%', 123 | display='flex', 124 | flex_flow='row nowrap', 125 | align_items='stretch', 126 | margin='0' 127 | ) 128 | 129 | image_widget = widgets.Image( 130 | value=b'', 131 | format='png', 132 | width='auto', 133 | height='auto' 134 | ) 135 | image_widget.value = load_image(image_path=document_image_uri) 136 | image_container = widgets.Box( 137 | children=[image_widget], 138 | layout=widgets.Layout( 139 | border='1px solid #888', 140 | padding='1px', 141 | margin='2px', 142 | width='70%', 143 | flex='0 0 70%', 144 | min_width='300px', 145 | height='auto', 146 | display='flex', 147 | align_items='stretch', 148 | justify_content='center' 149 | ) 150 | ) 151 | kv_html = get_kv_html(kvpairs) 152 | # Add content to the Forms tab 153 | result_widget = widgets.HTML( 154 | value=kv_html, 155 | layout=widgets.Layout( 156 | border='0px solid #888', 157 | width='100%', 158 | height='10px', 159 | flex='0 0 100%', # flex: grow shrink basis 160 | margin='5px', 161 | min_width='300px' 162 | ) 163 | ) 164 | result_container = widgets.VBox( 165 | children=[result_widget], 166 | layout=widgets.Layout( 167 | border='0px solid #888', 168 | padding='4px', 169 | margin='5px', 170 | width='30%', 171 | flex='0 0 30%', 172 | min_width='200px', 173 | justify_content='center' 174 | ) 175 | ) 176 | # Add custom CSS for scrollable container 177 | custom_style = """ 178 | 189 | """ 190 | display(HTML(custom_style)) 191 | # Create the main layout 192 | main_layout = widgets.HBox( 193 | children=[image_container, result_container], 194 | layout=main_hbox_layout 195 | ) 196 | # Add the scrollable class to the right VBox 197 | result_widget.add_class('scrollable-vbox') 198 | main_layout.add_class('main-container') 199 | # Display the main layout 200 | display(main_layout) 201 | 202 | def display_multiple(views, 
view_titles = None): 203 | main_tab = widgets.Tab() 204 | for i, view in enumerate(views): 205 | main_tab.children = (*main_tab.children, view) 206 | tab_title = view_titles[i] if view_titles and view_titles[i] else f'Document {i}' 207 | main_tab.set_title(i, title=tab_title) 208 | display(main_tab) 209 | 210 | def create_form_view(forms_data): 211 | 212 | styles = """ 213 | 224 | """ 225 | 226 | def render_nested_keys(data): 227 | if not isinstance(data, dict): 228 | return f'
{data}
' 229 | html = "" 230 | for key, value in data.items(): 231 | if isinstance(value, dict) and 'value' in value: 232 | conf = value.get('confidence', 0) * 100 233 | html += f""" 234 |
235 |
{key}
236 |
237 |
{value['value']}
238 |
{conf:.1f}%
239 |
240 |
""" 241 | else: 242 | html += f""" 243 |
244 |
{key}
245 |
{render_nested_keys(value)}
246 |
""" 247 | return html 248 | 249 | return HTML(f"{styles}
{render_nested_keys(forms_data)}
") 250 | 251 | 252 | def create_table_view(tables_data): 253 | styles = """ 254 | 285 | """ 286 | 287 | def process_table(table_data): 288 | def format_cell(cell): 289 | if isinstance(cell, dict) and 'value' in cell: 290 | conf = f"({cell.get('confidence', 0):.1%})" if 'confidence' in cell else "" 291 | return f"{cell['value']}{conf}" 292 | return str(cell) 293 | 294 | return pd.DataFrame([{k: format_cell(v) for k, v in row.items()} for row in table_data]) 295 | 296 | tables_html = "".join( 297 | f""" 298 |
299 |

{table_name}

300 |
301 | {process_table(table_data).to_html(classes='table-view', index=False, escape=False)} 302 |
303 |
304 | """ 305 | for table_name, table_data in tables_data.items() if table_data 306 | ) 307 | 308 | return HTML(f"{styles}{tables_html}") 309 | 310 | def segment_view(document_image_uris, inference_result): 311 | # Create the layout with top alignment 312 | main_hbox_layout = widgets.Layout( 313 | width='100%', 314 | display='flex', 315 | flex_flow='row nowrap', 316 | align_items='stretch', 317 | margin='0' 318 | ) 319 | image_widget = widgets.Image( 320 | value=b'', 321 | format='png', 322 | width='auto', 323 | height='auto' 324 | ) 325 | image_widget.value = load_image(uri=document_image_uris[0]) 326 | image_container = widgets.VBox( 327 | children=[image_widget], 328 | layout=widgets.Layout( 329 | border='0px solid #888', 330 | padding='1px', 331 | margin='2px', 332 | width='60%', 333 | flex='0 0 60%', 334 | min_width='300px', 335 | height='auto', 336 | display='flex', 337 | align_items='stretch', 338 | justify_content='center' 339 | ) 340 | ) 341 | 342 | 343 | # Create tabs for different views 344 | tab = widgets.Tab( 345 | layout=widgets.Layout( 346 | width='40%', 347 | flex='0 0 40%', 348 | min_width='300px', 349 | height='auto' 350 | ) 351 | ) 352 | form_view = widgets.Output() 353 | table_view = widgets.Output() 354 | 355 | with form_view: 356 | display(create_form_view(inference_result['forms'])) 357 | 358 | with table_view: 359 | display(create_table_view(inference_result['tables'])) 360 | 361 | tab.children = [form_view, table_view] 362 | tab.set_title(0, 'Key Value Pairs') 363 | tab.set_title(1, 'Tables') 364 | 365 | 366 | # Add custom CSS for scrollable container 367 | custom_style = """ 368 | 383 | """ 384 | display(HTML(custom_style)) 385 | 386 | # Create the main layout 387 | main_layout = widgets.HBox( 388 | children=[image_container, tab], 389 | layout=main_hbox_layout 390 | ) 391 | 392 | 393 | # Add the scrollable class to the right VBox 394 | main_layout.add_class('main-container') 395 | return main_layout 396 | 397 | 398 | def get_view(data, 
display_function=None): 399 | out = widgets.Output() 400 | with out: 401 | if callable(display_function): 402 | display_function(data) 403 | else: 404 | display(data) 405 | return out -------------------------------------------------------------------------------- /10-Understanding-BDA/utils/helper_functions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import boto3 4 | from urllib.parse import urlparse 5 | import requests 6 | import base64 7 | import io 8 | from PIL import Image 9 | from PyPDF2 import PdfReader, PdfWriter 10 | from botocore.exceptions import ClientError 11 | from IPython.display import HTML 12 | from IPython.display import display 13 | from botocore.auth import SigV4Auth 14 | from botocore.awsrequest import AWSRequest 15 | import json 16 | import ipywidgets as widgets 17 | import pandas as pd 18 | 19 | 20 | s3_client = boto3.client("s3") 21 | bda_client = boto3.client('bedrock-data-automation') 22 | bda_runtime_client = boto3.client('bedrock-data-automation-runtime') 23 | 24 | 25 | def pil_to_bytes(image): 26 | byte_arr = io.BytesIO() 27 | image.save(byte_arr, format='PNG') 28 | return byte_arr.getvalue() 29 | 30 | 31 | def display_image(image): 32 | image_widget = widgets.Image(value=pil_to_bytes(image), format='png') 33 | image_widget.layout.width = '400px' 34 | image_widget.layout.height = 'auto' 35 | image_widget.layout.object_fit = 'contain' 36 | return image_widget 37 | 38 | 39 | def json_to_html(json_obj, indent=0): 40 | result = [] 41 | if isinstance(json_obj, dict): 42 | result.append('') 43 | for key, value in json_obj.items(): 44 | result.append('') 45 | result.append(f'') 46 | result.append('') 49 | result.append('') 50 | result.append('
{key}') 47 | result.append(json_to_html(value, indent + 1)) 48 | result.append('
') 51 | elif isinstance(json_obj, list): 52 | result.append('') 53 | for i, item in enumerate(json_obj): 54 | result.append('') 55 | result.append(f'') 56 | result.append('') 59 | result.append('') 60 | result.append('
{i}') 57 | result.append(json_to_html(item, indent + 1)) 58 | result.append('
') 61 | elif isinstance(json_obj, (str, int, float, bool)) or json_obj is None: 62 | if isinstance(json_obj, str): 63 | result.append(f'"{json_obj}"') 64 | elif isinstance(json_obj, bool): 65 | result.append(f'{str(json_obj).lower()}') 66 | elif json_obj is None: 67 | result.append('null') 68 | else: 69 | result.append(f'{json_obj}') 70 | return ''.join(result) 71 | 72 | def display_json(json_data, title): 73 | html_content = f""" 74 |
75 |

{title}

76 |
77 | {json_to_html(json_data)} 78 |
79 |
def display_image_jsons(image, json_arr, titles):
    """Render an image beside a vertical stack of titled JSON views.

    Args:
        image: Image content accepted by ``display_image`` (defined earlier in
            this module).
        json_arr: JSON-serializable objects to show in the right-hand column.
        titles: Titles, one per entry in ``json_arr``.

    Returns:
        ipywidgets.HBox: A bordered horizontal box ready for notebook display.
    """
    image_widget = display_image(image)
    right_column = widgets.VBox([display_json(data, title) for data, title in zip(json_arr, titles)])
    bordered_hbox = widgets.HBox([image_widget, right_column])
    bordered_hbox.layout.border = '5px solid black'
    bordered_hbox.layout.padding = '10px'
    bordered_hbox.layout.margin = '10px'
    return bordered_hbox

def get_bucket_and_key(s3_uri):
    """Split an ``s3://bucket/key`` URI into a ``(bucket, key)`` tuple."""
    parsed_uri = urlparse(s3_uri)
    bucket_name = parsed_uri.netloc
    object_key = parsed_uri.path.lstrip('/')
    return (bucket_name, object_key)

def wait_for_job_to_complete(invocationArn):
    """Poll a BDA data-automation invocation until it succeeds, errors, or times out.

    Args:
        invocationArn: ARN returned by ``invoke_data_automation_async``.

    Returns:
        dict: The final ``get_data_automation_status`` response.

    Raises:
        Exception: On ClientError/ServiceError status or after 15 polls
            (via ``wait_for_completion`` below).
    """
    get_status_response = wait_for_completion(
        client=bda_runtime_client,
        get_status_function=bda_runtime_client.get_data_automation_status,
        status_kwargs={'invocationArn': invocationArn},
        completion_states=['Success'],
        error_states=['ClientError', 'ServiceError'],
        status_path_in_response='status',
        max_iterations=15,
        delay=30
    )
    return get_status_response

def read_s3_object(s3_uri):
    """Read an S3 object and return its body as UTF-8 text, or None on error."""
    parsed_uri = urlparse(s3_uri)
    bucket_name = parsed_uri.netloc
    object_key = parsed_uri.path.lstrip('/')
    s3_client = boto3.client('s3')
    try:
        response = s3_client.get_object(Bucket=bucket_name, Key=object_key)
        return response['Body'].read().decode('utf-8')
    except Exception as e:
        # Best-effort read: callers treat None as "unavailable".
        print(f"Error reading S3 object: {e}")
        return None

def download_document(url, start_page_index=None, end_page_index=None, output_file_path=None):
    """Download a PDF from *url* and save an inclusive page range to disk.

    Args:
        url: HTTP(S) URL of the source PDF.
        start_page_index: First page to keep (0-indexed); defaults to 0.
        end_page_index: Last page to keep (0-indexed, inclusive); defaults to
            the final page of the document.
        output_file_path: Destination path; defaults to the URL's basename.

    Returns:
        str: The path the extracted PDF was written to.
    """
    if not output_file_path:
        output_file_path = os.path.basename(url)

    # Download the PDF
    response = requests.get(url, timeout=30)  # nosemgrep
    # Fail fast on HTTP errors instead of feeding an error page to PdfReader.
    response.raise_for_status()
    pdf_content = io.BytesIO(response.content)

    pdf_reader = PdfReader(pdf_content)
    pdf_writer = PdfWriter()

    last_page_index = len(pdf_reader.pages) - 1
    start_page_index = 0 if not start_page_index else max(start_page_index, 0)
    end_page_index = last_page_index if not end_page_index else min(end_page_index, last_page_index)

    # BUG FIX: range() excludes its stop value, so the requested end page
    # (and, with defaults, the document's final page) was being dropped.
    # end_page_index is an inclusive 0-based index, hence the +1.
    for page_num in range(start_page_index, end_page_index + 1):
        pdf_writer.add_page(pdf_reader.pages[page_num])

    # Save the extracted pages to a new PDF.
    with open(output_file_path, "wb") as output_file:
        pdf_writer.write(output_file)
    print(f"Created file: {output_file_path}")
    return output_file_path

def create_image_html_column(row: pd.Series, image_col: str, width: str = '300px') -> str:
    """
    Create HTML embedded image from S3 URI by downloading and base64 encoding the image for a DataFrame row.

    Args:
        row (pd.Series): DataFrame row
        image_col (str): Name of column containing S3 URI
        width (str): Fixed width for image

    Returns:
        str: HTML string for embedded image ('' when missing or on error)
    """
    s3_uri = row[image_col]
    if isinstance(s3_uri, list):
        # Some BDA outputs carry a list of crop images; use the first one.
        s3_uri = s3_uri[0]
    if pd.isna(s3_uri):
        return ''

    try:
        bucket_name, object_key = get_bucket_and_key(s3_uri)
        s3_client = boto3.client('s3')

        # Download image from S3
        response = s3_client.get_object(Bucket=bucket_name, Key=object_key)
        image_content = response['Body'].read()

        image = Image.open(io.BytesIO(image_content))
        # JPEG cannot carry an alpha channel.
        if image.mode == 'RGBA':
            image = image.convert('RGB')

        buffered = io.BytesIO()
        image.save(buffered, format="JPEG")
        img_str = base64.b64encode(buffered.getvalue()).decode()

        # NOTE(review): the original return statement appears truncated in the
        # source (`return f''`); reconstructed as an embedded <img> tag per the
        # docstring ("HTML string for embedded image") — confirm against history.
        return f'<img src="data:image/jpeg;base64,{img_str}" width="{width}"/>'
    except Exception as e:
        print(f"Error processing image {s3_uri}: {str(e)}")
        return ''

# Example usage:
#   df['embedded_images'] = add_embedded_images(df, 'crop_images', width='300px')
#   from IPython.display import HTML
#   HTML(df['embedded_images'].iloc[0])

def wait_for_completion(
    client,
    get_status_function,
    status_kwargs,
    status_path_in_response,
    completion_states,
    error_states,
    max_iterations=60,
    delay=10
):
    """Generic poller: call *get_status_function* until a terminal state.

    Args:
        client: AWS client owning the status call (kept for interface
            compatibility; the function itself only calls get_status_function).
        get_status_function: Zero-side-effect status callable.
        status_kwargs: Keyword arguments forwarded on every poll.
        status_path_in_response: Dot path to the status string in the response.
        completion_states: Statuses that end polling successfully.
        error_states: Statuses that raise immediately.
        max_iterations: Poll attempts before timing out.
        delay: Seconds to sleep between polls.

    Returns:
        dict: The response that carried a completion state.

    Raises:
        Exception: On an error state, a ClientError, or timeout.
    """
    for _ in range(max_iterations):
        try:
            response = get_status_function(**status_kwargs)
            status = get_nested_value(response, status_path_in_response)

            if status in completion_states:
                print(f"Operation completed successfully with status: {status}")
                return response

            if status in error_states:
                raise Exception(f"Operation failed with status: {status}")

            print(f"Current status: {status}. Waiting...")
            time.sleep(delay)  # nosemgrep

        except ClientError as e:
            raise Exception(f"Error checking status: {str(e)}")

    raise Exception(f"Operation timed out after {max_iterations} iterations")

def get_nested_value(data, path):
    """
    Retrieve a value from a nested dictionary using a dot-separated path.

    :param data: The dictionary to search
    :param path: A string representing the path to the value, e.g., "Job.Status"
    :return: The value at the specified path, or None if not found
    """
    keys = path.split('.')
    for key in keys:
        if isinstance(data, dict) and key in data:
            data = data[key]
        else:
            return None
    return data
316 | 317 |
{data}
318 |
def send_request(region, url, method, credentials, payload=None, service='bedrock'):
    """Sign an HTTP request with SigV4 and send it, returning the decoded JSON body.

    Args:
        region: AWS region used for the signature.
        url: Full endpoint URL; the host segment is pulled out for the Host header.
        method: HTTP verb, e.g. "POST".
        credentials: Frozen botocore credentials to sign with.
        payload: Optional request body (JSON string).
        service: Signing service name, defaults to 'bedrock'.

    Returns:
        dict: Parsed JSON response body.

    Raises:
        requests.HTTPError: On a non-2xx response.
    """
    host = url.split("/")[2]
    headers = {'Host': host, 'Content-Type': 'application/json'}
    aws_request = AWSRequest(method, url, data=payload, headers=headers)
    SigV4Auth(credentials, service, region).add_auth(aws_request)
    http_response = requests.request(
        method, url, headers=dict(aws_request.headers), data=payload, timeout=50
    )
    http_response.raise_for_status()
    return json.loads(http_response.content.decode("utf-8"))

def invoke_blueprint_recommendation_async(bda_client, payload):
    """Start an async blueprint recommendation job via the signed REST endpoint."""
    frozen_credentials = boto3.Session().get_credentials().get_frozen_credentials()
    current_region = boto3.Session().region_name
    endpoint = f"{bda_client.meta.endpoint_url}/invokeBlueprintRecommendationAsync"
    print(f'Sending request to {endpoint}')
    return send_request(
        region=current_region,
        url=endpoint,
        method="POST",
        credentials=frozen_credentials,
        payload=payload
    )

def get_blueprint_recommendation(bda_client, job_id):
    """Fetch the status/result of a blueprint recommendation job by its id."""
    frozen_credentials = boto3.Session().get_credentials().get_frozen_credentials()
    current_region = boto3.Session().region_name
    endpoint = f"{bda_client.meta.endpoint_url}/getBlueprintRecommendation/{job_id}/"
    return send_request(
        region=current_region,
        url=endpoint,
        method="POST",
        credentials=frozen_credentials
    )

def get_s3_to_dict(s3_url):
    """Download a JSON object from S3 and parse it into a Python dict."""
    # s3://bucket/key → ['s3:', '', bucket, key-with-slashes]
    _, _, bucket_name, object_key = s3_url.split('/', 3)
    response = s3_client.get_object(Bucket=bucket_name, Key=object_key)
    return json.loads(response['Body'].read().decode('utf-8'))
def create_or_update_blueprint(bda_client, blueprint_name, blueprint_description, blueprint_type, blueprint_stage, blueprint_schema):
    """Create a BDA blueprint by name, or update its stage/schema if it exists.

    Args:
        bda_client: boto3 'bedrock-data-automation' client.
        blueprint_name: Unique name to look up / create.
        blueprint_description: Unused by the API calls below; kept for
            interface compatibility with existing callers.
        blueprint_type: Blueprint type passed to create_blueprint.
        blueprint_stage: Target stage for create or update.
        blueprint_schema: Schema dict; serialized to JSON for the API.

    Returns:
        str: The blueprint ARN of the created or updated blueprint.
    """
    # NOTE(review): list_blueprints is read without pagination here; if the
    # account holds more blueprints than one page returns, an existing one
    # could be missed and duplicated — confirm against the API's page size.
    list_blueprints_response = bda_client.list_blueprints(
        blueprintStageFilter='ALL'
    )
    blueprint = next((blueprint for blueprint in
                      list_blueprints_response['blueprints']
                      if 'blueprintName' in blueprint and
                      blueprint['blueprintName'] == blueprint_name), None)

    if not blueprint:
        print(f'No existing blueprint found with name={blueprint_name}, creating custom blueprint')
        response = bda_client.create_blueprint(
            blueprintName=blueprint_name,
            type=blueprint_type,
            blueprintStage=blueprint_stage,
            schema=json.dumps(blueprint_schema)
        )
    else:
        print(f'Found existing blueprint with name={blueprint_name}, updating Stage and Schema')
        response = bda_client.update_blueprint(
            blueprintArn=blueprint['blueprintArn'],
            blueprintStage=blueprint_stage,
            schema=json.dumps(blueprint_schema)
        )

    return response['blueprint']['blueprintArn']


def transform_custom_output(input_json, explainability_info):
    """Merge BDA custom output values with their confidence scores.

    Scalar fields land under result['forms'] as
    ``{"value": ..., "confidence": ...}`` (or the raw value when no confidence
    is available); list fields land under result['tables'] with each row's
    cells annotated the same way. Non-dict rows inside lists are skipped,
    matching the original behavior.

    Args:
        input_json: The 'inference_result' style dict of extracted values.
        explainability_info: Parallel dict of confidence metadata.

    Returns:
        dict: ``{"forms": {...}, "tables": {...}}``.
    """
    result = {
        "forms": {},
        "tables": {}
    }

    def add_confidence(value, conf_info):
        """Wrap value with its confidence when the metadata carries one."""
        if isinstance(conf_info, dict) and "confidence" in conf_info:
            return {"value": value, "confidence": conf_info["confidence"]}
        return value

    def process_list_item(item, conf_info):
        """Annotate one table row with per-cell confidence."""
        # BUG FIX: the original comprehension returned {} (silently dropping
        # the whole row's data) whenever conf_info was not a dict; keep the
        # raw values instead.
        if not isinstance(conf_info, dict):
            return dict(item)
        return {k: add_confidence(v, conf_info.get(k, {})) for k, v in item.items()}

    for key, value in input_json.items():
        confidence_data = explainability_info.get(key, {})
        if isinstance(value, list):
            # Lists represent tables: annotate row by row.
            processed_list = []
            for idx, item in enumerate(value):
                if isinstance(item, dict):
                    if isinstance(confidence_data, list):
                        # BUG FIX: guard against explainability lists shorter
                        # than the value list (previously an IndexError).
                        conf_info = confidence_data[idx] if idx < len(confidence_data) else {}
                    else:
                        conf_info = confidence_data
                    processed_list.append(process_list_item(item, conf_info))
            result["tables"][key] = processed_list
        else:
            # Scalars represent form fields.
            result["forms"][key] = add_confidence(value, confidence_data)

    return result


def get_summaries(custom_outputs):
    """Condense BDA custom outputs into per-document summary dicts.

    Each summary carries the page range, matched blueprint name/confidence and
    document class; a falsy output yields an empty dict.
    """
    return [{
        'page_indices': output.get('split_document', {}).get('page_indices'),
        'matched_blueprint_name': output.get('matched_blueprint', {}).get('name'),
        'confidence': output.get('matched_blueprint', {}).get('confidence'),
        'document_class_type': output.get('document_class', {}).get('type')
    } if output else {} for output in custom_outputs]

def restart_kernel():
    """Restart the Jupyter kernel, printing a heads-up message first."""
    def show_restart_message():
        # Let in-flight cell output flush before announcing the restart.
        time.sleep(2)  # nosemgrep
        print("Restarting Kernel...Wait a few seconds and progress executing subsequent cells.")

    show_restart_message()
    # NOTE(review): the HTML payload is empty as rendered in this source; the
    # kernel-restart <script> snippet appears to have been stripped — confirm
    # against version control before relying on this actually restarting.
    display(HTML(""))
insured_id,insured_name,insured_group_number,insured_plan_name,insured_birth_date,insured_policy_number,phone_number 19 | ,address FROM Insured_Person WHERE insured_policy_number=:insured_policy_number; 20 | """ 21 | 22 | PATIENT_DETAILS_QUERY = """ 23 | SELECT p.patient_id,i.insured_id,p.patient_firstname,p.patient_lastname,p.patient_birth_date,p.relationship_to_insured,p.phone_number,p.sex,p.address 24 | FROM Patient p, Insured_Person i WHERE i.insured_id = p.insured_id AND i.insured_policy_number = :insured_policy_number 25 | AND patient_lastname=:patient_lastname AND patient_birth_date=TO_DATE(:patient_birth_date,'YYYY-MM-DD'); 26 | """ 27 | 28 | MEMBER_AND_PATIENT_DETAILS_QUERY = """ 29 | SELECT 30 | i.insured_id,i.insured_name,i.insured_group_number,i.insured_plan_name,i.insured_birth_date,i.insured_policy_number,i.address insured_address,i.phone_number insured_phone_number, 31 | p.patient_id,p.patient_firstname,p.patient_lastname,p.patient_birth_date,p.relationship_to_insured,p.phone_number patient_phone_number,p.sex patient_sex,p.address patient_address 32 | FROM Patient p, Insured_Person i WHERE i.insured_id = p.insured_id AND i.insured_policy_number = :insured_policy_number 33 | AND patient_lastname=:patient_lastname AND patient_birth_date=TO_DATE(:patient_birth_date,'YYYY-MM-DD'); 34 | """ 35 | 36 | CREATE_CLAIM_QUERY = """ 37 | INSERT INTO Claim (patient_id,claim_date,diagnosis_1,diagnosis_2,diagnosis_3,diagnosis_4,total_charges,balanceDue, amountPaid,claim_status) VALUES 38 | (:patient_id, TO_DATE(:claim_date, 'YYYY-MM-DD'), :diagnosis_1, :diagnosis_2, :diagnosis_3, :diagnosis_4, :total_charges,:balanceDue, :amountPaid, :claim_status) 39 | RETURNING claim_id 40 | """ 41 | 42 | UPDATE_CLAIM_QUERY = """ 43 | UPDATE CLAIM 44 | SET claim_status = :claim_status 45 | WHERE claim_id = :claim_id 46 | RETURNING claim_id, claim_status 47 | """ 48 | 49 | CREATE_SERVICE_QUERY = """ 50 | INSERT INTO SERVICE (claim_id, date_of_service, 
place_of_service,type_of_service,procedure_code) VALUES 51 | (:claim_id, TO_DATE(:date_of_service, 'YYYY-MM-DD'), :place_of_service, :type_of_service, :procedure_code) 52 | RETURNING claim_id, service_id 53 | """ 54 | 55 | 56 | class ParameterError(Exception): 57 | """Base exception for parameter-related errors""" 58 | pass 59 | 60 | class MissingParametersError(ParameterError): 61 | """Raised when the parameters dict is empty or missing""" 62 | pass 63 | 64 | class ParameterNotFoundError(ParameterError): 65 | """Raised when a specific parameter is not found""" 66 | pass 67 | 68 | 69 | def run_command(sql_statement, parameters=None): 70 | print(f"SQL statement: {sql_statement}") 71 | result = rds_data.execute_statement( 72 | resourceArn=CLAIMS_DB_CLUSTER_ARN, 73 | secretArn=CLAIMS_DB_CREDENTIALS_SECRET_ARN, 74 | database=CLAIMS_DB_DATABASE_NAME, 75 | sql=sql_statement, 76 | includeResultMetadata=True, 77 | parameters=parameters 78 | ) 79 | return result 80 | 81 | def getClaimsFormData(event) : 82 | s3_uri = get_parameter(event, "s3URI") 83 | response = s3.get_object(Bucket=s3_uri.split('/',3)[2], Key=s3_uri.split('/',3)[3]) 84 | content = response['Body'].read().decode('utf-8') 85 | json_content = json.loads(content) 86 | 87 | #create response json as a list of dictionaries 88 | response = { 89 | "claims_form_data": json_content 90 | } 91 | return response 92 | 93 | 94 | def getAllOpenClaims(event) : 95 | 96 | #create response json as a list of dictionaries 97 | response = [ 98 | { 99 | "claimId": "11111111", 100 | "policyHolderId": "John Doe", 101 | "claimStatus": "2021-01-01", 102 | } 103 | ] 104 | return response 105 | 106 | def get_parameter(event, parameter_name): 107 | params = event["parameters"] 108 | if not params: 109 | raise MissingParametersError("No parameters provided") 110 | else: 111 | param = [p for p in params if p["name"] == parameter_name] 112 | if not param: 113 | raise ParameterNotFoundError(f"Missing parameter: {parameter_name}") 114 | else: 
115 | return param[0]["value"] 116 | 117 | def get_request_property(event, property_name, defaultValue=None): 118 | request_body = event["requestBody"] 119 | content = request_body["content"] 120 | application_json = content["application/json"] 121 | properties = application_json["properties"] 122 | property = [p for p in properties if p["name"]==property_name] 123 | if not property: 124 | if not defaultValue: 125 | raise ParameterNotFoundError(f"Missing parameter: {property_name}") 126 | else: 127 | return defaultValue 128 | else: 129 | value = None 130 | match property[0]["type"]: 131 | case 'string': 132 | value = str(property[0]["value"]) 133 | case 'number': 134 | value = float(property[0]["value"]) 135 | case 'integer': 136 | value = int(property[0]["value"]) 137 | case _: 138 | value = property[0]["value"] 139 | return value 140 | 141 | def results_by_column_name(result): 142 | columns = [column["name"] for column in result["columnMetadata"]] 143 | records = result["records"] 144 | results = [] 145 | for record in records: 146 | print(record) 147 | values = [list(value.values())[0] for value in record] 148 | print(values) 149 | results.append(dict(zip(columns, values))) 150 | print(results) 151 | return results 152 | 153 | # Function to create parameter dict 154 | def create_param(name, value): 155 | print(f"name:{name}, value:{value}") 156 | if value is None: 157 | return {'name': name, 'value': {'isNull': True}} 158 | elif isinstance(value, str): 159 | return {'name': name, 'value': {'stringValue': value}} 160 | elif isinstance(value, int): 161 | return {'name': name, 'value': {'longValue': value}} 162 | elif isinstance(value, float): 163 | return {'name': name, 'value': {'doubleValue': value}} 164 | elif isinstance(value, bool): 165 | return {'name': name, 'value': {'booleanValue': value}} 166 | else: 167 | raise ValueError(f"Unsupported type for {name}: {type(value)}") 168 | 169 | def getMemberAndPatientDetails(event) : 170 | 171 | insured_policy_number 
def getMemberAndPatientDetails(event):
    """Look up insured member and patient details in a single joined query.

    Reads 'insured_id_number', 'patient_last_name' and 'patient_birth_date'
    (YYYY-MM-DD) from the event parameters.

    Returns:
        dict: Combined member/patient record, or a human-readable string when
        no matching row is found (consumed by the agent as the tool result).
    """
    insured_policy_number = get_parameter(event, "insured_id_number")
    patient_lastname = get_parameter(event, "patient_last_name")
    patient_birth_date = get_parameter(event, "patient_birth_date")
    parameters = [
        {
            'name': 'insured_policy_number',
            'value': {'stringValue': insured_policy_number}
        },
        {
            'name': 'patient_lastname',
            'value': {'stringValue': patient_lastname}
        },
        {
            'name': 'patient_birth_date',
            'value': {'stringValue': patient_birth_date}
        }
    ]

    result = run_command(MEMBER_AND_PATIENT_DETAILS_QUERY, parameters)
    print(result)
    data = results_by_column_name(result)
    if not data:
        return f"""
        Unable to get Member and/or Patient details with
        Insured Id Number={insured_policy_number},
        Patient Last Name={patient_lastname},
        Patient Birth Date={patient_birth_date}
        """
    # The query joins on the policy number, so at most one household matches;
    # take the first row.
    member = data[0]
    response = {
        "insuredId": member['insured_id'],
        "memberName": member['insured_name'],
        "memberAddress": member['insured_address'],
        "memberDateOfBirth": member['insured_birth_date'],
        "memberPlanDetails": {
            "memberGroupNumber": member['insured_group_number'],
            "memberPlanName": member['insured_plan_name'],
            "memberPlanNumber": member['insured_policy_number'],
        },
        "memberPhoneNumber": member['insured_phone_number'],
        "patientId": member['patient_id'],
        "patientFirstName": member['patient_firstname'],
        "patientLastName": member['patient_lastname'],
        "patientDateOfBirth": member['patient_birth_date'],
        "patientRelationshipToInsured": member['relationship_to_insured'],
        "patientPhoneNumber": member['patient_phone_number'],
        "patientSex": member['patient_sex'],
        "patientAddress": member['patient_address'],
    }

    return response

def getMemberDetails(event):
    """Look up an insured member by policy number.

    Returns:
        dict: Member details, or a not-found message string.
    """
    insured_policy_number = get_parameter(event, "insured_id_number")
    parameters = [
        {
            'name': 'insured_policy_number',
            'value': {'stringValue': insured_policy_number}
        }
    ]

    result = run_command(MEMBER_DETAILS_QUERY, parameters)
    print(result)
    data = results_by_column_name(result)
    if not data:
        # BUG FIX: the original message said "last name" but interpolated the
        # policy number — corrected so the agent relays accurate feedback.
        return f"Insured Member with policy number {insured_policy_number} not found"
    member = data[0]
    response = {"memberName": member['insured_name'],
                "memberAddress": member['address'],
                "memberDateOfBirth": member['insured_birth_date'],
                "memberPlanDetails": {
                    "memberGroupNumber": member['insured_group_number'],
                    "memberPlanName": member['insured_plan_name'],
                    "memberPlanNumber": member['insured_policy_number'],
                },
                "memberPhoneNumber": member['phone_number']
                }

    return response

def listClaimsForInsured(event):
    """Placeholder implementation: returns hard-coded dummy claim data.

    NOTE(review): not wired to the database yet.
    """
    response = [
        {
            "claimId": "XXXXXXXX",
            "policyHolderId": "John Doe",
            "claimStatus": "2021-01-01",
        }
    ]
    return response

def getClaim(event):
    """Placeholder implementation: returns a fixed 'Not Implemented' record."""
    response = {"claimId": "XXXXXXXX",
                "claim_description": "Not Implemented"
                }

    return response

def create_claim(event):
    """Insert a new claim row from the request body and return its id.

    Optional diagnosis codes 2-4 default to '' and claim_status defaults to
    'NEW' when absent from the request body.

    Raises:
        ParameterNotFoundError: required property missing, or the INSERT's
            RETURNING clause produced no row.
    """
    parameters = [
        create_param("patient_id", get_request_property(event, "patient_id")),
        create_param("claim_date", get_request_property(event, "claim_date")),
        create_param("diagnosis_1", get_request_property(event, "diagnosis_1")),
        create_param("diagnosis_2", get_request_property(event, "diagnosis_2", '')),
        create_param("diagnosis_3", get_request_property(event, "diagnosis_3", '')),
        create_param("diagnosis_4", get_request_property(event, "diagnosis_4", '')),
        create_param("total_charges", get_request_property(event, "total_charges")),
        create_param("amountPaid", get_request_property(event, "amount_paid")),
        create_param("balanceDue", get_request_property(event, "balance")),
        create_param("claim_status", get_request_property(event, "claim_status", "NEW"))
    ]
    print(parameters)
    result = run_command(sql_statement=CREATE_CLAIM_QUERY, parameters=parameters)

    print(result)
    data = results_by_column_name(result)
    if not data:
        raise ParameterNotFoundError("Missing return record after Insert")
    response = {
        "claim_id": data[0]["claim_id"]
    }
    return response


def update_claim(event):
    """Update a claim's status (default 'ADJUDICATOR_REVIEW') and return it.

    Raises:
        ParameterNotFoundError: claim_id parameter missing, or the UPDATE's
            RETURNING clause produced no row (unknown claim_id).
    """
    parameters = [
        create_param("claim_id", int(get_parameter(event, "claim_id"))),
        create_param("claim_status", get_request_property(event, "status", "ADJUDICATOR_REVIEW"))
    ]
    print(parameters)
    result = run_command(sql_statement=UPDATE_CLAIM_QUERY, parameters=parameters)

    print(result)
    data = results_by_column_name(result)
    if not data:
        # BUG FIX: the original message said "after Insert" for an UPDATE.
        raise ParameterNotFoundError("Missing return record after Update")
    response = {
        "claim_id": data[0]["claim_id"],
        "claim_status": data[0]["claim_status"]
    }
    return response
Please provide a valid integer value'} 318 | 319 | parameters = [ 320 | create_param("claim_id", claim_id), 321 | create_param("date_of_service", get_request_property(event,"date_of_service")), 322 | create_param("place_of_service", get_request_property(event,"place_of_service")), 323 | create_param("type_of_service", get_request_property(event,"type_of_service")), 324 | create_param("procedure_code", get_request_property(event,"procedure_code")), 325 | create_param("amount", get_request_property(event,"amount")) 326 | ] 327 | result = run_command(sql_statement=CREATE_SERVICE_QUERY, parameters=parameters) 328 | print(result) 329 | data = results_by_column_name(result) 330 | if not data: 331 | raise ParameterNotFoundError("Missing return record after Insert") 332 | response = { 333 | "claim_id": data[0]["claim_id"], 334 | "service_id": data[0]["service_id"] 335 | } 336 | return response 337 | 338 | 339 | def getPatient(event): 340 | 341 | patient_lastname = get_parameter(event, "patient_lastName") 342 | patient_birth_date = get_parameter(event, "patient_birth_date") 343 | insured_policy_number = get_parameter(event, "insured_id_number") 344 | parameters=[ 345 | { 346 | 'name':'patient_lastname', 347 | 'value':{'stringValue':patient_lastname} 348 | }, 349 | { 350 | 'name':'insured_policy_number', 351 | 'value':{'stringValue':insured_policy_number} 352 | }, 353 | { 354 | 'name':'patient_birth_date', 355 | 'value':{'stringValue':patient_birth_date} 356 | } 357 | ] 358 | 359 | result = run_command(PATIENT_DETAILS_QUERY, parameters) 360 | print(result) 361 | data = results_by_column_name(result) 362 | if not data: 363 | return f"Patient with last name {patient_lastname} and birth data {patient_birth_date} not found associated with insured id number {insured_policy_number}" 364 | patient = data[0] 365 | response = { 366 | "firstName": patient['patient_firstname'], 367 | "lastName": patient['patient_lastname'], 368 | "dateOfBirth": patient['patient_birth_date'], 369 | 
"gender": patient['sex'], 370 | "address": patient['address'], 371 | "relationshipToInsured": patient['relationship_to_insured'], 372 | "phoneNumber": patient['phone_number'] 373 | } 374 | 375 | return response 376 | 377 | def createPatient(event) : 378 | CREATE_CLAIM_QUERY.format(claim_values=get_parameter(event, "claim_values")) 379 | response = {"claimId": "XXXXXXXX"} 380 | return response 381 | 382 | 383 | def lambda_handler(event, context): 384 | print(event) 385 | action = event["actionGroup"] 386 | api_path = event["apiPath"] 387 | httpMethod = event["httpMethod"] 388 | response_code = 200 389 | response = None 390 | try: 391 | match api_path: 392 | case '/member_and_patient': 393 | response = getMemberAndPatientDetails(event) 394 | case '/member/{insured_id_number}': 395 | response = getMemberDetails(event) 396 | case '/claims' : 397 | if(httpMethod == "GET"): 398 | response = getAllOpenClaims(event) 399 | elif(httpMethod == "POST"): 400 | response = create_claim(event) 401 | case '/patient' : 402 | if(httpMethod == "GET"): 403 | response = getPatient(event) 404 | elif(httpMethod == "POST"): 405 | response = createPatient(event) 406 | case '/get_claims_form_data': 407 | response = getClaimsFormData(event) 408 | case '/claims/{claim_id}/service': 409 | response = create_claim_service(event) 410 | case '/claims/{claim_id}': 411 | if(httpMethod == "GET"): 412 | response = getClaim(event) 413 | elif(httpMethod == "PATCH"): 414 | response = update_claim(event) 415 | case '/claims/insured/{insuredId}': 416 | response = listClaimsForInsured(event) 417 | case 'claims/{claim_id}/service': 418 | response = create_claim_service(event) 419 | case _: 420 | response_code = 404 421 | response = {"error": f"{action}::{api_path} is not a valid API, try another one."} 422 | except ParameterError as pe: 423 | response_code = 400 424 | response = {"error": str(pe)} 425 | except Exception as e: 426 | response_code = 500 427 | response = {"error": str(e)} 428 | 429 | 430 | 
response_body = {"application/json": {"body": json.dumps(response)}} 431 | 432 | 433 | action_response = { 434 | "actionGroup": event["actionGroup"], 435 | "apiPath": event["apiPath"], 436 | "httpMethod": event["httpMethod"], 437 | "httpStatusCode": response_code, 438 | "responseBody": response_body, 439 | } 440 | 441 | session_attributes = event["sessionAttributes"] 442 | prompt_session_attributes = event["promptSessionAttributes"] 443 | 444 | api_response = { 445 | "messageVersion": "1.0", 446 | "response": action_response, 447 | "sessionAttributes": session_attributes, 448 | "promptSessionAttributes": prompt_session_attributes, 449 | } 450 | print(api_response) 451 | return api_response -------------------------------------------------------------------------------- /10-Understanding-BDA/11_getting_started_with_bda.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "30c014be-715c-4d02-b8a0-bbded2352750", 6 | "metadata": {}, 7 | "source": [ 8 | "# How Bedrock Data Automation works\n", 9 | "\n", 10 | "Bedrock Data Automation (BDA) lets you configure output based on your processing needs for a specific data type: documents, images, video or audio. BDA can generate standard output or custom output. Below are some key concepts for understanding how BDA works. If you're a new user, start with the information about standard output.\n", 11 | "\n", 12 | "* **Standard output** – Sending a file to BDA with no other information returns the default standard output, which consists of commonly required information that's based on the data type. Examples include audio transcriptions, scene summaries for video, and document summaries. These outputs can be tuned to your use case using projects to modify them. For more information, see e.g. 
[Standard output for documents in Bedrock Data Automation](https://docs.aws.amazon.com/bedrock/latest/userguide/bda-output-documents.html).\n", 13 | "\n", 14 | "* **Custom output** – For documents and images, only. Choose custom output to define exactly what information you want to extract using a blueprint. A blueprint consists of a list of expected fields that you want retrieved from a document or image. Each field represents a piece of information that needs to be extracted to meet your specific use case. You can create your own blueprints, or select predefined blueprints from the BDA blueprint catalog. For more information, see [Custom output and blueprints](https://docs.aws.amazon.com/bedrock/latest/userguide/bda-custom-output-idp.html).\n", 15 | "\n", 16 | "* **Projects** – A project is a BDA resource that allows you to modify and organize output configurations. Each project can contain standard output configurations for documents, images, video, and audio, as well as custom output blueprints for documents and images. Projects are referenced in the `InvokeDataAutomationAsync` API call to instruct BDA on how to process the files. For more information about projects and their use cases, see [Bedrock Data Automation projects](https://docs.aws.amazon.com/bedrock/latest/userguide/bda-projects.html).\n", 17 | "\n", 18 | "In this notebook, we see will see how we can get started with using BDA API for your document processing use cases. The Amazon Bedrock Data Automation (BDA) feature provides a streamlined API workflow for processing your data. For all modalities, this workflow consists of three main steps: creating a project, invoking the analysis, and retrieving the results. To retrieve custom output for your processed data, you provide the Blueprint ARN when you invoke the analysis operation." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "id": "444c3287-fb3e-4da9-9d37-728456ac52fe", 24 | "metadata": { 25 | "editable": true, 26 | "slideshow": { 27 | "slide_type": "" 28 | }, 29 | "tags": [] 30 | }, 31 | "source": [ 32 | "## Prerequisites" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "1264708d-a57e-4e71-89f5-3090dbf73972", 38 | "metadata": {}, 39 | "source": [ 40 | "### Configure IAM Permissions\n", 41 | "\n", 42 | "The features being explored in the workshop require multiple IAM Policies for the role being used. If you're running this notebook within SageMaker Studio in your own Account, update the default execution role for the SageMaker user profile to include the IAM policies described in [README.md](../README.md)." 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "id": "6c12476d-7970-46cf-9488-7bc1dc1ca6ad", 48 | "metadata": {}, 49 | "source": [ 50 | "### Install Required Libraries" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "1fd8ca60-b430-4047-88fa-1a5189ec57aa", 57 | "metadata": { 58 | "editable": true, 59 | "slideshow": { 60 | "slide_type": "" 61 | }, 62 | "tags": [] 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "%pip install --no-warn-conflicts \"boto3>=1.37.6\" itables==2.2.4 PyPDF2==3.0.1 --upgrade -q" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "id": "58746522-d462-486a-a2b9-2b57dec72f84", 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "from utils.helper_functions import restart_kernel\n", 77 | "restart_kernel()" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "id": "aaf88a7d-9164-49f8-a83e-402600e4297a", 84 | "metadata": { 85 | "editable": true, 86 | "slideshow": { 87 | "slide_type": "" 88 | }, 89 | "tags": [] 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "%load_ext autoreload\n", 94 | "%autoreload 2" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "id": 
"77914217-a4b8-4220-9dd4-4382a1695f87", 100 | "metadata": { 101 | "editable": true, 102 | "slideshow": { 103 | "slide_type": "" 104 | }, 105 | "tags": [] 106 | }, 107 | "source": [ 108 | "### Setup\n", 109 | "\n", 110 | "Before we get to the part where we invoke BDA with our sample artifacts, let's setup some parameters and configuration that will be used throughout this notebook" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "id": "9c9bf70d-ae3b-4988-bbd7-4e544675dcbd", 117 | "metadata": { 118 | "editable": true, 119 | "slideshow": { 120 | "slide_type": "" 121 | }, 122 | "tags": [] 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "import boto3\n", 127 | "import json\n", 128 | "from IPython.display import JSON, IFrame\n", 129 | "import sagemaker\n", 130 | "from utils.helper_functions import read_s3_object, wait_for_job_to_complete, get_bucket_and_key\n", 131 | "from pathlib import Path\n", 132 | "import os\n", 133 | "\n", 134 | "session = sagemaker.Session()\n", 135 | "default_bucket = session.default_bucket()\n", 136 | "current_region = boto3.session.Session().region_name\n", 137 | "\n", 138 | "sts_client = boto3.client('sts')\n", 139 | "account_id = sts_client.get_caller_identity()['Account']\n", 140 | "\n", 141 | "# Initialize Bedrock Data Automation client\n", 142 | "bda_client = boto3.client('bedrock-data-automation')\n", 143 | "bda_runtime_client = boto3.client('bedrock-data-automation-runtime')\n", 144 | "s3_client = boto3.client('s3')\n", 145 | "\n", 146 | "bda_s3_input_location = f's3://{default_bucket}/bda/input'\n", 147 | "bda_s3_output_location = f's3://{default_bucket}/bda/output'\n", 148 | "\n", 149 | "print(f\"My BDA output s3 URI: {bda_s3_output_location}\")" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "id": "fc838291-b380-442c-9342-e0f1c399527b", 155 | "metadata": { 156 | "editable": true, 157 | "slideshow": { 158 | "slide_type": "" 159 | }, 160 | "tags": [] 161 | }, 162 | "source": 
[ 163 | "## Prepare sample document\n", 164 | "For this lab, we use a sample `Bank Statement` for Fiscal Year 2025 through November 30, 2024. The document is prepared by the Bureau of the Fiscal Service, Department of the Treasury and provides detailed information on the government's financial activities. We will extract a subset of pages from the `PDF` document and use BDA to extract and analyse the document content.\n", 165 | "\n", 166 | "### Download and store sample document\n", 167 | "we use the document url to download the document and store it a S3 location. \n", 168 | "\n", 169 | "Note - We will configure BDA to use the sample input from this S3 location, so we need to ensure that BDA has `s3:GetObject` access to this S3 location. If you are running the notebook in your own AWS Account, ensure that the SageMaker Execution role configured for this JupyterLab app has the right IAM permissions." 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "id": "a7e877eb-dcf7-4ce0-bc55-2b75af452a3b", 176 | "metadata": { 177 | "editable": true, 178 | "slideshow": { 179 | "slide_type": "" 180 | }, 181 | "tags": [] 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "local_download_path = \"data/documents/\"\n", 186 | "local_file_name = \"BankStatement.jpg\"\n", 187 | "file_path_local = f\"{local_download_path}/{local_file_name}\"\n", 188 | "os.makedirs(local_download_path, exist_ok=True)\n", 189 | "\n", 190 | "# Download Sample file\n", 191 | "#(bucket, key) = get_bucket_and_key(document_url)\n", 192 | "#response = s3_client.download_file(bucket, key, file_path_local)\n", 193 | "\n", 194 | "# Upload the document to S3\n", 195 | "document_s3_uri = f'{bda_s3_input_location}/{local_file_name}'\n", 196 | "\n", 197 | "target_s3_bucket, target_s3_key = get_bucket_and_key(document_s3_uri)\n", 198 | "s3_client.upload_file(file_path_local, target_s3_bucket, target_s3_key)\n", 199 | "\n", 200 | "print(f\"Downloaded file to: 
{file_path_local}\")\n", 201 | "print(f\"Uploaded file to S3: {target_s3_key}\")\n", 202 | "print(f\"document_s3_uri: {document_s3_uri}\")" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "id": "24270477-69c5-4f4c-b171-3dde759f2068", 208 | "metadata": { 209 | "editable": true, 210 | "slideshow": { 211 | "slide_type": "" 212 | }, 213 | "tags": [] 214 | }, 215 | "source": [ 216 | "### View Sample Document" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "id": "08521fcf-c8bc-413f-bc9d-b3a93e6be0da", 223 | "metadata": { 224 | "editable": true, 225 | "slideshow": { 226 | "slide_type": "" 227 | }, 228 | "tags": [] 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "IFrame(file_path_local, width=600, height=400)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "id": "ff2ef08f-2d3f-441b-8325-edbf98b90089", 238 | "metadata": {}, 239 | "source": [ 240 | "## Using BDA for standard output\n", 241 | "\n", 242 | "Sending e.g. a document to BDA with no other information using the [`InvokeDataAutomationAsync` API](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-data-automation-runtime/client/invoke_data_automation_async.html) looks as follows:\n", 243 | "\n", 244 | "BDA will process the file provided in `inputConfiguration` and write the output to the s3 URI of `outputConfiguration`.\n", 245 | "\n", 246 | "```python\n", 247 | "response = bda_runtime_client.invoke_data_automation_async(\n", 248 | " inputConfiguration={\n", 249 | " 's3Uri': 's3://bedrock-data-automation-prod-assets-us-west-2/demo-assets/Document/BankStatement.jpg'\n", 250 | " },\n", 251 | " outputConfiguration={\n", 252 | " 's3Uri': 's3://my_output'\n", 253 | " },\n", 254 | ")\n", 255 | "```" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "id": "9f3232db-8ed2-4ef6-921c-c2a8165f30db", 261 | "metadata": {}, 262 | "source": [ 263 | "### Invoking BDA for standard output" 264 | ] 265 | }, 266 | { 267 
| "cell_type": "code", 268 | "execution_count": null, 269 | "id": "e9199ffd-2889-4632-a683-b1d3d914ae91", 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "response = bda_runtime_client.invoke_data_automation_async(\n", 274 | " inputConfiguration={ \n", 275 | " 's3Uri': document_s3_uri\n", 276 | " },\n", 277 | " outputConfiguration={'s3Uri': f'{bda_s3_output_location}'},\n", 278 | " dataAutomationProfileArn = f'arn:aws:bedrock:{current_region}:{account_id}:data-automation-profile/us.data-automation-v1',\n", 279 | " dataAutomationConfiguration = {\n", 280 | " 'dataAutomationProjectArn': f'arn:aws:bedrock:{current_region}:aws:data-automation-project/public-default',\n", 281 | " }\n", 282 | ")\n", 283 | "JSON(response)" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "id": "bbfe5bc6-2e59-46fc-8f90-a22281622eb1", 289 | "metadata": {}, 290 | "source": [ 291 | "### Get data automation job status" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "id": "d0cd06e2-6b32-4648-9495-1735f560f6ba", 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [ 301 | "status_response = wait_for_job_to_complete(invocationArn=response[\"invocationArn\"])\n", 302 | "JSON(status_response)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "id": "d9b52698-02dc-4440-95d2-2b1b5c18f37e", 308 | "metadata": {}, 309 | "source": [ 310 | "### Retrieve job metadata" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "id": "08e051a1-118e-4f5a-ac94-5a983b8add0e", 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "job_metadata_s3 = status_response[\"outputConfiguration\"][\"s3Uri\"]\n", 321 | "print(f\"Retrieving job metadata: {job_metadata_s3}\")\n", 322 | "job_metadata = json.loads(read_s3_object(job_metadata_s3))\n", 323 | "\n", 324 | "JSON(job_metadata,root='job_metadata',expanded=True)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "id": 
"b67cd4f5-4574-4545-b6c9-7cff198e2c82", 330 | "metadata": {}, 331 | "source": [ 332 | "### Get job results for standard output\n", 333 | "\n", 334 | "The standard output will contain the following fields\n", 335 | "\n", 336 | "* metadata: simple document metadata like location and number of pages\n", 337 | "* document: Contains document statistics on number of elements, tables, and figures\n", 338 | "* pages: Contains markdown version of each page\n", 339 | "* elements: Contains details and references to Text blocks, figures, tables, charts, etc.\n", 340 | "\n", 341 | "Note that the standard output can configured to contain much more information about the document structure, or descriptions of figures, charts, etc. We will explore this in the next notebook" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "id": "34c15055-4cdf-4830-bb31-b801c8f95e3d", 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "standard_output_path = job_metadata[\"output_metadata\"][0][\"segment_metadata\"][0][\"standard_output_path\"]\n", 352 | "print(f\"Receiving the jobs results from: {standard_output_path}\")\n", 353 | "standard_output = json.loads(read_s3_object(standard_output_path))\n", 354 | "JSON(standard_output, root=\"standard_output\")" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "id": "53177bdf-c172-4879-bfa6-8fff323e7a97", 360 | "metadata": {}, 361 | "source": [ 362 | "## Using BDA for custom outputs with blueprints\n", 363 | "\n", 364 | "We can also provide a list of blueprints to be used when invoking BDA through the `InvokeDataAutomationAsync` API.\n", 365 | "BDA will match the document against the blueprints and extract or derive structured insights based on the blueprint definitions.\n", 366 | "\n", 367 | "We will see follow up notebooks how this works in more detail. 
Here we provide just a high level overview how it can be used, for example in `us-east-1` region.\n", 368 | "\n", 369 | "```python\n", 370 | "response = bda_runtime_client.invoke_data_automation_async(\n", 371 | " inputConfiguration={\n", 372 | " 's3Uri': 's3://bedrock-data-automation-prod-assets-us-east-1/demo-assets/Document/BankStatement.jpg'\n", 373 | " },\n", 374 | " outputConfiguration={\n", 375 | " 's3Uri': 's3://my_output'\n", 376 | " },\n", 377 | " dataAutomationProfileArn = f'arn:aws:bedrock:{current_region}:{account_id}:data-automation-profile/us.data-automation-v1',\n", 378 | " blueprints=[\n", 379 | " {\n", 380 | " 'blueprintArn': 'arn:aws:bedrock:us-east-1:aws:blueprint/bedrock-data-automation-public-bank-statement', \n", 381 | " },\n", 382 | "]\n", 383 | ")\n", 384 | "```" 385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "id": "75c9a2cf-0796-4482-aa30-0dbb63409d02", 390 | "metadata": {}, 391 | "source": [ 392 | "## Using projects with custom output and standard output\n", 393 | "\n", 394 | "A data automation project allows to bundle multiple configurations together, to be consumed as a single unit.\n", 395 | "It allows in particular to\n", 396 | "\n", 397 | "* extend the standard output by defining the granularity and types insights using `standardOutputConfiguration`\n", 398 | "* define a list of blueprints using `customOutputConfiguration`\n", 399 | "* activate document splitting using `overrideConfiguration`\n" 400 | ] 401 | }, 402 | { 403 | "cell_type": "markdown", 404 | "id": "f6234dcc-7201-439a-a963-ccb945064810", 405 | "metadata": {}, 406 | "source": [ 407 | "### Creating a data automation project\n", 408 | "\n", 409 | "The follow preview shows how we can create a data automation project using the boto3 client.\n", 410 | "\n", 411 | "```python\n", 412 | "import boto3\n", 413 | "\n", 414 | "client = boto3.client('bedrock-data-automation')\n", 415 | "response = bda_runtime_client.create_data_automation_project(\n", 416 | " 
projectName='my name',\n", 417 | " projectDescription='my description',\n", 418 | " projectStage='LIVE',\n", 419 | " standardOutputConfiguration={\n", 420 | " \"document\": {\n", 421 | " \"extraction\": {\n", 422 | " \"granularity\": {\"types\": [\"DOCUMENT\",\"PAGE\", \"ELEMENT\",\"LINE\",\"WORD\"]},\n", 423 | " \"boundingBox\": {\"state\": \"ENABLED\"}\n", 424 | " },\n", 425 | " \"generativeField\": {\"state\": \"ENABLED\"},\n", 426 | " \"outputFormat\": {\n", 427 | " \"textFormat\": {\"types\": [\"PLAIN_TEXT\", \"MARKDOWN\", \"HTML\", \"CSV\"]},\n", 428 | " \"additionalFileFormat\": {\"state\": \"ENABLED\"}\n", 429 | " }\n", 430 | " },\n", 431 | " \"image\": {...},\n", 432 | " \"video\": {...},\n", 433 | " \"audio\": {...}\n", 434 | " },\n", 435 | " customOutputConfiguration={\n", 436 | " 'blueprints': [\n", 437 | " {\n", 438 | " 'blueprintArn': 'arn:aws:bedrock:us-west-2:aws:blueprint/bedrock-data-automation-public-bank-statement' \n", 439 | " },\n", 440 | " ]\n", 441 | " },\n", 442 | " overrideConfiguration={\n", 443 | " 'document': {\n", 444 | " 'splitter': {\n", 445 | " 'state': 'ENABLED'\n", 446 | " }\n", 447 | " }\n", 448 | " },\n", 449 | ")\n", 450 | "```" 451 | ] 452 | }, 453 | { 454 | "cell_type": "markdown", 455 | "id": "ee5ed930-26a1-4a2a-b3f7-defebef0856b", 456 | "metadata": {}, 457 | "source": [ 458 | "### Invoking a data automation project\n", 459 | "\n", 460 | "We can now invoke a data automation project with an input file using the `InvokeDataAutomationAsync` API and by providing the previously created project ARN.\n", 461 | "\n", 462 | "```python\n", 463 | "response = bda_runtime_client.invoke_data_automation_async(\n", 464 | " inputConfiguration={\n", 465 | " 's3Uri': 's3://bedrock-data-automation-prod-assets-us-west-2/demo-assets/Document/BankStatement.jpg'\n", 466 | " },\n", 467 | " outputConfiguration={\n", 468 | " 's3Uri': 's3://my_output'\n", 469 | " },\n", 470 | " dataAutomationConfiguration={\n", 471 | " 'dataAutomationArn': 
def get_document_configuration(document_id, plan_name, plan_document_s3_uri):
    """Build the document payload for knowledge-base direct (custom) ingestion.

    Args:
        document_id: Custom identifier used to track the document in the KB.
        plan_name: Plan name attached as inline metadata (used for filtering).
        plan_document_s3_uri: S3 URI of the plan document to ingest.

    Returns:
        A dict shaped for ``bedrock_agent.ingest_knowledge_base_documents``.
    """
    content = {
        "custom": {
            "customDocumentIdentifier": {
                "id": document_id
            },
            "s3Location": {
                "uri": plan_document_s3_uri
            },
            "sourceType": "S3_LOCATION"
        },
        "dataSourceType": "CUSTOM"
    }
    metadata = {
        "inlineAttributes": [
            {
                "key": "plan_name",
                "value": {
                    "stringValue": plan_name,
                    "type": "STRING"
                }
            }
        ],
        "type": "IN_LINE_ATTRIBUTE"
    }
    return {"content": content, "metadata": metadata}


def create_agent_alias(bedrock_agent, agentAliasName, agentId, description):
    """Create a Bedrock agent alias, or update it when the name already exists.

    Polls until the alias reaches the PREPARED state.

    Args:
        bedrock_agent: boto3 'bedrock-agent' client.
        agentAliasName: Alias name to create or refresh.
        agentId: Id of the agent the alias points to.
        description: Alias description.

    Returns:
        Tuple of (agentAliasId, final alias status).

    Raises:
        ClientError: On Bedrock API failures (logged, then re-raised).
    """
    try:
        alias_page = bedrock_agent.list_agent_aliases(agentId=agentId)
        existing_alias = None
        for summary in alias_page['agentAliasSummaries']:
            if summary['agentAliasName'] == agentAliasName:
                existing_alias = summary
                break

        if existing_alias is not None:
            agentAliasId = existing_alias['agentAliasId']
            bedrock_agent.update_agent_alias(
                agentAliasId=agentAliasId,
                agentAliasName=agentAliasName,
                agentId=agentId,
                description=description,
            )
        else:
            created = bedrock_agent.create_agent_alias(
                agentAliasName=agentAliasName,
                agentId=agentId,
                description=description
            )
            agentAliasId = created['agentAlias']['agentAliasId']

        # Poll until the alias is usable (PREPARED) or reported FAILED.
        status_response = wait_for_completion(
            bedrock_agent,
            bedrock_agent.get_agent_alias,
            {
                'agentId': agentId,
                'agentAliasId': agentAliasId
            },
            'agentAlias.agentAliasStatus',
            ['PREPARED'],
            ['FAILED'],
            max_iterations=10,
            delay=5,
        )
        status = status_response['agentAlias']['agentAliasStatus']
        verb = 'Updated' if existing_alias else 'Created'
        print(f"{verb} agent alias with name {agentAliasName} and current status {status}")
        return agentAliasId, status
    except ClientError as e:
        print(f"Error creating or retrieving agent: {e}")
        raise
    except Exception as e:
        print(f"Error: {e}")
        raise
def associate_agent_knowledge_base(bedrock_agent, agentId, agentVersion, description, knowledgeBaseId, knowledgeBaseState):
    """Associate a knowledge base with an agent version, or refresh the link.

    Args:
        bedrock_agent: boto3 'bedrock-agent' client.
        agentId: Id of the agent.
        agentVersion: Agent version to attach the KB to (e.g. 'DRAFT').
        description: Description of the association.
        knowledgeBaseId: Id of the knowledge base.
        knowledgeBaseState: 'ENABLED' or 'DISABLED'.
    """
    kb_listing = bedrock_agent.list_agent_knowledge_bases(
        agentId=agentId,
        agentVersion=agentVersion)
    already_linked = any(
        entry['knowledgeBaseId'] == knowledgeBaseId
        for entry in kb_listing['agentKnowledgeBaseSummaries']
    )
    # Same keyword set is accepted by both the update and associate calls.
    association_kwargs = dict(
        agentId=agentId,
        agentVersion=agentVersion,
        description=description,
        knowledgeBaseId=knowledgeBaseId,
        knowledgeBaseState=knowledgeBaseState
    )
    if already_linked:
        print(f'Knowledge Base {knowledgeBaseId} already associated with agent {agentId}:{agentVersion}, Updating it.')
        bedrock_agent.update_agent_knowledge_base(**association_kwargs)
    else:
        bedrock_agent.associate_agent_knowledge_base(**association_kwargs)
def create_agent_action_group(bedrock_agent, actionGroupName, description,
                              actionGroupState, agentId, agentVersion,
                              apiSchema, agent_actions_lambda_arn):
    """Create or update an agent action group backed by a Lambda executor.

    Looks up the action group by name on the given agent version; updates it
    when found, otherwise creates it. In both cases the group is forced to
    ENABLED and polled until that state is reported.

    NOTE(review): the ``description`` and ``actionGroupState`` parameters are
    accepted but never passed to the API calls — kept for interface
    compatibility; confirm whether they should be wired through.

    Returns:
        Tuple of (actionGroupId, final action group state).

    Raises:
        ClientError: On Bedrock API failures (logged, then re-raised).
    """
    try:
        listing = bedrock_agent.list_agent_action_groups(
            agentId=agentId,
            agentVersion=agentVersion
        )
        existing_agent_ag = next(
            (ag for ag in listing['actionGroupSummaries']
             if ag['actionGroupName'] == actionGroupName),
            None
        )
        if existing_agent_ag:
            actionGroupId = existing_agent_ag['actionGroupId']
            actionGroupName = existing_agent_ag['actionGroupName']
            print(f"Action group with name {actionGroupName} already exists. Will update and enable it")
            bedrock_agent.update_agent_action_group(
                actionGroupExecutor={
                    'lambda': agent_actions_lambda_arn
                },
                actionGroupId=actionGroupId,
                actionGroupName=actionGroupName,
                actionGroupState='ENABLED',
                agentId=agentId,
                apiSchema=apiSchema,
                agentVersion=agentVersion
            )
        else:
            print(f'Creating new agent action group with name {actionGroupName}')
            created = bedrock_agent.create_agent_action_group(
                actionGroupExecutor={
                    'lambda': agent_actions_lambda_arn
                },
                actionGroupName=actionGroupName,
                actionGroupState='ENABLED',
                agentId=agentId,
                apiSchema=apiSchema,
                agentVersion=agentVersion
            )
            actionGroupId = created['agentActionGroup']['actionGroupId']

        # Poll until the action group reports ENABLED.
        status_response = wait_for_completion(
            bedrock_agent,
            bedrock_agent.get_agent_action_group,
            {
                'actionGroupId': actionGroupId,
                'agentId': agentId,
                'agentVersion': agentVersion
            },
            'agentActionGroup.actionGroupState',
            ['ENABLED'],
            [],
            max_iterations=10,
            delay=2,
        )
        status = status_response['agentActionGroup']['actionGroupState']
        print(f"{'Updated' if existing_agent_ag else 'Created'} agent action group with name {actionGroupName} and current status {status}")
        return actionGroupId, status
    except ClientError as e:
        print(f"Error creating or retrieving agent: {e}")
        raise
    except Exception as e:
        print(f"Error: {e}")
        raise
def create_agent(bedrock_agent, agentName, agent_service_role_arn,
                 description, foundation_model_id, agent_instruction, orchestrationType):
    """Create a Bedrock agent, or update the one that already has this name.

    Args:
        bedrock_agent: boto3 'bedrock-agent' client.
        agentName: Name of the agent to create or reuse.
        agent_service_role_arn: IAM role the agent assumes.
        description: Agent description.
        foundation_model_id: Foundation model id the agent invokes.
        agent_instruction: System instruction for the agent.
        orchestrationType: Orchestration mode (e.g. 'DEFAULT').

    Returns:
        Tuple of (agent_id, status, version, agent_arn); version may be None.

    Raises:
        ClientError: On Bedrock API failures (logged, then re-raised).
    """
    try:
        summaries = bedrock_agent.list_agents()['agentSummaries']
        existing_agent = next(
            (agent for agent in summaries if agent['agentName'] == agentName),
            None
        )
        if existing_agent:
            agent_id = existing_agent['agentId']
            agent_current_status = existing_agent['agentStatus']
            print(f"Using existing Agent with name {existing_agent['agentName']} and status {agent_current_status}")
            update_response = bedrock_agent.update_agent(
                agentId=agent_id,
                agentName=agentName,
                agentResourceRoleArn=agent_service_role_arn,
                description=description,
                foundationModel=foundation_model_id,
                instruction=agent_instruction,
                orchestrationType=orchestrationType
            )
            agent_arn = update_response['agent']['agentArn']
        else:
            print(f'Creating new agent with name {agentName}')
            create_response = bedrock_agent.create_agent(
                agentName=agentName,
                agentResourceRoleArn=agent_service_role_arn,
                description=description,
                foundationModel=foundation_model_id,
                instruction=agent_instruction,
                orchestrationType=orchestrationType
            )
            agent_id = create_response['agent']['agentId']
            agent_arn = create_response['agent']['agentArn']

        # NOT_PREPARED is an acceptable terminal state here; preparation
        # happens in a separate step.
        status_response = wait_for_completion(
            bedrock_agent,
            bedrock_agent.get_agent,
            {'agentId': agent_id},
            'agent.agentStatus',
            ['NOT_PREPARED', 'PREPARED'],
            ['FAILED'],
            max_iterations=10,
            delay=2,
        )
        status = status_response['agent']['agentStatus']
        version = status_response['agent'].get('agentVersion', None)
        print(f"{'Updated' if existing_agent else 'Created'} agent with name {agentName} and current status {status}")
        return agent_id, status, version, agent_arn
    except ClientError as e:
        print(f"Error creating or retrieving agent: {e}")
        raise
    except Exception as e:
        print(f"Error: {e}")
        raise
def create_knowledge_base(bedrock_agent, kb_name,
                          kb_description,
                          kb_role_arn,
                          embedding_model_arn,
                          vector_store_collection_arn,
                          vector_store_index_name):
    """Create (or reuse) a vector knowledge base on OpenSearch Serverless.

    Args:
        bedrock_agent: boto3 'bedrock-agent' client.
        kb_name: Knowledge base name; an existing ACTIVE KB with this name
            is reused instead of creating a new one.
        kb_description: Description for a newly created KB.
        kb_role_arn: IAM role the KB service assumes.
        embedding_model_arn: ARN of the embedding model.
        vector_store_collection_arn: OpenSearch Serverless collection ARN.
        vector_store_index_name: Vector index name inside the collection.

    Returns:
        Tuple of (knowledge_base_id, status).

    Raises:
        Exception: If a KB with the same name exists but is not ACTIVE.
        ClientError: On Bedrock API failures (logged, then re-raised).
    """
    storage_configuration = {
        'opensearchServerlessConfiguration': {
            'collectionArn': vector_store_collection_arn,
            'fieldMapping': {
                'metadataField': 'text-metadata',
                'textField': 'text',
                'vectorField': 'vector'
            },
            'vectorIndexName': vector_store_index_name
        },
        "type": 'OPENSEARCH_SERVERLESS'
    }
    knowledge_base_configuration = {
        'type': 'VECTOR',
        'vectorKnowledgeBaseConfiguration': {
            'embeddingModelArn': embedding_model_arn,
            'embeddingModelConfiguration': {
                "bedrockEmbeddingModelConfiguration": {
                    "dimensions": 1024
                }
            }
        }
    }
    try:
        kb_listing = bedrock_agent.list_knowledge_bases()
        existing_kb = next(
            (kb for kb in kb_listing['knowledgeBaseSummaries'] if kb['name'] == kb_name),
            None
        )
        if existing_kb:
            knowledge_base_id = existing_kb['knowledgeBaseId']
            kb_current_status = existing_kb['status']
            if kb_current_status != 'ACTIVE':
                raise Exception(f"Knowledge Base with name {existing_kb['name']} exists but is not in ACTIVE state. Knowledge Base state: {kb_current_status}")
            print(f"Using existing Knowledge Base with name {existing_kb['name']} and status {kb_current_status}")
            return knowledge_base_id, kb_current_status

        print(f'Creating new KB with name {kb_name}')
        create_kb_response = bedrock_agent.create_knowledge_base(
            description=kb_description,
            knowledgeBaseConfiguration=knowledge_base_configuration,
            name=kb_name,
            roleArn=kb_role_arn,
            storageConfiguration=storage_configuration
        )
        knowledge_base_id = create_kb_response['knowledgeBase']['knowledgeBaseId']
        # Poll until the KB becomes ACTIVE.
        status_response = wait_for_completion(
            bedrock_agent,
            bedrock_agent.get_knowledge_base,
            {'knowledgeBaseId': knowledge_base_id},
            'knowledgeBase.status',
            ['ACTIVE'],
            ['FAILED'],
            max_iterations=10,
            delay=10,
        )
        print(f"Created Knowledge Base with name {kb_name} and current status {status_response['knowledgeBase']['status']}")
        return knowledge_base_id, status_response['knowledgeBase']['status']
    except ClientError as e:
        print(f"Error creating or retrieving knowledge base: {e}")
        raise
    except Exception as e:
        print(f"Error: {e}")
        raise
def create_data_source(bedrock_agent, knowledge_base_id, datasource_name='claims-eoc-datasource'):
    """Create (or reuse) a CUSTOM data source with hierarchical chunking.

    Args:
        bedrock_agent: boto3 'bedrock-agent' client.
        knowledge_base_id: Id of the KB the data source belongs to.
        datasource_name: Data source name; an existing AVAILABLE one with this
            name is reused.

    Returns:
        Tuple of (data_source_id, status).

    Raises:
        Exception: If a data source with the same name exists but is not
            in the AVAILABLE state.
    """
    data_source_configuration = {
        'type': 'CUSTOM'
    }
    chunking_configuration = {
        'chunkingStrategy': 'HIERARCHICAL',
        'hierarchicalChunkingConfiguration': {
            'levelConfigurations': [
                {'maxTokens': 1500},  # parent chunks
                {'maxTokens': 300},   # child chunks
            ],
            'overlapTokens': 60
        }
    }

    ds_listing = bedrock_agent.list_data_sources(knowledgeBaseId=knowledge_base_id)
    existing_ds = next(
        (ds for ds in ds_listing['dataSourceSummaries'] if ds['name'] == datasource_name),
        None
    )
    if existing_ds:
        existing_ds_id = existing_ds['dataSourceId']
        ds_current_status = existing_ds['status']
        if ds_current_status != 'AVAILABLE':
            raise Exception(f"Data source with name {existing_ds['name']} exists but is not in AVAILABLE state. Data source state: {ds_current_status}")
        print(f"Using existing Data source with name {existing_ds['name']} and status {ds_current_status}")
        return existing_ds_id, ds_current_status

    print(f"Creating new Data source with name {datasource_name}")
    create_ds_response = bedrock_agent.create_data_source(
        dataSourceConfiguration=data_source_configuration,
        description='direct injection of claims eoc documents',
        knowledgeBaseId=knowledge_base_id,
        name=datasource_name,
        vectorIngestionConfiguration={
            'chunkingConfiguration': chunking_configuration
        }
    )
    datasource_id = create_ds_response['dataSource']['dataSourceId']
    # Poll until the new data source reports AVAILABLE.
    status_response = wait_for_completion(
        bedrock_agent,
        bedrock_agent.get_data_source,
        {'knowledgeBaseId': knowledge_base_id, 'dataSourceId': datasource_id},
        'dataSource.status',
        ['AVAILABLE'],
        ['FAILED'],
        max_iterations=5,
        delay=5,
    )
    print(f"Created datasource with name {status_response['dataSource']['name']} and current status {status_response['dataSource']['status']}")
    return datasource_id, status_response['dataSource']['status']
def ingest_and_wait(bedrock_agent, data_source_id, knowledge_base_id, documents):
    """Directly ingest documents into a KB data source and wait for indexing.

    Each entry in ``documents`` must provide 'document_id', 'plan_name' and
    'document_uri'. Indexing status is polled in parallel, one thread per
    document.

    Returns:
        List of per-document detail dicts from get_knowledge_base_documents.

    Raises:
        Exception: Re-raised from the first document whose polling fails.
    """
    print("Ingesting documents...")
    bedrock_agent.ingest_knowledge_base_documents(
        dataSourceId=data_source_id,
        knowledgeBaseId=knowledge_base_id,
        documents=[
            get_document_configuration(doc['document_id'], doc['plan_name'], doc['document_uri'])
            for doc in documents
        ]
    )

    def wait_for_single_document(document):
        # Poll one document until it is INDEXED (or FAILED).
        return wait_for_completion(
            client=bedrock_agent,
            get_status_function=bedrock_agent.get_knowledge_base_documents,
            status_kwargs={
                'dataSourceId': data_source_id,
                'knowledgeBaseId': knowledge_base_id,
                'documentIdentifiers': [{
                    'custom': {
                        'id': document['document_id']
                    },
                    'dataSourceType': 'CUSTOM'}]
            },
            completion_states=['INDEXED'],
            error_states=['FAILED'],
            status_path_in_response='documentDetails[0].status',
            max_iterations=5,
            delay=5, verbose=False
        )

    results = []
    # Poll all documents concurrently.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_to_document = {
            executor.submit(wait_for_single_document, document): document
            for document in documents
        }
        for future in concurrent.futures.as_completed(future_to_document):
            document = future_to_document[future]
            try:
                result = future.result()
                result.update(document)
                results.append(result['documentDetails'][0])
            except Exception as exc:
                print(f"Document {document['document_id']} generated an exception: {exc}")
                # NOTE(review): this entry is appended and then the exception
                # is re-raised, so the partial results never reach the caller.
                results.append((document, None))
                raise
    print("Ingestion complete.")

    # Consolidate and return results
    return results
def add_lambda_permission(
    function_name,
    principal,
    action,
    source_arn=None,
    verbose=False
):
    """Grant a service principal permission to invoke a Lambda function.

    A random statement id (prefix 'claims-review-agent-actions-') is
    generated on every call, so repeated calls add distinct policy
    statements rather than failing on a duplicate id.

    Args:
        function_name: Name or ARN of the target Lambda function.
        principal: Service principal being granted access
            (e.g. 'bedrock.amazonaws.com').
        action: Lambda action to allow (e.g. 'lambda:InvokeFunction').
        source_arn: Optional source ARN to scope the permission to.
        verbose: Unused; kept for backward compatibility with callers.

    Returns:
        The raw ``add_permission`` API response.

    Raises:
        Exception: Any failure from the Lambda API, logged and re-raised.
    """
    lambda_client = boto3.client('lambda')
    try:
        # Random suffix keeps statement ids unique across invocations.
        statement_id_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=6))
        statement_id = f"claims-review-agent-actions-{statement_id_suffix}"
        kwargs = {
            'FunctionName': function_name,
            'StatementId': statement_id,
            'Action': action,
            'Principal': principal
        }
        # Add source_arn if provided
        if source_arn:
            kwargs['SourceArn'] = source_arn
        response = lambda_client.add_permission(**kwargs)
        print(f"Successfully added permission: {response}")
        return response
    except Exception as e:
        print(f"Error adding permission: {str(e)}")
        # Bare raise preserves the original traceback (was `raise e`,
        # which re-raises with a less useful chained frame).
        raise
event['chunk']['bytes'] 516 | agent_answer = data.decode('utf8') 517 | return agent_answer 518 | elif 'trace' in event: 519 | if enable_trace: 520 | trace_info = extract_trace_info(event.get('trace',{})) 521 | if trace_info['Trace ID']: # Only add if we have a valid trace 522 | # Create new tab for this trace 523 | new_tab = create_trace_tab(trace_info) 524 | tab_contents.append(new_tab) 525 | 526 | # Update tab widget 527 | tab.children = tuple(tab_contents) 528 | # Set tab title 529 | tab.set_title(len(tab_contents) - 1, f"Trace {len(tab_contents)}") 530 | else: 531 | raise Exception("unexpected event.", event) 532 | except Exception as e: 533 | raise Exception("unexpected event.", e) 534 | 535 | -------------------------------------------------------------------------------- /20-Industry-Use-Cases/22-Medical-Claims-Processing/utils/helper_functions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import boto3 4 | from urllib.parse import urlparse 5 | import requests 6 | import base64 7 | import io 8 | from PIL import Image 9 | from PyPDF2 import PdfReader, PdfWriter 10 | from botocore.exceptions import ClientError 11 | from IPython.display import HTML 12 | from IPython.display import display 13 | from botocore.auth import SigV4Auth 14 | from botocore.awsrequest import AWSRequest 15 | import json 16 | import ipywidgets as widgets 17 | import html 18 | import pandas as pd 19 | 20 | s3_client = boto3.client("s3") 21 | bda_client = boto3.client('bedrock-data-automation') 22 | bda_runtime_client = boto3.client('bedrock-data-automation-runtime') 23 | cfn = boto3.client(service_name='cloudformation') 24 | region_name = boto3.session.Session().region_name 25 | # Dictionary to store the outputs 26 | resource_attributes = {} 27 | target_output_key = 'BDAWorkshopVPC' 28 | 29 | def get_stack_outputs(): 30 | # Initialize CloudFormation client 31 | cf_client = boto3.client('cloudformation', 
def get_stack_outputs():
    """Find the workshop CloudFormation stack and cache all of its outputs.

    Scans stacks in CREATE_COMPLETE/UPDATE_COMPLETE state for one exposing
    the module-level ``target_output_key`` output, then copies every output
    of that stack into the module-level ``resource_attributes`` dict.

    Returns:
        The populated resource_attributes dict, or None when no matching
        stack is found or an error occurs.
    """
    cf_client = boto3.client('cloudformation', region_name=region_name)
    try:
        paginator = cf_client.get_paginator('list_stacks')
        pages = paginator.paginate(StackStatusFilter=['CREATE_COMPLETE', 'UPDATE_COMPLETE'])
        for page in pages:
            for stack in page['StackSummaries']:
                stack_name = stack['StackName']
                try:
                    described = cf_client.describe_stacks(StackName=stack_name)
                    outputs = described['Stacks'][0].get('Outputs')
                    if not outputs:
                        continue
                    # Only cache outputs from the stack carrying our marker key.
                    if any(entry['OutputKey'] == target_output_key for entry in outputs):
                        for entry in outputs:
                            resource_attributes[entry['OutputKey']] = entry['OutputValue']
                        return resource_attributes
                except cf_client.exceptions.ClientError as e:
                    print(f"Error describing stack {stack_name}: {str(e)}")
                    continue
        print(f"No stack found with OutputKey: {target_output_key}")
        return None
    except Exception as e:
        print(f"Error: {str(e)}")
        return None


def get_stack_output(stack_name, output_key):
    """Return one output value from a named CloudFormation stack, or None."""
    response = cfn.describe_stacks(StackName=stack_name)
    stack = next((s for s in response['Stacks'] if s['StackName'] == stack_name), None)
    if stack is None:
        return None
    for output in stack['Outputs']:
        if output['OutputKey'] == output_key:
            return output['OutputValue']
    return None


def pil_to_bytes(image):
    """Serialize a PIL image to PNG-encoded bytes."""
    buffer = io.BytesIO()
    image.save(buffer, format='PNG')
    return buffer.getvalue()


def display_image(image):
    """Wrap a PIL image in a fixed-width ipywidgets Image widget."""
    image_widget = widgets.Image(value=pil_to_bytes(image), format='png')
    image_widget.layout.width = '400px'
    image_widget.layout.height = 'auto'
    image_widget.layout.object_fit = 'contain'
    return image_widget
key, value in json_obj.items(): 85 | result.append('') 86 | result.append(f'') 87 | result.append('') 90 | result.append('') 91 | result.append('
{key}') 88 | result.append(json_to_html(value, indent + 1)) 89 | result.append('
') 92 | elif isinstance(json_obj, list): 93 | result.append('') 94 | for i, item in enumerate(json_obj): 95 | result.append('') 96 | result.append(f'') 97 | result.append('') 100 | result.append('') 101 | result.append('
{i}') 98 | result.append(json_to_html(item, indent + 1)) 99 | result.append('
') 102 | elif isinstance(json_obj, (str, int, float, bool)) or json_obj is None: 103 | if isinstance(json_obj, str): 104 | result.append(f'"{json_obj}"') 105 | elif isinstance(json_obj, bool): 106 | result.append(f'{str(json_obj).lower()}') 107 | elif json_obj is None: 108 | result.append('null') 109 | else: 110 | result.append(f'{json_obj}') 111 | return ''.join(result) 112 | 113 | def display_json(json_data, title): 114 | html_content = f""" 115 |
116 |

{title}

117 |
118 | {json_to_html(json_data)} 119 |
120 |
121 | 160 | """ 161 | return widgets.HTML(html_content) 162 | 163 | def display_image_jsons(image, json_arr, titles): 164 | image_widget = display_image(image) 165 | right_column = widgets.VBox([display_json(data, title) for data, title in zip(json_arr, titles)]) 166 | bordered_hbox = widgets.HBox([image_widget, right_column]) 167 | bordered_hbox.layout.border = '5px solid black' 168 | bordered_hbox.layout.padding = '10px' 169 | bordered_hbox.layout.margin = '10px' 170 | return bordered_hbox 171 | 172 | def get_bucket_and_key(s3_uri): 173 | parsed_uri = urlparse(s3_uri) 174 | bucket_name = parsed_uri.netloc 175 | object_key = parsed_uri.path.lstrip('/') 176 | return (bucket_name, object_key) 177 | 178 | def wait_for_job_to_complete(invocationArn): 179 | get_status_response = bda_runtime_client.get_data_automation_status( 180 | invocationArn=invocationArn) 181 | status = get_status_response['status'] 182 | job_id = invocationArn.split('/')[-1] 183 | max_iterations = 60 184 | iteration_count = 0 185 | while status not in ['Success', 'ServiceError', 'ClientError']: 186 | print(f'Waiting for Job to Complete. Current status is {status}') 187 | # Wait for kernel restart 188 | time.sleep(10) # nosemgrep 189 | iteration_count += 1 190 | if iteration_count >= max_iterations: 191 | print(f"Maximum number of iterations ({max_iterations}) reached. Breaking the loop.") 192 | break 193 | get_status_response = bda_runtime_client.get_data_automation_status( 194 | invocationArn=invocationArn) 195 | status = get_status_response['status'] 196 | if iteration_count >= max_iterations: 197 | raise Exception("Job did not complete within the expected time frame.") 198 | else: 199 | print(f"Invocation Job with id {job_id} completed. 
Status is {status}") 200 | return get_status_response 201 | 202 | 203 | def read_s3_object(s3_uri): 204 | # Parse the S3 URI 205 | parsed_uri = urlparse(s3_uri) 206 | bucket_name = parsed_uri.netloc 207 | object_key = parsed_uri.path.lstrip('/') 208 | # Create an S3 client 209 | s3_client = boto3.client('s3') 210 | try: 211 | # Get the object from S3 212 | response = s3_client.get_object(Bucket=bucket_name, Key=object_key) 213 | 214 | # Read the content of the object 215 | content = response['Body'].read().decode('utf-8') 216 | return content 217 | except Exception as e: 218 | print(f"Error reading S3 object: {e}") 219 | return None 220 | 221 | def download_document(url, start_page_index=None, end_page_index=None, output_file_path=None): 222 | 223 | if not output_file_path: 224 | filename = os.path.basename(url) 225 | output_file_path = filename 226 | 227 | # Download the PDF 228 | response = requests.get(url, timeout=30) # nosemgrep 229 | print(response) 230 | pdf_content = io.BytesIO(response.content) 231 | 232 | # Create a PDF reader object 233 | pdf_reader = PdfReader(pdf_content) 234 | 235 | # Create a PDF writer object 236 | pdf_writer = PdfWriter() 237 | 238 | start_page_index = 0 if not start_page_index else max(start_page_index,0) 239 | end_page_index = len(pdf_reader.pages)-1 if not end_page_index else min(end_page_index,len(pdf_reader.pages)-1) 240 | 241 | # Specify the pages you want to extract (0-indexed) 242 | pages_to_extract = list(range(start_page_index, end_page_index)) 243 | 244 | # Add the specified pages to the writer 245 | for page_num in pages_to_extract: 246 | page = pdf_reader.pages[page_num] 247 | pdf_writer.add_page(page) 248 | 249 | print(f"Created file: {output_file_path}") 250 | # Save the extracted pages to a new PDF 251 | with open(output_file_path, "wb") as output_file: 252 | pdf_writer.write(output_file) 253 | return output_file_path 254 | 255 | 256 | def create_image_html_column(row: pd.Series, image_col: str, width: str = 
'300px') -> str: 257 | """ 258 | Create HTML embedded image from S3 URI by downloading and base64 encoding the image for a DataFrame row. 259 | 260 | Args: 261 | row (pd.Series): DataFrame row 262 | image_col (str): Name of column containing S3 URI 263 | width (str): Fixed width for image 264 | 265 | Returns: 266 | str: HTML string for embedded image 267 | """ 268 | s3_uri = row[image_col] 269 | if isinstance(s3_uri, list): 270 | s3_uri = s3_uri[0] 271 | if pd.isna(s3_uri): 272 | return '' 273 | 274 | try: 275 | # Parse S3 URI 276 | bucket_name, object_key = get_bucket_and_key(s3_uri) 277 | 278 | 279 | # Initialize S3 client 280 | s3_client = boto3.client('s3') 281 | 282 | # Download image from S3 283 | response = s3_client.get_object(Bucket=bucket_name, Key=object_key) 284 | image_content = response['Body'].read() 285 | 286 | # Open image using PIL 287 | image = Image.open(io.BytesIO(image_content)) 288 | 289 | # Convert image to RGB if it's in RGBA mode 290 | if image.mode == 'RGBA': 291 | image = image.convert('RGB') 292 | 293 | # Save image to bytes 294 | buffered = io.BytesIO() 295 | image.save(buffered, format="JPEG") 296 | 297 | # Encode image to base64 298 | img_str = base64.b64encode(buffered.getvalue()).decode() 299 | 300 | # Create HTML string with base64 encoded image 301 | return f'' 302 | except Exception as e: 303 | print(f"Error processing image {s3_uri}: {str(e)}") 304 | return '' 305 | 306 | # Example usage: 307 | """ 308 | # Add embedded images column 309 | df['embedded_images'] = add_embedded_images(df, 'crop_images', width='300px') 310 | 311 | # For Jupyter notebook display: 312 | from IPython.display import HTML 313 | HTML(df['embedded_images'].iloc[0]) 314 | """ 315 | 316 | 317 | 318 | def wait_for_completion( 319 | client, 320 | get_status_function, 321 | status_kwargs, 322 | status_path_in_response, 323 | completion_states, 324 | error_states, 325 | max_iterations=60, 326 | delay=10, 327 | verbose=True 328 | ): 329 | for _ in 
range(max_iterations): 330 | try: 331 | response = get_status_function(**status_kwargs) 332 | status = get_nested_value_new(response, status_path_in_response) 333 | 334 | if status in completion_states: 335 | if(verbose): 336 | print(f"Operation completed successfully with status: {status}") 337 | return response 338 | 339 | if status in error_states: 340 | raise Exception(f"Operation failed with status: {status}") 341 | if(verbose): 342 | print(f"Current status: {status}. Waiting...") 343 | time.sleep(delay) # nosemgrep 344 | 345 | except ClientError as e: 346 | raise Exception(f"Error checking status: {str(e)}") 347 | 348 | raise Exception(f"Operation timed out after {max_iterations} iterations") 349 | 350 | def get_nested_value_new(data, path): 351 | """Get value from nested dict/list using dot path with array support (e.g., 'items[0].name')""" 352 | current = data 353 | try: 354 | for part in path.replace('[', '.[').split('.'): 355 | if not part: 356 | continue 357 | if '[' in part: 358 | name, index = part.split('[') 359 | current = current[name] if name else current 360 | current = current[int(index.rstrip(']'))] 361 | else: 362 | current = current[part] 363 | return current 364 | except (KeyError, IndexError, TypeError, ValueError): 365 | return None 366 | 367 | def get_nested_value(data, path): 368 | """ 369 | Retrieve a value from a nested dictionary using a dot-separated path. 370 | 371 | :param data: The dictionary to search 372 | :param path: A string representing the path to the value, e.g., "Job.Status" 373 | :return: The value at the specified path, or None if not found 374 | """ 375 | keys = path.split('.') 376 | for key in keys: 377 | if isinstance(data, dict) and key in data: 378 | data = data[key] 379 | else: 380 | return None 381 | return data 382 | 383 | 384 | def display_html(data, root='root', expanded=True, bg_color='#f0f0f0'): 385 | html = f""" 386 |
387 | 388 |
{data}
389 |
390 | 405 | """ 406 | display(HTML(html)) 407 | 408 | def send_request(region, url, method, credentials, payload=None, service='bedrock'): 409 | host = url.split("/")[2] 410 | request = AWSRequest( 411 | method, 412 | url, 413 | data=payload, 414 | headers={'Host': host, 'Content-Type':'application/json'} 415 | ) 416 | SigV4Auth(credentials, service, region).add_auth(request) 417 | response = requests.request(method, url, headers=dict(request.headers), data=payload, timeout=50) 418 | response.raise_for_status() 419 | content = response.content.decode("utf-8") 420 | data = json.loads(content) 421 | return data 422 | 423 | def invoke_blueprint_recommendation_async(bda_client, payload): 424 | credentials = boto3.Session().get_credentials().get_frozen_credentials() 425 | region_name = boto3.Session().region_name 426 | url = f"{bda_client.meta.endpoint_url}/invokeBlueprintRecommendationAsync" 427 | print(f'Sending request to {url}') 428 | result = send_request( 429 | region = region_name, 430 | url = url, 431 | method = "POST", 432 | credentials = credentials, 433 | payload=payload 434 | ) 435 | return result 436 | 437 | 438 | def get_blueprint_recommendation(bda_client, job_id): 439 | credentials = boto3.Session().get_credentials().get_frozen_credentials() 440 | region_name = boto3.Session().region_name 441 | url = f"{bda_client.meta.endpoint_url}/getBlueprintRecommendation/{job_id}/" 442 | result = send_request( 443 | region = region_name, 444 | url = url, 445 | method = "POST", 446 | credentials = credentials 447 | ) 448 | return result 449 | 450 | def get_s3_to_dict(s3_url): 451 | bucket_name = s3_url.split('/')[2] 452 | object_key = '/'.join(s3_url.split('/')[3:]) 453 | 454 | # Download the JSON file from S3 455 | response = s3_client.get_object(Bucket=bucket_name, Key=object_key) 456 | json_content = response['Body'].read().decode('utf-8') 457 | 458 | # Parse the JSON content 459 | json_obj = json.loads(json_content) 460 | return json_obj 461 | 462 | def 
create_or_update_blueprint(bda_client, blueprint_name, blueprint_description, blueprint_type, blueprint_stage, blueprint_schema): 463 | list_blueprints_response = bda_client.list_blueprints( 464 | blueprintStageFilter='ALL' 465 | ) 466 | blueprint = next((blueprint for blueprint in 467 | list_blueprints_response['blueprints'] 468 | if 'blueprintName' in blueprint and 469 | blueprint['blueprintName'] == blueprint_name), None) 470 | response = None 471 | if not blueprint: 472 | print(f'No existing blueprint found with name={blueprint_name}, creating custom blueprint') 473 | response = bda_client.create_blueprint( 474 | blueprintName=blueprint_name, 475 | type=blueprint_type, 476 | blueprintStage=blueprint_stage, 477 | schema=json.dumps(blueprint_schema) 478 | ) 479 | else: 480 | print(f'Found existing blueprint with name={blueprint_name}, updating Stage and Schema') 481 | response = bda_client.update_blueprint( 482 | blueprintArn=blueprint['blueprintArn'], 483 | blueprintStage=blueprint_stage, 484 | schema=json.dumps(blueprint_schema) 485 | ) 486 | 487 | return response['blueprint']['blueprintArn'] 488 | 489 | 490 | def transform_custom_output(input_json, explainability_info): 491 | result = { 492 | "forms": {}, 493 | "tables": {} 494 | } 495 | 496 | def add_confidence(value, conf_info): 497 | return {"value": value, "confidence": conf_info["confidence"]} if isinstance(conf_info, dict) and "confidence" in conf_info else value 498 | 499 | def process_list_item(item, conf_info): 500 | return {k: add_confidence(v, conf_info.get(k, {})) for k, v in item.items() if isinstance(conf_info, dict)} 501 | 502 | # Iterate through the input JSON 503 | for key, value in input_json.items(): 504 | confidence_data = explainability_info.get(key, {}) 505 | if isinstance(value, list): 506 | # Handle lists (tables) 507 | processed_list = [] 508 | for idx, item in enumerate(value): 509 | if isinstance(item, dict): 510 | # Process each item in the list using its corresponding confidence 
info 511 | conf_info = confidence_data[idx] if isinstance(confidence_data, list) else confidence_data 512 | processed_list.append(process_list_item(item, conf_info)) 513 | result["tables"][key] = processed_list 514 | else: 515 | # Handle simple key-value pairs (forms) 516 | result["forms"][key] = add_confidence(value, confidence_data) 517 | 518 | return result 519 | 520 | 521 | def get_summaries(custom_outputs): 522 | return [{ 523 | 'page_indices': output.get('split_document', {}).get('page_indices'), 524 | 'matched_blueprint_name': output.get('matched_blueprint', {}).get('name'), 525 | 'confidence': output.get('matched_blueprint', {}).get('confidence'), 526 | 'document_class_type': output.get('document_class', {}).get('type') 527 | } if output else {} for output in custom_outputs] 528 | 529 | def show_popup_link(label, content, unique_id): 530 | # Create HTML with CSS and JavaScript 531 | html_content = f""" 532 | 589 | 590 | 591 | 592 | 598 | 599 | 624 | """ 625 | 626 | display(HTML(html_content)) 627 | 628 | 629 | --------------------------------------------------------------------------------