60 |
99 | """
100 | return widgets.HTML(html_content)
101 |
def display_image_jsons(image, json_arr, titles):
    """Build a bordered, side-by-side view of an image and several JSON panels.

    The left side is whatever ``display_image(image)`` returns; the right side
    is a vertical stack with one ``display_json(data, title)`` result per
    ``(json_arr, titles)`` pair. Pairs are formed with ``zip``, so surplus
    entries in the longer sequence are ignored.

    Args:
        image: Value forwarded unchanged to ``display_image``.
        json_arr: Iterable of JSON-like payloads, one per panel.
        titles: Iterable of titles, matched positionally with ``json_arr``.

    Returns:
        The ``widgets.HBox`` containing both columns, with a black border,
        padding and margin applied to its layout.
    """
    # Build the image panel first, then the JSON panels, as the original did.
    image_panel = display_image(image)

    json_panels = []
    for payload, heading in zip(json_arr, titles):
        json_panels.append(display_json(payload, heading))

    container = widgets.HBox([image_panel, widgets.VBox(json_panels)])

    # Frame the whole row so each image/JSON group is visually separated.
    frame = container.layout
    frame.border = '5px solid black'
    frame.padding = '10px'
    frame.margin = '10px'
    return container
110 |
111 |
def get_s3_to_dict(s3, s3_url):
    """Download a JSON object from S3 and return it parsed.

    Args:
        s3: Client object exposing ``get_object(Bucket=..., Key=...)`` whose
            result has a readable ``'Body'`` entry (presumably a boto3 S3
            client; confirm against callers).
        s3_url: Object location in ``<scheme>://<bucket>/<key>`` form, for
            example ``s3://my-bucket/path/to/file.json``.

    Returns:
        The value produced by ``json.loads`` on the UTF-8 decoded object body.

    Raises:
        ValueError: If ``s3_url`` does not contain both a bucket and a key.
    """
    # Split at most three times: ['s3:', '', bucket, key]. Limiting the split
    # keeps any '/' inside the key intact without re-joining the pieces.
    parts = s3_url.split('/', 3)
    if len(parts) < 4 or not parts[2] or not parts[3]:
        # Fail early with a clear message instead of an IndexError or an S3
        # request carrying an empty bucket/key.
        raise ValueError(
            f"Expected an S3 URL of the form 's3://<bucket>/<key>', got: {s3_url!r}"
        )
    bucket_name, object_key = parts[2], parts[3]

    # Download the JSON file from S3
    response = s3.get_object(Bucket=bucket_name, Key=object_key)
    json_content = response['Body'].read().decode('utf-8')

    # Parse the JSON content
    return json.loads(json_content)
--------------------------------------------------------------------------------
/20-Industry-Use-Cases/22-Medical-Claims-Processing/data/agent_resources/agent_prompt.txt:
--------------------------------------------------------------------------------
1 | You are a Claims Reviewer AI assistant. Your task is to review insurance claims following a specific process using provided function calls and a knowledge base. At the end of the review you
2 | will provide a detailed report of the review findings and status.
3 | To finish the review carry out all the steps detailed below carefully and thoroughly. DO NOT ASK THE USER FOR MORE INFORMATION. ALL information is available in the claim form data
4 |
5 | STEP 1 - EXTRACT CLAIM FORM DATA
6 | - To begin with, you will be provided with a claim form URI. You must first get the claim form data from S3 using the given URI as input.
7 | - Use the function call get_claim_form_data(claim_form_uri) to get the claim form data.
8 | - Once you have the claim form data, keep a note of all the fields and their values; you will use all of the fields in the form data in later steps.
9 |
10 | STEP 2 - VERIFY INSURED MEMBER AND PATIENT DETAILS
11 | - Use the insured id number, patient last name and patient date of birth from the claim form data to get the member and patient detail from the claims database
12 | - Compare the insured member details with the details in the claim form data
13 | - for each detail, add an entry to your final report. Use this table format
14 | | Field Name | Claim Form Data | Database Data | Match or No Match |
15 | |------------|-----------------|---------------|-------------------|
16 | - If any discrepancies are found, add a note to your report and stop the process and respond with final report.
17 | - If the insured member and patient details are verified, add a note to your report and continue the process
18 | - Continue to Step 3
19 |
20 | STEP 3 CREATE CLAIM RECORD
21 | - Only once the insured member and patient details have been matched, use the function call createClaim to create a claim record in the claims database.
22 | - use the data already gathered in the previous step to call the action to create a claim record
23 | 1. The patient details
24 | 2. The insured member details
25 | 3. Fields in the Claim form data
26 | - Use "IN_PROGRESS" as the status of the claim record
27 | - keep a note of the claim id returned after creating the claim data, you will need it later.
28 | - If the claim record is created, add a note to your final report
29 | - If the claim record is not created, add a note to your report and stop the process and respond with final report
30 | - CONTINUE TO STEP 4
31 |
32 | STEP 4. RETRIEVE EVIDENCE OF COVERAGE DETAILS FOR THE INSURANCE PLAN
33 | - Using the insured_plan_name from the insured member details, find a matching document in the Claims Evidence of Coverage Knowledge Base
34 | - STRICTLY USE only the document that matches the insured_plan_name.
35 | - If no document is found, add a note to your report and stop the process and respond with final report.
36 | - If document is found, add a note to your report and continue the process
37 | - CONTINUE TO STEP 5
38 |
39 | STEP 5. EVALUATE COVERAGE
40 | - Use the claim form data to identify the services, treatments, procedures, and charges.
41 | - Add to your note the list of services, treatments, procedures, respective date, place and associated charges.
42 | - Using the details of each service, procedure code and charge in the claim form data, search the content of the evidence of coverage document to determine whether that particular service, procedure or treatment is covered by the specific insurance plan
43 | - Add the findings in your final report in this format along with a snippet of text from the evidence of coverage document that supports your findings
44 | | Service/Procedure | Date | Place | Charges | Covered/Not Covered | Relevant Justification |
45 | |-------------------------------------|-----------|------------|---------|-----------------------|----------------------------------------------------------------------|
46 | - For each service/procedure, add an entry to your report.
47 | - CONTINUE TO STEP 6
48 |
49 | STEP 6. UPDATE CLAIM STATUS
50 | - If all services are covered:
51 | * Update the claim record using the claim id to set the status to "ELIGIBLE"
52 | - If some or no services are covered:
53 | * Update the claim record using the claim id to set the status to "ADJUDICATOR_REVIEW"
54 | - CONTINUE TO STEP 7
55 |
56 | STEP 7. Respond with the final report with the following contents
57 | - Table containing the member and patient details and if they match with details in the database
58 | - The table with services/procedures and their coverage status
59 | - State the final claim status (ELIGIBLE or ADJUDICATOR_REVIEW).
60 |
61 | When responding, please provide a thorough analysis following these steps. Be precise in your language, citing specific details from the claim form and EoC document.
62 | Do not ask the user for clarification or additional information; everything needed to complete the review is available in the claim form data. Your goal is to ensure accurate and fair claim processing
63 | while adhering to the insurance plan's coverage guidelines.
64 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Document Processing with Amazon Bedrock Data Automation
2 |
3 | ## How Bedrock Data Automation works
4 |
5 | Bedrock Data Automation (BDA) lets you configure output based on your processing needs for a specific data type: documents, images, video or audio. BDA can generate standard output or custom output. Below are some key concepts for understanding how BDA works. If you're a new user, start with the information about standard output.
6 |
7 | * **Standard output** – Sending a file to BDA with no other information returns the default standard output, which consists of commonly required information that's based on the data type. Examples include audio transcriptions, scene summaries for video, and document summaries. These outputs can be tuned to your use case using projects to modify them. For more information, see e.g. [Standard output for documents in Bedrock Data Automation](https://docs.aws.amazon.com/bedrock/latest/userguide/bda-output-documents.html).
8 |
9 | * **Custom output** – For documents and images, only. Choose custom output to define exactly what information you want to extract using a blueprint. A blueprint consists of a list of expected fields that you want retrieved from a document or image. Each field represents a piece of information that needs to be extracted to meet your specific use case. You can create your own blueprints, or select predefined blueprints from the BDA blueprint catalog. For more information, see [Custom output and blueprints](https://docs.aws.amazon.com/bedrock/latest/userguide/bda-custom-output-idp.html).
10 |
11 | * **Projects** – A project is a BDA resource that allows you to modify and organize output configurations. Each project can contain standard output configurations for documents, images, video, and audio, as well as custom output blueprints for documents and images. Projects are referenced in the `InvokeDataAutomationAsync` API call to instruct BDA on how to process the files. For more information about projects and their use cases, see [Bedrock Data Automation projects](https://docs.aws.amazon.com/bedrock/latest/userguide/bda-projects.html).
12 |
13 |
14 |
15 | This workshop contains the following sections:
16 |
17 | * **1 - Understanding Bedrock Data Automation**
18 | * [Getting Started - How Bedrock Data Automation works](10-Understanding-BDA/11_getting_started_with_bda.ipynb)
19 | * [Document Insights with Standard Outputs](10-Understanding-BDA/12_standard_output_extended.ipynb)
20 | * [Custom Document Insights with Blueprints](10-Understanding-BDA/13_custom_outputs_and_blueprints.ipynb)
21 | * **2 - Industry Use Cases - Document Processing**
22 | * [Mortgage and Lending Flow](20-Industry-Use-Cases/21-Mortgage-and-Lending/21_mortgage_and_lending.ipynb)
23 | * [Medical Claims Processing with Agents](20-Industry-Use-Cases/22-Medical-Claims-Processing/22_medical_claims_processing.ipynb)
24 |
25 | * **3 - Bedrock Data Automation Patterns (Coming Soon)**
26 |
27 | ### Use Cases
28 |
29 | Here are some example use cases that BDA can help you with -
30 |
31 | **Document processing**: Automate Intelligent Document Processing workflows at scale, transforming unstructured documents into structured data outputs that can be customized to integrate with existing systems and workflows.
32 |
33 | **Media analysis**: Extract meaningful insights from unstructured video by creating scene summaries, identifying unsafe/explicit content, extracting text, and classifying content, enabling intelligent video search, contextual advertising, and brand safety/compliance.
34 |
35 | **Generative AI assistants**: Enhance the performance of your retrieval-augmented generation (RAG) powered question answering applications by providing them with rich, modality-specific data representations extracted from your documents, images, video, and audio.
36 |
37 | ### Getting Started
38 |
39 | * Create a JupyterLab space in Amazon SageMaker Studio or any other environment
40 | * Make sure you have the required IAM role permissions
41 | * Check out the repository
42 | * Run through the notebooks
43 |
44 | ### Required IAM Permissions
45 |
46 | The features being explored in the notebook require the following IAM Policies for the execution role being used. If you're running this notebook within SageMaker Studio in your own Account, update the default execution role for the SageMaker user profile to include the following IAM policies.
47 |
48 | When using your own AWS Account to run this workshop, use AWS regions `us-east-1` or `us-west-2` where Bedrock Data Automation is available as of this writing.
49 |
50 | ```json
51 | [
52 | {
53 | "Sid": "BDACreatePermissions",
54 | "Effect": "Allow",
55 | "Action": [
56 | "bedrock:CreateDataAutomationProject",
57 | "bedrock:CreateBlueprint"
58 | ],
59 | "Resource": "*"
60 | },
61 | {
62 | "Sid": "BDAOProjectsPermissions",
63 | "Effect": "Allow",
64 | "Action": [
65 | "bedrock:CreateDataAutomationProject",
66 | "bedrock:UpdateDataAutomationProject",
67 | "bedrock:GetDataAutomationProject",
68 | "bedrock:GetDataAutomationStatus",
69 | "bedrock:ListDataAutomationProjects",
70 | "bedrock:InvokeDataAutomationAsync"
71 | ],
72 | "Resource": "arn:aws:bedrock:::data-automation-project/*"
73 | },
74 | {
75 | "Sid": "BDABlueprintPermissions",
76 | "Effect": "Allow",
77 | "Action": [
78 | "bedrock:GetBlueprint",
79 | "bedrock:ListBlueprints",
80 | "bedrock:UpdateBlueprint",
81 | "bedrock:DeleteBlueprint"
82 | ],
83 | "Resource": "arn:aws:bedrock:::blueprint/*"
84 | },
85 |
86 |
87 | {
88 | "Sid": "BDACrossRegionInference",
89 | "Effect": "Allow",
90 | "Action": ["bedrock:InvokeDataAutomationAsync"],
91 | "Resource": [
92 | "arn:aws:bedrock:us-east-1:account_id:data-automation-profile/us.data-automation-v1",
93 | "arn:aws:bedrock:us-east-2:account_id:data-automation-profile/us.data-automation-v1",
94 | "arn:aws:bedrock:us-west-1:account_id:data-automation-profile/us.data-automation-v1",
95 | "arn:aws:bedrock:us-west-2:account_id:data-automation-profile/us.data-automation-v1"]
96 | }
97 | ]
98 | ```
99 |
100 | Note - The policy uses wildcard(s) for demo purposes. AWS recommends applying least privilege when defining IAM policies in your own AWS accounts. See [Security Best Practices in IAM](https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html).
101 |
102 |
103 | ## Contributors
104 |
105 | * Raja Vaidyanathan
106 | * Arlind Nocaj
107 | * Conor Manton
108 | * Luca Perrozzi
--------------------------------------------------------------------------------
/10-Understanding-BDA/data/blueprints/explanation_of_benefits.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-07/schema#",
3 | "description": "A blueprint for a Remittance Advice (RA) or Explanation of Benefits (EOB), which is a standard document sent by insurance companies to detail how a medical claim was processed showing breakdown of charges, what the insurance paid, any discounts and amount due",
4 | "class": "Explanation of Benefits",
5 | "type": "object",
6 | "definitions": {
7 | "PaymentDetail": {
8 | "type": "object",
9 | "properties": {
10 | "paid_to": {
11 | "type": "string",
12 | "inferenceType": "explicit",
13 | "instruction": "Who the payment was made to"
14 | },
15 | "check_number": {
16 | "type": "string",
17 | "inferenceType": "explicit",
18 | "instruction": "The check number"
19 | },
20 | "amount": {
21 | "type": "number",
22 | "inferenceType": "explicit",
23 | "instruction": "The payment amount"
24 | }
25 | }
26 | },
27 | "payment_details": {
28 | "type": "object",
29 | "properties": {
30 | "paid_to": {
31 | "type": "string",
32 | "inferenceType": "explicit",
33 | "instruction": "Who the payment was made to"
34 | },
35 | "check_number": {
36 | "type": "string",
37 | "inferenceType": "explicit",
38 | "instruction": "The check number"
39 | },
40 | "amount": {
41 | "type": "number",
42 | "inferenceType": "explicit",
43 | "instruction": "The payment amount"
44 | }
45 | }
46 | },
47 | "claim_summary": {
48 | "type": "object",
49 | "properties": {
50 | "claim_number": {
51 | "type": "number",
52 | "inferenceType": "explicit",
53 | "instruction": "The claim number"
54 | },
55 | "patient_name": {
56 | "type": "string",
57 | "inferenceType": "explicit",
58 | "instruction": "The Patient Name associated with the claim"
59 | },
60 | "billed_amount": {
61 | "type": "number",
62 | "inferenceType": "explicit",
63 | "instruction": "Billed Amount"
64 | },
65 | "provider_discount": {
66 | "type": "number",
67 | "inferenceType": "explicit",
68 | "instruction": "Provider Discount"
69 | },
70 | "ucr_amount": {
71 | "type": "number",
72 | "inferenceType": "explicit",
73 | "instruction": "UCR amount in dollars"
74 | },
75 | "ineligible_amount": {
76 | "type": "number",
77 | "inferenceType": "explicit",
78 | "instruction": "Ineligible Amount in dollars"
79 | },
80 | "deductible_amount": {
81 | "type": "number",
82 | "inferenceType": "explicit",
83 | "instruction": "Deductible Amount in dollars"
84 | },
85 | "copay_amount": {
86 | "type": "number",
87 | "inferenceType": "explicit",
88 | "instruction": "the copay amount in dollars"
89 | },
90 | "payment_amount": {
91 | "type": "number",
92 | "inferenceType": "explicit",
93 | "instruction": "Payment Amount"
94 | }
95 | }
96 | },
97 | "claim_details": {
98 | "type": "object",
99 | "properties": {
100 | "dates_of_services": {
101 | "type": "string",
102 | "inferenceType": "explicit",
103 | "instruction": "Dates of Services"
104 | },
105 | "procedure_code": {
106 | "type": "string",
107 | "inferenceType": "explicit",
108 | "instruction": "Procedure Code"
109 | },
110 | "billed_amount": {
111 | "type": "number",
112 | "inferenceType": "explicit",
113 | "instruction": "Billed Amount in Dollars"
114 | },
115 | "provider_discount": {
116 | "type": "number",
117 | "inferenceType": "explicit",
118 | "instruction": "Provider Discount in Dollars"
119 | },
120 | "max_plan_allowable": {
121 | "type": "number",
122 | "inferenceType": "explicit",
123 | "instruction": "Maximum Plan Allowable in Dollars"
124 | },
125 | "ineligible_amount": {
126 | "type": "number",
127 | "inferenceType": "explicit",
128 | "instruction": "Ineligible Amount in Dollars"
129 | },
130 | "remark_code": {
131 | "type": "string",
132 | "inferenceType": "explicit",
133 | "instruction": "Remark Code"
134 | },
135 | "deductible_amount": {
136 | "type": "number",
137 | "inferenceType": "explicit",
138 | "instruction": "Deductible Amount in Dollars"
139 | },
140 | "copay_amount": {
141 | "type": "number",
142 | "inferenceType": "explicit",
143 | "instruction": "Copay Amount in Dollars"
144 | },
145 | "paid_at": {
146 | "type": "number",
147 | "inferenceType": "explicit",
148 | "instruction": "Paid at (percentage)"
149 | },
150 | "payment_amount": {
151 | "type": "number",
152 | "inferenceType": "explicit",
153 | "instruction": "Payment Amount in Dollars"
154 | }
155 | }
156 | }
157 | },
158 | "properties": {
159 | "employer": {
160 | "type": "string",
161 | "inferenceType": "explicit",
162 | "instruction": "The employer name"
163 | },
164 | "group_number": {
165 | "type": "string",
166 | "inferenceType": "explicit",
167 | "instruction": "The group number"
168 | },
169 | "date": {
170 | "type": "string",
171 | "inferenceType": "explicit",
172 | "instruction": "The date"
173 | },
174 | "check_number": {
175 | "type": "string",
176 | "inferenceType": "explicit",
177 | "instruction": "The check number"
178 | },
179 | "claim_number": {
180 | "type": "string",
181 | "inferenceType": "explicit",
182 | "instruction": "The claim number"
183 | },
184 | "patient_name": {
185 | "type": "string",
186 | "inferenceType": "explicit",
187 | "instruction": "The patient name"
188 | },
189 | "member_id": {
190 | "type": "string",
191 | "inferenceType": "explicit",
192 | "instruction": "The member ID"
193 | },
194 | "patient_responsibility": {
195 | "type": "number",
196 | "inferenceType": "explicit",
197 | "instruction": "The patient's responsibility amount"
198 | },
199 | "other_credits_or_adjustments": {
200 | "type": "number",
201 | "inferenceType": "explicit",
202 | "instruction": "Any other credits or adjustments amount"
203 | },
204 | "total_payment": {
205 | "type": "number",
206 | "inferenceType": "explicit",
207 | "instruction": "The total payment amount"
208 | },
209 | "paid_to": {
210 | "type": "string",
211 | "inferenceType": "explicit",
212 | "instruction": "Who the payment was made to"
213 | },
214 | "payment_details": {
215 | "type": "array",
216 | "instruction": "The payment details table",
217 | "items": {
218 | "$ref": "#/definitions/payment_details"
219 | }
220 | },
221 | "claim_details": {
222 | "type": "array",
223 | "instruction": "details of services that form the part of the claim",
224 | "items": {
225 | "$ref": "#/definitions/claim_details"
226 | }
227 | },
228 | "claim_summary": {
229 | "$ref": "#/definitions/claim_summary"
230 | }
231 | }
232 | }
--------------------------------------------------------------------------------
/10-Understanding-BDA/data/blueprints/claims_form.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-07/schema#",
3 | "class": "CMS 1500 Claim Form",
4 | "description": "A standard medical claim form used by healthcare providers in the US to bill health insurance companies for medical services.",
5 | "definitions": {
6 | "Procedure_Service_Supplies": {
7 | "properties": {
8 | "service_start_date": {
9 | "type": "string",
10 | "inferenceType": "explicit",
11 | "instruction": "The service start date from item 24A in YYYY-MM-DD format"
12 | },
13 | "service_end_date": {
14 | "type": "string",
15 | "inferenceType": "explicit",
16 | "instruction": "The service end date from item 24A in YYYY-MM-DD format"
17 | },
18 | "place_of_service": {
19 | "type": "string",
20 | "instruction": "The place the service was provided"
21 | },
22 | "type_of_service": {
23 | "type": "string",
24 | "instruction": "The type of medical service"
25 | },
26 | "procedure_modifier": {
27 | "type": "string",
28 | "inferenceType": "explicit",
29 | "instruction": "The procedure modifier from item 24D"
30 | },
31 | "diagnosis_code": {
32 | "type": "string",
33 | "inferenceType": "explicit",
34 | "instruction": "The diagnosis code from item 24E"
35 | },
36 | "procedure_code": {
37 | "type": "string",
38 | "instruction": "The procedure code"
39 | },
40 | "charge_amount": {
41 | "type": "number",
42 | "instruction": "The charge amount for the procedure"
43 | }
44 | }
45 | }
46 | },
47 | "properties": {
48 | "insurance_program": {
49 | "type": "string",
50 | "inferenceType": "explicit",
51 | "instruction": "The insurance program from item 1: Medicare, Medicaid, CHAMPUS, CHAMPVA, Group Health Plan"
52 | },
53 | "insured_id_number": {
54 | "type": "string",
55 | "inferenceType": "explicit",
56 | "instruction": "The insured's ID number from item 1a"
57 | },
58 | "patient_name": {
59 | "type": "string",
60 | "inferenceType": "explicit",
61 | "instruction": "The patient's name from item 2 in Last Name, First Name, Middle Initial format"
62 | },
63 | "patient_date_of_birth": {
64 | "type": "string",
65 | "inferenceType": "explicit",
66 | "instruction": "The patient's date of birth from item 3 in YYYY-MM-DD format"
67 | },
68 | "insured_name": {
69 | "type": "string",
70 | "inferenceType": "explicit",
71 | "instruction": "The insured's name from item 4 in Last Name, First Name, Middle Initial format"
72 | },
73 | "patient_address": {
74 | "type": "string",
75 | "inferenceType": "explicit",
76 | "instruction": "The patient's address from item 5"
77 | },
78 | "patient_relationship_to_insured": {
79 | "type": "string",
80 | "inferenceType": "explicit",
81 | "instruction": "The patient's relationship to insured from item 6"
82 | },
83 | "insured_address": {
84 | "type": "string",
85 | "inferenceType": "explicit",
86 | "instruction": "The insured's address from item 7 including No.,Street, City, State, Zip Code"
87 | },
88 | "insured_phone_number": {
89 | "type": "string",
90 | "inferenceType": "explicit",
91 | "instruction": "The insured's phone number, including area code from item 7 "
92 | },
93 | "patient_sex": {
94 | "type": "string",
95 | "inferenceType": "explicit",
96 | "instruction": "The patient's sex from item 3"
97 | },
98 | "patient_marital_status": {
99 | "type": "string",
100 | "inferenceType": "explicit",
101 | "instruction": "The patient's marital status from item 8"
102 | },
103 | "patient_condition_related_to": {
104 | "type": "string",
105 | "inferenceType": "explicit",
106 | "instruction": "Whether the patient's condition is related to employment, auto accident, or other accident from item 10"
107 | },
108 | "insured_policy_feca_number": {
109 | "type": "string",
110 | "inferenceType": "explicit",
111 | "instruction": "The insured's policy group or FECA number from item 11"
112 | },
113 | "insured_date_of_birth": {
114 | "type": "string",
115 | "inferenceType": "explicit",
116 | "instruction": "The insured's date of birth from item 11a"
117 | },
118 | "insured_employer_or_school": {
119 | "type": "string",
120 | "inferenceType": "explicit",
121 | "instruction": "The insured's employer or school 11b"
122 | },
123 | "insured_insurance_plan_name": {
124 | "type": "string",
125 | "inferenceType": "explicit",
126 | "instruction": "The insured's plan name or program name from item 11c"
127 | },
128 | "another_health_benefit_plan_indicator": {
129 | "type": "boolean",
130 | "inferenceType": "explicit",
131 | "instruction": "d. IS THERE ANOTHER HEALTH BENEFIT PLAN? Yes or No from item 11d"
132 | },
133 | "patient_signed_date": {
134 | "type": "string",
135 | "inferenceType": "explicit",
136 | "instruction": "patient's or authorized person's signature date from item 12"
137 | },
138 | "insured_signed_date": {
139 | "type": "string",
140 | "inferenceType": "explicit",
141 | "instruction": "The insured's or authorized person's signed date from item 13"
142 | },
143 | "illness_injury_date": {
144 | "type": "string",
145 | "inferenceType": "explicit",
146 | "instruction": "The date of current illness, injury, or pregnancy from item 14 in YYYY-MM-DD format"
147 | },
148 | "previous_illness_date": {
149 | "type": "string",
150 | "inferenceType": "explicit",
151 | "instruction": "The date of a previous similar illness from item 15 in YYYY-MM-DD format"
152 | },
153 | "unable_to_work_start_date": {
154 | "type": "string",
155 | "inferenceType": "explicit",
156 | "instruction": "The start date of the period the patient was unable to work, from item 16"
157 | },
158 | "unable_to_work_end_date": {
159 | "type": "string",
160 | "inferenceType": "explicit",
161 | "instruction": "The end date of the period the patient was unable to work, from item 16"
162 | },
163 | "referring_physician": {
164 | "type": "string",
165 | "inferenceType": "explicit",
166 | "instruction": "The name of the referring physician from item 17"
167 | },
168 | "referring_physician_id": {
169 | "type": "string",
170 | "inferenceType": "explicit",
171 | "instruction": "The ID number of the referring physician from item 17a"
172 | },
173 | "hospitalization_start_date": {
174 | "type": "string",
175 | "inferenceType": "explicit",
176 | "instruction": "The hospitalization start date related to current services from item 18"
177 | },
178 | "hospitalization_end_date": {
179 | "type": "string",
180 | "inferenceType": "explicit",
181 | "instruction": "The hospitalization end date related to current services from item 18"
182 | },
183 | "is_outside_lab_indicator": {
184 | "type": "boolean",
185 | "inferenceType": "explicit",
186 | "instruction": "Are there outside lab charges? from item 20"
187 | },
188 | "outside_lab_charges": {
189 | "type": "string",
190 | "inferenceType": "explicit",
191 | "instruction": "Whether outside lab was used and charges from item 20"
192 | },
193 | "diagnosis_1": {
194 | "type": "string",
195 | "inferenceType": "explicit",
196 | "instruction": "The diagnosis or nature of illness or injury from item 21.1"
197 | },
198 | "diagnosis_2": {
199 | "type": "string",
200 | "inferenceType": "explicit",
201 | "instruction": "The diagnosis or nature of illness or injury from item 21.2"
202 | },
203 | "diagnosis_3": {
204 | "type": "string",
205 | "inferenceType": "explicit",
206 | "instruction": "The diagnosis or nature of illness or injury from item 21.3"
207 | },
208 | "diagnosis_4": {
209 | "type": "string",
210 | "inferenceType": "explicit",
211 | "instruction": "The diagnosis or nature of illness or injury from item 21.4"
212 | },
213 | "medicaid_resubmission_number": {
214 | "type": "string",
215 | "inferenceType": "explicit",
216 | "instruction": "MEDICAID RESUBMISSION NUMBER from item 22"
217 | },
218 | "medicaid_original_ref_number": {
219 | "type": "string",
220 | "inferenceType": "explicit",
221 | "instruction": "Medicaid - Original ref no. from item 22"
222 | },
223 | "prior_authorization_number": {
224 | "type": "string",
225 | "inferenceType": "explicit",
226 | "instruction": "The prior authorization number from item 23"
227 | },
228 | "medical_procedures": {
229 | "type": "array",
230 | "instruction": "The list of medical procedures from the table in item 24",
231 | "items": {
232 | "$ref": "#/definitions/Procedure_Service_Supplies"
233 | }
234 | },
235 | "tax_id_type": {
236 | "type": "string",
237 | "inferenceType": "explicit",
238 | "instruction": "The tax ID type (SSN or EIN) from item 25"
239 | },
240 | "tax_id_number": {
241 | "type": "string",
242 | "inferenceType": "explicit",
243 | "instruction": "The federal tax ID number (SSN or EIN) from item 25"
244 | },
245 | "total_charges": {"type": "number","inferenceType": "explicit","instruction": "The total charges in dollars from item 28"},
246 | "amount_paid": {"type": "number","inferenceType": "explicit","instruction": "The amount paid in dollars from item 29"}
247 | }
248 | }
--------------------------------------------------------------------------------
/10-Understanding-BDA/data/blueprints/blueprint_schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-07/schema#",
3 | "class": "CMS 1500 Claim Form",
4 | "description": "A standard medical claim form used by healthcare providers in the US to bill health insurance companies for medical services.",
5 | "definitions": {
6 | "Procedure_Service_Supplies": {
7 | "properties": {
8 | "service_start_date": {
9 | "type": "string",
10 | "inferenceType": "explicit",
11 | "instruction": "The service start date from item 24A in YYYY-MM-DD format"
12 | },
13 | "service_end_date": {
14 | "type": "string",
15 | "inferenceType": "explicit",
16 | "instruction": "The service end date from item 24A in YYYY-MM-DD format"
17 | },
18 | "place_of_service": {
19 | "type": "string",
20 | "instruction": "The place the service was provided"
21 | },
22 | "type_of_service": {
23 | "type": "string",
24 | "instruction": "The type of medical service"
25 | },
26 | "procedure_modifier": {
27 | "type": "string",
28 | "inferenceType": "explicit",
29 | "instruction": "The procedure modifier from item 24D"
30 | },
31 | "diagnosis_code": {
32 | "type": "string",
33 | "inferenceType": "explicit",
34 | "instruction": "The diagnosis code from item 24E"
35 | },
36 | "procedure_code": {
37 | "type": "string",
38 | "instruction": "The procedure code"
39 | },
40 | "charge_amount": {
41 | "type": "number",
42 | "instruction": "The charge amount for the procedure"
43 | }
44 | }
45 | }
46 | },
47 | "properties": {
48 | "insurance_program": {
49 | "type": "string",
50 | "inferenceType": "explicit",
51 | "instruction": "The insurance program from item 1: Medicare, Medicaid, CHAMPUS, CHAMPVA, Group Health Plan"
52 | },
53 | "insured_id_number": {
54 | "type": "string",
55 | "inferenceType": "explicit",
56 | "instruction": "The insured's ID number from item 1a"
57 | },
58 | "patient_name": {
59 | "type": "string",
60 | "inferenceType": "explicit",
61 | "instruction": "The patient's name from item 2 in Last Name, First Name, Middle Initial format"
62 | },
63 | "patient_date_of_birth": {
64 | "type": "string",
65 | "inferenceType": "explicit",
66 | "instruction": "The patient's date of birth from item 3 in YYYY-MM-DD format"
67 | },
68 | "insured_name": {
69 | "type": "string",
70 | "inferenceType": "explicit",
71 | "instruction": "The insured's name from item 4 in Last Name, First Name, Middle Initial format"
72 | },
73 | "patient_address": {
74 | "type": "string",
75 | "inferenceType": "explicit",
76 | "instruction": "The patient's address from item 5"
77 | },
78 | "patient_relationship_to_insured": {
79 | "type": "string",
80 | "inferenceType": "explicit",
81 | "instruction": "The patient's relationship to insured from item 6"
82 | },
83 | "insured_address": {
84 | "type": "string",
85 | "inferenceType": "explicit",
86 | "instruction": "The insured's address from item 7 including No.,Street, City, State, Zip Code"
87 | },
88 | "insured_phone_number": {
89 | "type": "string",
90 | "inferenceType": "explicit",
91 | "instruction": "The insured's phone number, including area code from item 7 "
92 | },
93 | "patient_sex": {
94 | "type": "string",
95 | "inferenceType": "explicit",
96 | "instruction": "The patient's sex from item 3"
97 | },
98 | "patient_marital_status": {
99 | "type": "string",
100 | "inferenceType": "explicit",
101 | "instruction": "The patient's marital status from item 8"
102 | },
103 | "patient_condition_related_to": {
104 | "type": "string",
105 | "inferenceType": "explicit",
106 | "instruction": "Whether the patient's condition is related to employment, auto accident, or other accident from item 10"
107 | },
108 | "insured_policy_feca_number": {
109 | "type": "string",
110 | "inferenceType": "explicit",
111 | "instruction": "The insured's policy group or FECA number from item 11"
112 | },
113 | "insured_date_of_birth": {
114 | "type": "string",
115 | "inferenceType": "explicit",
116 | "instruction": "The insured's date of birth from item 11a"
117 | },
118 | "insured_employer_or_school": {
119 | "type": "string",
120 | "inferenceType": "explicit",
121 | "instruction": "The insured's employer or school name from item 11b"
122 | },
123 | "insured_insurance_plan_name": {
124 | "type": "string",
125 | "inferenceType": "explicit",
126 | "instruction": "The insured's plan name or program name from item 11c"
127 | },
128 | "another_health_benefit_plan_indicator": {
129 | "type": "boolean",
130 | "inferenceType": "explicit",
131 | "instruction": "d. IS THERE ANOTHER HEALTH BENEFIT PLAN? Yes or No from item 11d"
132 | },
133 | "patient_signed_date": {
134 | "type": "string",
135 | "inferenceType": "explicit",
136 | "instruction": "patient's or authorized person's signature date from item 12"
137 | },
138 | "insured_signed_date": {
139 | "type": "string",
140 | "inferenceType": "explicit",
141 | "instruction": "The insured's or authorized person's signed date from item 13"
142 | },
143 | "illness_injury_date": {
144 | "type": "string",
145 | "inferenceType": "explicit",
146 | "instruction": "The date of current illness, injury, or pregnancy from item 14 in YYYY-MM-DD format"
147 | },
148 | "previous_illness_date": {
149 | "type": "string",
150 | "inferenceType": "explicit",
151 | "instruction": "The date of a previous similar illness from item 15 in YYYY-MM-DD format"
152 | },
153 | "unable_to_work_start_date": {
154 | "type": "string",
155 | "inferenceType": "explicit",
156 | "instruction": "The start date of the period the patient was unable to work from item 16"
157 | },
158 | "unable_to_work_end_date": {
159 | "type": "string",
160 | "inferenceType": "explicit",
161 | "instruction": "The end date of the period the patient was unable to work from item 16"
162 | },
163 | "referring_physician": {
164 | "type": "string",
165 | "inferenceType": "explicit",
166 | "instruction": "The name of the referring physician from item 17"
167 | },
168 | "referring_physician_id": {
169 | "type": "string",
170 | "inferenceType": "explicit",
171 | "instruction": "The ID number of the referring physician from item 17a"
172 | },
173 | "hospitalization_start_date": {
174 | "type": "string",
175 | "inferenceType": "explicit",
176 | "instruction": "The hospitalization start date related to current services from item 18"
177 | },
178 | "hospitalization_end_date": {
179 | "type": "string",
180 | "inferenceType": "explicit",
181 | "instruction": "The hospitalization end date related to current services from item 18"
182 | },
183 | "is_outside_lab_indicator": {
184 | "type": "boolean",
185 | "inferenceType": "explicit",
186 | "instruction": "Are there outside lab charges? from item 20"
187 | },
188 | "outside_lab_charges": {
189 | "type": "string",
190 | "inferenceType": "explicit",
191 | "instruction": "The outside lab charges amount from item 20"
192 | },
193 | "diagnosis_1": {
194 | "type": "string",
195 | "inferenceType": "explicit",
196 | "instruction": "The diagnosis or nature of illness or injury from item 21.1"
197 | },
198 | "diagnosis_2": {
199 | "type": "string",
200 | "inferenceType": "explicit",
201 | "instruction": "The diagnosis or nature of illness or injury from item 21.2"
202 | },
203 | "diagnosis_3": {
204 | "type": "string",
205 | "inferenceType": "explicit",
206 | "instruction": "The diagnosis or nature of illness or injury from item 21.3"
207 | },
208 | "diagnosis_4": {
209 | "type": "string",
210 | "inferenceType": "explicit",
211 | "instruction": "The diagnosis or nature of illness or injury from item 21.4"
212 | },
213 | "medicaid_resubmission_number": {
214 | "type": "string",
215 | "inferenceType": "explicit",
216 | "instruction": "MEDICAID RESUBMISSION NUMBER from item 22"
217 | },
218 | "medicaid_original_ref_number": {
219 | "type": "string",
220 | "inferenceType": "explicit",
221 | "instruction": "Medicaid - Original ref no. from item 22"
222 | },
223 | "prior_authorization_number": {
224 | "type": "string",
225 | "inferenceType": "explicit",
226 | "instruction": "The prior authorization number from item 23"
227 | },
228 | "medical_procedures": {
229 | "type": "array",
230 | "instruction": "The list of medical procedures from the table in item 24",
231 | "items": {
232 | "$ref": "#/definitions/Procedure_Service_Supplies"
233 | }
234 | },
235 | "tax_id_type": {
236 | "type": "string",
237 | "inferenceType": "explicit",
238 | "instruction": "The tax ID type (SSN or EIN) from item 25"
239 | },
240 | "tax_id_number": {
241 | "type": "string",
242 | "inferenceType": "explicit",
243 | "instruction": "The federal tax ID number (SSN or EIN) from item 25"
244 | },
245 | "total_charges": {"type": "number","inferenceType": "explicit","instruction": "The total charges in dollars from item 28"},
246 | "amount_paid": {"type": "number","inferenceType": "explicit","instruction": "The amount paid in dollars from item 29"}
247 | }
248 | }
--------------------------------------------------------------------------------
/20-Industry-Use-Cases/22-Medical-Claims-Processing/data/blueprint/claims_form.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-07/schema#",
3 | "class": "CMS 1500 Claim Form",
4 | "description": "A standard medical claim form used by healthcare providers in the US to bill health insurance companies for medical services.",
5 | "definitions": {
6 | "Procedure_Service_Supplies": {
7 | "properties": {
8 | "service_start_date": {
9 | "type": "string",
10 | "inferenceType": "explicit",
11 | "instruction": "The service start date from item 24A in YYYY-MM-DD format"
12 | },
13 | "service_end_date": {
14 | "type": "string",
15 | "inferenceType": "explicit",
16 | "instruction": "The service end date from item 24A in YYYY-MM-DD format"
17 | },
18 | "place_of_service": {
19 | "type": "string",
20 | "instruction": "The place the service was provided"
21 | },
22 | "type_of_service": {
23 | "type": "string",
24 | "instruction": "The type of medical service"
25 | },
26 | "procedure_modifier": {
27 | "type": "string",
28 | "inferenceType": "explicit",
29 | "instruction": "The procedure modifier from item 24D"
30 | },
31 | "diagnosis_code": {
32 | "type": "string",
33 | "inferenceType": "explicit",
34 | "instruction": "The diagnosis code from item 24E"
35 | },
36 | "procedure_code": {
37 | "type": "string",
38 | "instruction": "The procedure code"
39 | },
40 | "charge_amount": {
41 | "type": "number",
42 | "instruction": "The charge amount for the procedure"
43 | }
44 | }
45 | }
46 | },
47 | "properties": {
48 | "insurance_program": {
49 | "type": "string",
50 | "inferenceType": "explicit",
51 | "instruction": "The insurance program from item 1: Medicare, Medicaid, CHAMPUS, CHAMPVA, Group Health Plan"
52 | },
53 | "insured_id_number": {
54 | "type": "string",
55 | "inferenceType": "explicit",
56 | "instruction": "The insured's ID number from item 1a"
57 | },
58 | "patient_name": {
59 | "type": "string",
60 | "inferenceType": "explicit",
61 | "instruction": "The patient's name from item 2 in Last Name, First Name, Middle Initial format"
62 | },
63 | "patient_date_of_birth": {
64 | "type": "string",
65 | "inferenceType": "explicit",
66 | "instruction": "The patient's date of birth from item 3 in YYYY-MM-DD format"
67 | },
68 | "insured_name": {
69 | "type": "string",
70 | "inferenceType": "explicit",
71 | "instruction": "The insured's name from item 4 in Last Name, First Name, Middle Initial format"
72 | },
73 | "patient_address": {
74 | "type": "string",
75 | "inferenceType": "explicit",
76 | "instruction": "The patient's address from item 5"
77 | },
78 | "patient_relationship_to_insured": {
79 | "type": "string",
80 | "inferenceType": "explicit",
81 | "instruction": "The patient's relationship to insured from item 6"
82 | },
83 | "insured_address": {
84 | "type": "string",
85 | "inferenceType": "explicit",
86 | "instruction": "The insured's address from item 7 including No.,Street, City, State, Zip Code"
87 | },
88 | "insured_phone_number": {
89 | "type": "string",
90 | "inferenceType": "explicit",
91 | "instruction": "The insured's phone number, including area code from item 7 "
92 | },
93 | "patient_sex": {
94 | "type": "string",
95 | "inferenceType": "explicit",
96 | "instruction": "The patient's sex from item 3"
97 | },
98 | "patient_marital_status": {
99 | "type": "string",
100 | "inferenceType": "explicit",
101 | "instruction": "The patient's marital status from item 8"
102 | },
103 | "patient_condition_related_to": {
104 | "type": "string",
105 | "inferenceType": "explicit",
106 | "instruction": "Whether the patient's condition is related to employment, auto accident, or other accident from item 10"
107 | },
108 | "insured_policy_feca_number": {
109 | "type": "string",
110 | "inferenceType": "explicit",
111 | "instruction": "The insured's policy group or FECA number from item 11"
112 | },
113 | "insured_date_of_birth": {
114 | "type": "string",
115 | "inferenceType": "explicit",
116 | "instruction": "The insured's date of birth from item 11a"
117 | },
118 | "insured_employer_or_school": {
119 | "type": "string",
120 | "inferenceType": "explicit",
121 | "instruction": "The insured's employer or school name from item 11b"
122 | },
123 | "insured_insurance_plan_name": {
124 | "type": "string",
125 | "inferenceType": "explicit",
126 | "instruction": "The insured's plan name or program name from item 11c"
127 | },
128 | "another_health_benefit_plan_indicator": {
129 | "type": "boolean",
130 | "inferenceType": "explicit",
131 | "instruction": "d. IS THERE ANOTHER HEALTH BENEFIT PLAN? Yes or No from item 11d"
132 | },
133 | "patient_signed_date": {
134 | "type": "string",
135 | "inferenceType": "explicit",
136 | "instruction": "patient's or authorized person's signature date from item 12"
137 | },
138 | "insured_signed_date": {
139 | "type": "string",
140 | "inferenceType": "explicit",
141 | "instruction": "The insured's or authorized person's signed date from item 13"
142 | },
143 | "illness_injury_date": {
144 | "type": "string",
145 | "inferenceType": "explicit",
146 | "instruction": "The date of current illness, injury, or pregnancy from item 14 in YYYY-MM-DD format"
147 | },
148 | "previous_illness_date": {
149 | "type": "string",
150 | "inferenceType": "explicit",
151 | "instruction": "The date of a previous similar illness from item 15 in YYYY-MM-DD format"
152 | },
153 | "unable_to_work_start_date": {
154 | "type": "string",
155 | "inferenceType": "explicit",
156 | "instruction": "The start date of the period the patient was unable to work from item 16"
157 | },
158 | "unable_to_work_end_date": {
159 | "type": "string",
160 | "inferenceType": "explicit",
161 | "instruction": "The end date of the period the patient was unable to work from item 16"
162 | },
163 | "referring_physician": {
164 | "type": "string",
165 | "inferenceType": "explicit",
166 | "instruction": "The name of the referring physician from item 17"
167 | },
168 | "referring_physician_id": {
169 | "type": "string",
170 | "inferenceType": "explicit",
171 | "instruction": "The ID number of the referring physician from item 17a"
172 | },
173 | "hospitalization_start_date": {
174 | "type": "string",
175 | "inferenceType": "explicit",
176 | "instruction": "The hospitalization start date related to current services from item 18"
177 | },
178 | "hospitalization_end_date": {
179 | "type": "string",
180 | "inferenceType": "explicit",
181 | "instruction": "The hospitalization end date related to current services from item 18"
182 | },
183 | "is_outside_lab_indicator": {
184 | "type": "boolean",
185 | "inferenceType": "explicit",
186 | "instruction": "Are there outside lab charges? from item 20"
187 | },
188 | "outside_lab_charges": {
189 | "type": "string",
190 | "inferenceType": "explicit",
191 | "instruction": "The outside lab charges amount from item 20"
192 | },
193 | "diagnosis_1": {
194 | "type": "string",
195 | "inferenceType": "explicit",
196 | "instruction": "The diagnosis or nature of illness or injury from item 21.1"
197 | },
198 | "diagnosis_2": {
199 | "type": "string",
200 | "inferenceType": "explicit",
201 | "instruction": "The diagnosis or nature of illness or injury from item 21.2"
202 | },
203 | "diagnosis_3": {
204 | "type": "string",
205 | "inferenceType": "explicit",
206 | "instruction": "The diagnosis or nature of illness or injury from item 21.3"
207 | },
208 | "diagnosis_4": {
209 | "type": "string",
210 | "inferenceType": "explicit",
211 | "instruction": "The diagnosis or nature of illness or injury from item 21.4"
212 | },
213 | "medicaid_resubmission_number": {
214 | "type": "string",
215 | "inferenceType": "explicit",
216 | "instruction": "MEDICAID RESUBMISSION NUMBER from item 22"
217 | },
218 | "medicaid_original_ref_number": {
219 | "type": "string",
220 | "inferenceType": "explicit",
221 | "instruction": "Medicaid - Original ref no. from item 22"
222 | },
223 | "prior_authorization_number": {
224 | "type": "string",
225 | "inferenceType": "explicit",
226 | "instruction": "The prior authorization number from item 23"
227 | },
228 | "medical_procedures": {
229 | "type": "array",
230 | "instruction": "The list of medical procedures from the table in item 24",
231 | "items": {
232 | "$ref": "#/definitions/Procedure_Service_Supplies"
233 | }
234 | },
235 | "tax_id_type": {
236 | "type": "string",
237 | "inferenceType": "explicit",
238 | "instruction": "The tax ID type (SSN or EIN) from item 25"
239 | },
240 | "tax_id_number": {
241 | "type": "string",
242 | "inferenceType": "explicit",
243 | "instruction": "The federal tax ID number (SSN or EIN) from item 25"
244 | },
245 | "total_charges": {"type": "number","inferenceType": "explicit","instruction": "The total charges in dollars from item 28"},
246 | "amount_paid": {"type": "number","inferenceType": "explicit","instruction": "The amount paid in dollars from item 29"}
247 | }
248 | }
--------------------------------------------------------------------------------
/10-Understanding-BDA/utils/display_functions.py:
--------------------------------------------------------------------------------
1 | import ipywidgets as widgets
2 | from IPython.display import display, HTML
3 | import pandas as pd
4 | from PIL import Image
5 | import io
6 | import boto3
7 | from urllib.parse import urlparse
8 | from pdf2image import convert_from_bytes
9 |
10 |
11 | s3 = boto3.client('s3')
12 |
13 |
14 | onclick_function = """
15 |
41 | """
42 |
def load_image(uri):
    """Load an image or PDF and return its content as JPEG-encoded bytes.

    Args:
        uri: Either an ``s3://bucket/key`` URI (fetched with the module-level
            ``s3`` client) or a path to a local file.

    Returns:
        bytes: JPEG data. For a PDF (detected by a ``.pdf`` suffix on ``uri``)
        this is the first page rendered as a JPEG. For an input that is
        already a JPEG the original bytes are returned unchanged; any other
        image format is re-encoded as JPEG.
    """
    if uri.startswith('s3://'):
        parsed = urlparse(uri)
        bucket, key = parsed.netloc, parsed.path.lstrip('/')
        file_content = s3.get_object(Bucket=bucket, Key=key)['Body'].read()
    else:
        # Context manager so the file handle is closed deterministically.
        with open(uri, 'rb') as local_file:
            file_content = local_file.read()

    if uri.lower().endswith('.pdf'):
        # Only the first page is displayed, so only that page is rasterised.
        first_page = convert_from_bytes(file_content, first_page=1, last_page=1)[0]
        img_io = io.BytesIO()
        first_page.save(img_io, format='JPEG')
        return img_io.getvalue()

    img = Image.open(io.BytesIO(file_content))
    if img.format == 'JPEG':
        return file_content

    # JPEG has no alpha channel or palette mode; saving e.g. an RGBA or P
    # image (typical for PNG/GIF input) raises OSError, so normalise first.
    if img.mode not in ('RGB', 'L'):
        img = img.convert('RGB')
    img_io = io.BytesIO()
    img.save(img_io, format='JPEG')
    return img_io.getvalue()
61 |
62 |
63 | def get_kv_html(kv_pairs):
64 | # Create key-value pairs display
65 | kv_html = onclick_function
66 | kv_html += """
67 |
68 |
69 |
75 | """
76 |
77 | for i, (key, (value, confidence)) in enumerate(kv_pairs.items()):
78 | kv_html += '
'
229 | html = ""
230 | for key, value in data.items():
231 | if isinstance(value, dict) and 'value' in value:
232 | conf = value.get('confidence', 0) * 100
233 | html += f"""
234 |