├── .editorconfig ├── .env.template ├── .gitattributes ├── .gitignore ├── .yarn └── patches │ └── promptfoo-npm-0.113.3-239bf96f0e.patch ├── CITATION.cff ├── README.md ├── assertions ├── fhirPathEquals.mjs ├── isBundle.mjs ├── metaElementMissing.mjs └── validateOperation.mjs ├── etc └── fhir-gpt.yaml ├── evals ├── extraction │ ├── config-minimalist.yaml │ ├── config-specialist.yaml │ ├── providers.yaml │ └── tests │ │ ├── basic-demographics.yaml │ │ ├── conditions.yaml │ │ ├── explanations-of-benefit.yaml │ │ ├── medication-requests.yaml │ │ ├── observations.yaml │ │ ├── patient-history.json │ │ └── patient-history.yaml └── generation │ ├── config-multi-turn-tool-use.js │ ├── config-zero-shot-bundle.yaml │ ├── markdown-transformer.js │ ├── providers.yaml │ └── tests.yaml ├── package.json ├── providers ├── AnthropicMessagesWithRecursiveToolCallsProvider.ts └── OpenAiResponsesWithRecursiveToolCallsProvider.ts ├── tools └── validateFhirBundle.mjs ├── tsconfig.json └── yarn.lock /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | insert_final_newline = true 6 | 7 | [*.{js,json,yml}] 8 | charset = utf-8 9 | indent_style = space 10 | indent_size = 2 11 | -------------------------------------------------------------------------------- /.env.template: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | ANTHROPIC_API_KEY= 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | /.yarn/** linguist-vendored 2 | /.yarn/releases/* binary 3 | /.yarn/plugins/**/* binary 4 | /.pnp.* binary linguist-generated 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore 2 | 3 | # Logs 4 | 5 | logs 6 | _.log 7 | npm-debug.log_ 8 | yarn-debug.log* 9 | yarn-error.log* 10 | lerna-debug.log* 11 | .pnpm-debug.log* 12 | 13 | # Caches 14 | 15 | .cache 16 | 17 | # Diagnostic reports (https://nodejs.org/api/report.html) 18 | 19 | report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json 20 | 21 | # Runtime data 22 | 23 | pids 24 | _.pid 25 | _.seed 26 | *.pid.lock 27 | 28 | # Directory for instrumented libs generated by jscoverage/JSCover 29 | 30 | lib-cov 31 | 32 | # Coverage directory used by tools like istanbul 33 | 34 | coverage 35 | *.lcov 36 | 37 | # nyc test coverage 38 | 39 | .nyc_output 40 | 41 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 42 | 43 | .grunt 44 | 45 | # Bower dependency directory (https://bower.io/) 46 | 47 | bower_components 48 | 49 | # node-waf configuration 50 | 51 | .lock-wscript 52 | 53 | # Compiled binary addons (https://nodejs.org/api/addons.html) 54 | 55 | build/Release 56 | 57 | # Dependency directories 58 | 59 | node_modules/ 60 | jspm_packages/ 61 | 62 | # Snowpack dependency directory (https://snowpack.dev/) 63 | 64 | web_modules/ 65 | 66 | # TypeScript cache 67 | 68 | *.tsbuildinfo 69 | 70 | # Optional npm cache directory 71 | 72 | .npm 73 | 74 | # Optional eslint cache 75 | 76 | .eslintcache 77 | 78 | # Optional stylelint cache 79 | 80 | .stylelintcache 81 | 82 | # Microbundle cache 83 | 84 | .rpt2_cache/ 85 | .rts2_cache_cjs/ 86 | .rts2_cache_es/ 87 | .rts2_cache_umd/ 88 | 89 | # Optional REPL history 90 | 91 | .node_repl_history 92 | 93 | # Output of 'npm pack' 94 | 95 | *.tgz 96 | 97 | # Yarn Integrity file 98 | 99 | .yarn-integrity 100 | 101 | # dotenv environment variable files 102 | 103 | .env 104 | .env.development.local 105 | .env.test.local 106 | .env.production.local 107 | .env.local 108 | 109 | # parcel-bundler cache (https://parceljs.org/) 110 | 111 | .parcel-cache 112 | 113 | # Next.js build output 114 | 115 | .next 116 | out 117 | 118 | # Nuxt.js build / generate output 119 | 120 | .nuxt 121 | dist 122 | 123 | # Gatsby files 124 | 125 | # Comment in the public line in if your project uses Gatsby and not Next.js 126 | 127 | # https://nextjs.org/blog/next-9-1#public-directory-support 128 | 129 | # public 130 | 131 | # vuepress build output 132 | 133 | .vuepress/dist 134 | 135 | # vuepress v2.x temp and cache directory 136 | 137 | .temp 138 | 139 | # Docusaurus cache and generated files 140 | 141 | .docusaurus 142 | 143 | # Serverless directories 144 | 145 | .serverless/ 146 | 147 | # FuseBox cache 148 | 149 | .fusebox/ 150 | 151 | # DynamoDB Local files 152 | 153 | .dynamodb/ 154 | 155 | # TernJS port file 156 | 157 | .tern-port 158 | 159 | # Stores VSCode versions used for testing VSCode extensions 160 | 161 | .vscode-test 162 | 163 | # yarn v2 164 | 165 | .yarn/cache 166 | .yarn/unplugged 167 | .yarn/build-state.yml 168 | .yarn/install-state.gz 169 | .pnp.* 170 | 171 | # IntelliJ based IDEs 172 | .idea 173 | 174 | # Finder (MacOS) folder config 175 | .DS_Store 176 | 177 | evals/private-* -------------------------------------------------------------------------------- /.yarn/patches/promptfoo-npm-0.113.3-239bf96f0e.patch: -------------------------------------------------------------------------------- 1 | diff --git a/package.json b/package.json 2 | index e17eeb5e4ecf61ac3c8b6f96d6a16445f31a60e1..142ad6371c613270ece06fa02543cdda934d1ce0 100644 3 | --- a/package.json 4 | +++ b/package.json 5 | @@ -14,6 +14,42 @@ 6 | ".": { 7 | "import": "./dist/src/index.js", 8 | "require": "./dist/src/index.js" 9 | + }, 10 | + "./dist/src/providers/openai/responses": { 11 | + "import": "./dist/src/providers/openai/responses.js", 12 | + "require": "./dist/src/providers/openai/responses.js" 13 | + }, 14 | + "./dist/src/providers/openai/util": { 15 | + "import": "./dist/src/providers/openai/util.js", 16 | + "require": "./dist/src/providers/openai/util.js" 17 | + }, 18 | + "./dist/src/logger": { 19 | + "import": "./dist/src/logger.js", 20 | + "require": "./dist/src/logger.js" 21 | + }, 22 | + "./dist/src/providers/openai/types": { 23 | + "import": "./dist/src/providers/openai/types.js", 24 | + "require": "./dist/src/providers/openai/types.js" 25 | + }, 26 | + "./dist/src/envars": { 27 | + "import": "./dist/src/envars.js", 28 | + "require": "./dist/src/envars.js" 29 | + }, 30 | + "./dist/src/util": { 31 | + "import": "./dist/src/util/index.js", 32 | + "require": "./dist/src/util/index.js" 33 | + }, 34 | + "./dist/src/providers/anthropic/generic": { 35 | + "import": "./dist/src/providers/anthropic/generic.js", 36 | + "require": "./dist/src/providers/anthropic/generic.js" 37 | + }, 38 | + "./dist/src/providers/anthropic/types": { 39 | + "import": "./dist/src/providers/anthropic/types.js", 40 | + "require": "./dist/src/providers/anthropic/types.js" 41 | + }, 42 | + "./dist/src/providers/anthropic/util": { 43 | + "import": "./dist/src/providers/anthropic/util.js", 44 | + "require": "./dist/src/providers/anthropic/util.js" 45 | } 46 | }, 47 | "workspaces": [ 48 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: 'If you use this software, please cite it as below.' 3 | authors: 4 | - family-names: 'Kelly' 5 | given-names: 'Joshua' 6 | orcid: 'https://orcid.org/0009-0000-7191-0595' 7 | title: 'FHIR LLM Eval' 8 | version: 0.0.1 9 | date-released: 2024-11-22 10 | url: 'https://github.com/flexpa/fhir-llm-evals' 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # @flexpa/llm-fhir-eval 2 | 3 | > [!NOTE] 4 | > Follow the development progress on [FHIR Chat](https://chat.fhir.org/#narrow/channel/323443-Artificial-Intelligence.2FMachine-Learning-.28AI.2FML.29/topic/LLM.20FHIR.20Eval.20Preview/near/483998202). 5 | 6 | ## Overview 7 | 8 | `@flexpa/llm-fhir-eval` is an evaluation framework designed to benchmark the performance of LLMs on FHIR-specific tasks including generation, validation, and extraction. This framework systematically tests and validates the capabilities of LLMs in handling various healthcare-interoperability related tasks, ensuring they meet the standards required for effective FHIR implementations. It implements evaluations from prior art such as [FHIR-GPT](https://ai.nejm.org/doi/10.1056/AIcs2300301). 9 | 10 | ## Benchmark 11 | 12 | `@flexpa/llm-fhir-eval` benchmarks FHIR-specific tasks including: 13 | 14 | 1. **FHIR Resource Generation**: 15 | - Generate accurate FHIR resources such as `Patient`, `Observation`, `MedicationStatement`, etc. 16 | - Test the ability to create complex resource relationships and validate terminology bindings. 17 | 18 | 2. **FHIR Resource Validation**: 19 | - Validate FHIR resources using operations like `$validate`. 20 | - Check for schema compliance, required field presence, and value set binding verification. 21 | 22 | 3. **Data Extraction**: 23 | - Extract structured FHIR-compliant data from clinical notes and other unstructured data. 24 | - Evaluate the proficiency of LLMs in extracting specific healthcare data elements. 25 | 26 | 4. **Tool Use**: 27 | - Test models' ability to use FHIR validation tools and other healthcare-specific functions. 28 | - Validate proper tool calling for FHIR operations. 29 | 30 | ## Available Evaluations 31 | 32 | 1. **Data Extraction** (`evals/extraction/`) 33 | - Description: Comprehensive evaluation of extracting structured FHIR data from unstructured clinical text. 34 | - Configurations: Both minimalist and specialist approaches available. 35 | - Test categories: Basic demographics, conditions, explanations of benefit, medication requests, observations. 36 | 37 | 2. **FHIR Resource Generation** (`evals/generation/`) 38 | - Description: Tests the ability to generate valid FHIR resources and bundles. 39 | - Configurations: Zero-shot bundle generation and multi-turn tool use scenarios. 40 | - Models supported: GPT-3.5-turbo, GPT-4.1, O3 (low/high reasoning), Claude 3.5 Haiku, Claude 3.5 Sonnet, Claude Sonnet 4, Claude Opus 4 41 | 42 | ## Custom Assertions 43 | 44 | The framework includes custom assertion functions: 45 | 46 | - `fhirPathEquals.mjs`: Validates FHIR Path expressions 47 | - `isBundle.mjs`: Checks if output is a valid FHIR Bundle 48 | - `metaElementMissing.mjs`: Validates required metadata elements 49 | - `validateOperation.mjs`: Validates FHIR operation results 50 | 51 | ## Tools 52 | 53 | - `validateFhirBundle.mjs`: Tool for validating FHIR Bundle resources 54 | 55 | ## Custom Providers 56 | 57 | - `AnthropicMessagesWithRecursiveToolCallsProvider.ts`: Enhanced Anthropic provider with recursive tool calling (up to 10 depth levels) 58 | - `OpenAiResponsesWithRecursiveToolCallsProvider.ts`: Enhanced OpenAI provider with recursive tool calling 59 | 60 | These providers enable multi-turn tool interactions where models can iteratively call validation tools to improve their FHIR resource generation. 61 | 62 | ## Commands to Run Evaluations 63 | 64 | Install dependencies and set up environment variables: 65 | 66 | ```bash 67 | yarn install 68 | ``` 69 | 70 | Copy the `.env.template` file to `.env` and supply your API keys for the models you plan to test. 71 | 72 | Run an evaluation: 73 | 74 | ```bash 75 | # Example: Run the extraction evaluation with minimalist config 76 | promptfoo eval -c evals/extraction/config-minimalist.yaml 77 | 78 | # Example: Run the FHIR bundle generation evaluation 79 | promptfoo eval -c evals/generation/config-zero-shot-bundle.yaml 80 | 81 | # Example: Run multi-turn tool use evaluation 82 | promptfoo eval -c evals/generation/config-multi-turn-tool-use.js 83 | ``` 84 | 85 | The evaluation will print its performance metrics to the console and optionally save results to files. 86 | 87 | -------------------------------------------------------------------------------- /assertions/fhirPathEquals.mjs: -------------------------------------------------------------------------------- 1 | import { evalFhirPath } from '@medplum/core'; 2 | 3 | export default (output, context) => { 4 | try { 5 | const result = JSON.parse(output); 6 | const fhirPath = context.fhirpath; 7 | const evalResults = evalFhirPath(fhirPath, result); 8 | return evalResults.length > 0; 9 | } catch { 10 | return false; 11 | } 12 | }; 13 | -------------------------------------------------------------------------------- /assertions/isBundle.mjs: -------------------------------------------------------------------------------- 1 | export default (output, _context) => { 2 | try { 3 | const result = JSON.parse(output); 4 | return result.resourceType === 'Bundle'; 5 | } catch { 6 | return false; 7 | } 8 | }; 9 | -------------------------------------------------------------------------------- /assertions/metaElementMissing.mjs: -------------------------------------------------------------------------------- 1 | export default (output, _context) => { 2 | try { 3 | const result = JSON.parse(output); 4 | if (!result || typeof result !== 'object') { 5 | return false; 6 | } 7 | 8 | // Bundle itself should not include a meta element. 9 | if (result.meta !== undefined) { 10 | return false; 11 | } 12 | 13 | if (!Array.isArray(result.entry)) { 14 | return false; 15 | } 16 | 17 | // Each resource must either: 18 | // 1. Have no meta element, OR 19 | // 2. Have a meta element whose only property is `profile`. 20 | const resourceMetaIsValid = (meta) => { 21 | if (meta === undefined) return true; 22 | // meta must be an object with exactly one key: "profile" 23 | return typeof meta === 'object' && meta !== null && Object.keys(meta).length === 1 && 'profile' in meta; 24 | }; 25 | 26 | return result.entry.every((e) => resourceMetaIsValid(e.resource?.meta)); 27 | } catch { 28 | return false; 29 | } 30 | }; 31 | -------------------------------------------------------------------------------- /assertions/validateOperation.mjs: -------------------------------------------------------------------------------- 1 | import { randomUUID } from 'crypto'; 2 | 3 | // Validate a FHIR resource using the local validator and return error issues or true if none found 4 | export async function validate(modelResponse) { 5 | const response = await fetch('http://localhost:8082/validate', { 6 | method: 'POST', 7 | headers: { 8 | accept: 'application/json', 9 | 'Content-Type': 'application/json', 10 | }, 11 | body: JSON.stringify({ 12 | cliContext: { 13 | sv: '4.0.1', 14 | ig: ['hl7.fhir.us.core#4.0.0'], 15 | locale: 'en', 16 | }, 17 | filesToValidate: [ 18 | { 19 | fileName: 'manually_entered_file.json', 20 | fileContent: modelResponse, 21 | fileType: 'json', 22 | }, 23 | ], 24 | sessionId: randomUUID(), 25 | }), 26 | }); 27 | const data = await response.json(); 28 | 29 | const errorIssues = data.outcomes.flatMap((outcome) => outcome.issues).filter((issue) => issue.level === 'ERROR'); 30 | 31 | return errorIssues; 32 | } 33 | 34 | export default async function evaluate(modelResponse) { 35 | const response = await validate(modelResponse); 36 | 37 | return response.length === 0 ? true : response; 38 | } 39 | -------------------------------------------------------------------------------- /etc/fhir-gpt.yaml: -------------------------------------------------------------------------------- 1 | description: 'FHIR-GPT Prompt Supplementary Appendix DOI: 10.1056/AIcs2300301' 2 | 3 | providers: 4 | - anthropic:messages:claude-3-5-sonnet-20241022 5 | - openai:chat:gpt-4o 6 | - openai:chat:gpt-4o-mini 7 | 8 | prompts: 9 | - label: 'FHIR-GPT Prompt' 10 | description: 'Prompt used in FHIR-GPT' 11 | raw: | 12 | You are a helpful assistant that can help with medication data extraction. 13 | User will paste a short narrative that describes the administration of a drug. 14 | Please extract the drug route (How drug should enter body), e.g. PO, IV. 15 | All other drug information, e,g. dosage, frequency, reason shall be discarded. 16 | 17 | Please MUST ONLY return the converted .json result without any explanations, or contexts. 18 | The output itself must be parse-able with python's json.loads() 19 | The output should start and end with brackets. 20 | 21 | If you cannot find related drug route, you MUST leave it as blank and MUST return a blank json {} 22 | You MUST use information only from the original text, MUST NOT infer from the context. 23 | 24 | For each drug route, please extract the originial text and find its most related SNOMED code in o 25 | If you cannot find an exact same meaning SNOMED drug route code, just leave it as blank. 26 | 27 | For example, the narrative "Oxycodone-Acetaminophen 5-325 mg Tablet Sig: 1-2 Tablets PO\nQ4-6H (e 28 | You should return a json format: {'text': 'PO', 'coding': [{'system': 'http://snomed.info/sct', 29 | 30 | Another example, the narrative "Daptomycin, intravenously for a total of 14\ndays" 31 | You should return a json format: {'text': 'intravenously', 'coding': [{'system': 'http://snomed.i 32 | 33 | Another example, the narrative "heparin sodium, porcine 5000 UNT/ML Injectable Solution" 34 | You should return a json format: {'text': 'injections'} 35 | 36 | Another example, the narrative "Oxycodone-Acetaminophen 5-325 mg q4h prn torn ACL pain" 37 | You should return blank json {} 38 | 39 | SNOMED codes to select from: 40 | 41 | Code Display 42 | 284009009 Route of administration values 43 | 6064005 Topical route 44 | 10547007 Otic route 45 | 12130007 Intra-articular route 46 | 16857009 Per vagina 47 | 26643006 Oral route 48 | 34206005 Subcutaneous route 49 | 37161004 Per rectum 50 | 37737002 Intraluminal route 51 | 37839007 Sublingual route 52 | 38239002 Intraperitoneal route 53 | 45890007 Transdermal route 54 | 46713006 Nasal route 55 | 47625008 Intravenous route 56 | 54471007 Buccal route 57 | 54485002 Ophthalmic route 58 | 58100008 Intra-arterial route 59 | 60213007 Intramedullary route 60 | 62226000 Intrauterine route 61 | 72607000 Intrathecal route 62 | 78421000 Intramuscular route 63 | 90028008 Urethral route 64 | 127490009 Gastrostomy route 65 | 127491008 Jejunostomy route 66 | 127492001 Nasogastric route 67 | 372449004 Dental use 68 | 372450004 Endocervical use 69 | 372451000 Endosinusial use 70 | 372452007 Endotracheopulmonary use 71 | 372453002 Extra-amniotic use 72 | 372454008 Gastroenteral use 73 | 372457001 Gingival use 74 | 372458006 Intraamniotic use 75 | 372459003 Intrabursal use 76 | 372460008 Intracardiac use 77 | 372461007 Intracavernous use 78 | 372462000 Intracervical route 79 | 372463005 Intracoronary use 80 | 372464004 Intradermal use 81 | 372465003 Intradiscal use 82 | 372466002 Intralesional use 83 | 372467006 Intralymphatic use 84 | 372468001 Intraocular use 85 | 372469009 Intrapleural use 86 | 372470005 Intrasternal use 87 | 372471009 Intravesical use 88 | 372472002 Ocular route 89 | 372473007 Oromucosal use 90 | 372474001 Periarticular use 91 | 372475000 Perineural use 92 | 372476004 Subconjunctival use 93 | 404815008 Transmucosal route 94 | 404818005 Intratracheal route 95 | 404819002 Intrabiliary route 96 | 404820008 Epidural route 97 | 416174007 Suborbital route 98 | 417070009 Caudal route 99 | 417255000 Intraosseous route 100 | 417950001 Intrathoracic route 101 | 417985001 Enteral route 102 | 417989007 Intraductal route 103 | 418091004 Intratympanic route 104 | 418114005 Intravenous central route 105 | 418133000 Intramyometrial route 106 | 418136008 Gastro-intestinal stoma route 107 | 418162004 Colostomy route 108 | 418204005 Periurethral route 109 | 418287000 Intracoronal route 110 | 418321004 Retrobulbar route 111 | 418331006 Intracartilaginous route 112 | 418401004 Intravitreal route 113 | 418418000 Intraspinal route 114 | 418441008 Orogastric route 115 | 418511008 Transurethral route 116 | 418586008 Intratendinous route 117 | 418608002 Intracorneal route 118 | 418664002 Oropharyngeal route 119 | 418722009 Peribulbar route 120 | 418730005 Nasojejunal route 121 | 418743005 Fistula route 122 | 418813001 Surgical drain route 123 | 418821007 Intracameral route 124 | 418851001 Paracervical route 125 | 418877009 Intrasynovial route 126 | 418887008 Intraduodenal route 127 | 418892005 Intracisternal route 128 | 418947002 Intratesticular route 129 | 418987007 Intracranial route 130 | 419021003 Tumour cavity route 131 | 419165009 Paravertebral route 132 | 419231003 Intrasinal route 133 | 419243002 Transcervical route 134 | 419320008 Subtendinous route 135 | 419396008 Intraabdominal route 136 | 419601003 Subgingival route 137 | 419631009 Intraovarian route 138 | 419684008 Ureteral route 139 | 419762003 Peritendinous route 140 | 419778001 Intrabronchial route 141 | 419810008 Intraprostatic route 142 | 419874009 Submucosal route 143 | 419894000 Surgical cavity route 144 | 419954003 Ileostomy route 145 | 419993007 Intravenous peripheral route 146 | 147 | User input: 148 | {{note}} 149 | 150 | defaultAssert: 151 | - type: is-json 152 | 153 | tests: 154 | - vars: 155 | note: | 156 | Carvedilol 157 | 6.25 mg PO BID 158 | assert: 159 | - type: equals 160 | value: 161 | { 162 | 'text': 'PO', 163 | 'coding': [{ 'system': 'http://snomed.info/sct', 'code': '26643006', 'display': 'Oral route' }], 164 | } 165 | -------------------------------------------------------------------------------- /evals/extraction/config-minimalist.yaml: -------------------------------------------------------------------------------- 1 | # yaml-language-server: $schema=https://promptfoo.dev/config-schema.json 2 | description: 'Structured FHIR Data Extraction' 3 | 4 | providers: 5 | - file://./providers.yaml 6 | 7 | prompts: 8 | - label: 'Minimalist' 9 | raw: | 10 | Extract the answer to the question from the FHIR resource. 11 | 12 | 13 | {{resource}} 14 | 15 | 16 | 17 | {{question}} 18 | 19 | 20 | defaultTest: 21 | options: 22 | transform: output.trim() 23 | 24 | tests: 25 | - file://tests/basic-demographics.yaml 26 | - file://tests/conditions.yaml 27 | - file://tests/observations.yaml 28 | - file://tests/explanations-of-benefit.yaml 29 | - file://tests/medication-requests.yaml 30 | - file://tests/patient-history.yaml 31 | -------------------------------------------------------------------------------- /evals/extraction/config-specialist.yaml: -------------------------------------------------------------------------------- 1 | # yaml-language-server: $schema=https://promptfoo.dev/config-schema.json 2 | description: 'Structured FHIR Data Extraction' 3 | 4 | providers: 5 | - file://./providers.yaml 6 | 7 | prompts: 8 | - label: 'Specialist' 9 | raw: | 10 | You are a FHIR data extraction specialist. 11 | Given a FHIR resource and a question, extract the requested information. 12 | Return only the specific answer without explanation. 13 | If the question cannot be answered with the information provided, return "N/A". 14 | Do not infer or make assumptions. 15 | When the question is about a specific value, return the value only. 16 | When the value exists literally in the FHIR resource, return the value only. 17 | If a unit is specified, return the value with unit, in the normally expected format. 18 | Do not return extra text or formatting including unnecesary quotes around strings. 19 | Do not append or prepend any newlines. 20 | 21 | 22 | {{resource}} 23 | 24 | 25 | 26 | {{question}} 27 | 28 | 29 | defaultTest: 30 | options: 31 | transform: output.trim() 32 | 33 | tests: 34 | - file://tests/basic-demographics.yaml 35 | - file://tests/conditions.yaml 36 | - file://tests/observations.yaml 37 | - file://tests/explanations-of-benefit.yaml 38 | - file://tests/medication-requests.yaml 39 | - file://tests/patient-history.yaml 40 | -------------------------------------------------------------------------------- /evals/extraction/providers.yaml: -------------------------------------------------------------------------------- 1 | - label: openai-gpt-3.5-turbo 2 | id: openai:chat:gpt-3.5-turbo 3 | config: 4 | max_output_tokens: 8092 5 | - label: openai-gpt-4.1 6 | id: openai:responses:gpt-4.1 7 | config: 8 | max_output_tokens: 8092 9 | - label: openai-o3-low 10 | id: openai:responses:o3 11 | config: 12 | max_output_tokens: 16184 13 | reasoning_effort: 'low' 14 | - label: openai-o3-high 15 | id: openai:responses:o3 16 | config: 17 | max_output_tokens: 16184 18 | reasoning_effort: 'high' 19 | - label: claude-3-5-haiku-20241022 20 | id: anthropic:messages:claude-3-5-haiku-20241022 21 | config: 22 | max_output_tokens: 8092 23 | - label: anthropic-claude-3-5-sonnet-20241022 24 | id: anthropic:messages:claude-3-5-sonnet-20241022 25 | config: 26 | max_output_tokens: 8092 27 | - label: anthropic-claude-sonnet-4-20250514 28 | id: anthropic:messages:claude-sonnet-4-20250514 29 | config: 30 | max_output_tokens: 8092 31 | - label: anthropic-claude-opus-4-20250514 32 | id: anthropic:messages:claude-opus-4-20250514 33 | config: 34 | max_output_tokens: 8092 35 | - label: google-gemini-2.0-flash 36 | id: google:gemini-2.0-flash 37 | config: 38 | max_output_tokens: 8092 39 | - label: google-gemini-2.5-flash-preview-05-20 40 | id: google:gemini-2.5-flash-preview-05-20 41 | config: 42 | max_output_tokens: 8092 43 | - label: google-gemini-2.5-pro-preview-05-06 44 | id: google:gemini-2.5-pro-preview-05-06 45 | config: 46 | max_output_tokens: 8092 47 | - label: ii-medical-8b 48 | id: openai:chat:II-Medical-8B 49 | config: 50 | max_output_tokens: 16184 51 | apiBaseUrl: https://g6ifi04b81u9oza5.us-east-1.aws.endpoints.huggingface.cloud/v1 52 | # @note set API KEY 53 | # apiKey: 54 | showThinking: false 55 | transform: | 56 | output = output.replace(/.*<\/think>/gis, '').trim(); 57 | output = output.replace(/<\s*\/?\s*Answer\s*>/gi, '').trim(); 58 | return output; 59 | - label: medgemma-4b-it 60 | id: openai:chat:medgemma-4b-it 61 | config: 62 | max_output_tokens: 16184 63 | apiBaseUrl: https://a6pf0b0uqcuajaua.us-east-1.aws.endpoints.huggingface.cloud/v1 64 | # @note set API KEY 65 | # apiKey: 66 | showThinking: false 67 | transform: | 68 | output = output.replace(/<\s*\/?\s*Answer\s*>/gi, '').trim(); 69 | return output; 70 | - label: medgemma-27b-text-it 71 | id: openai:chat:medgemma-27b-text-it 72 | config: 73 | max_output_tokens: 16184 74 | apiBaseUrl: https://i7n97jz1el3l39h5.us-east-1.aws.endpoints.huggingface.cloud/v1 75 | # @note set API KEY 76 | # apiKey: 77 | showThinking: false 78 | transform: | 79 | output = output.replace(/<\s*\/?\s*Answer\s*>/gi, '').trim(); 80 | return output; 81 | -------------------------------------------------------------------------------- /evals/extraction/tests/basic-demographics.yaml: -------------------------------------------------------------------------------- 1 | - description: Full name 2 | vars: 3 | resource: &patient_resource_json | 4 | { 5 | "resourceType": "Patient", 6 | "id": "example", 7 | "name": [ 8 | { 9 | "use": "official", 10 | "family": "Smith-Jones", 11 | "given": ["John", "Jacob", "Jingleheimer"], 12 | "prefix": ["Dr."], 13 | "suffix": ["Jr."] 14 | }, 15 | { 16 | "use": "nickname", 17 | "given": ["Jack"] 18 | }, 19 | { 20 | "use": "maiden", 21 | "family": "Johnson" 22 | } 23 | ], 24 | "gender": "other", 25 | "birthDate": "1990-01-15", 26 | "address": [ 27 | { 28 | "use": "home", 29 | "line": ["123 Main St", "Apt 4B"], 30 | "city": "Anytown", 31 | "state": "CA", 32 | "postalCode": "90210" 33 | }, 34 | { 35 | "use": "old", 36 | "line": ["456 Elm St"], 37 | "city": "Oldtown", 38 | "state": "NY", 39 | "postalCode": "54321" 40 | } 41 | ], 42 | "telecom": [ 43 | { 44 | "system": "phone", 45 | "value": "555-123-4567", 46 | "use": "home" 47 | }, 48 | { 49 | "system": "email", 50 | "value": "john.smith@example.com", 51 | "use": "work" 52 | } 53 | ], 54 | "maritalStatus": { 55 | "coding": [ 56 | { 57 | "system": "http://terminology.hl7.org/CodeSystem/v3-MaritalStatus", 58 | "code": "M", 59 | "display": "Married" 60 | } 61 | ] 62 | }, 63 | "multipleBirthBoolean": true 64 | } 65 | question: What is the patient's full name? 66 | assert: 67 | - type: equals 68 | value: Dr. John Jacob Jingleheimer Smith-Jones Jr. 69 | 70 | - description: Date of birth 71 | vars: 72 | resource: *patient_resource_json 73 | question: What is the patient's date of birth? 74 | assert: 75 | - type: equals 76 | value: '1990-01-15' 77 | 78 | - description: Nickname 79 | vars: 80 | resource: *patient_resource_json 81 | question: What is the patient's nickname? 82 | assert: 83 | - type: equals 84 | value: Jack 85 | 86 | - description: Complete home address 87 | vars: 88 | resource: *patient_resource_json 89 | question: What is the patient's complete home address in standard format? 90 | assert: 91 | - type: contains-any 92 | value: 93 | - 123 Main St, Apt 4B, Anytown, CA 90210 94 | - 123 Main St Apt 4B, Anytown, CA 90210 95 | - 123 Main St, Apt 4B Anytown CA 90210 96 | - 123 Main St Apt 4B Anytown, CA 90210 97 | 98 | - description: State of residence 99 | vars: 100 | resource: *patient_resource_json 101 | question: What state does the patient live in? 102 | assert: 103 | - type: equals 104 | value: CA 105 | 106 | - description: All given names in official name 107 | vars: 108 | resource: *patient_resource_json 109 | question: List all given names for the patient's official name. 110 | assert: 111 | - type: contains-any 112 | value: 113 | - John, Jacob, Jingleheimer 114 | - John Jacob Jingleheimer 115 | - "John\nJacob\nJingleheimer" 116 | 117 | - description: Maiden name 118 | vars: 119 | resource: *patient_resource_json 120 | question: What is the patient's maiden name? 121 | assert: 122 | - type: equals 123 | value: Johnson 124 | 125 | - description: All addresses 126 | vars: 127 | resource: *patient_resource_json 128 | question: What was the patient's previous address? 129 | assert: 130 | - type: contains-any 131 | value: 132 | - 456 Elm St, Oldtown, NY 54321 133 | - 456 Elm St, Oldtown, NY, 54321 134 | 135 | - description: Home address, line 2 136 | vars: 137 | resource: *patient_resource_json 138 | question: What is the second line of the patient's home address? 139 | assert: 140 | - type: equals 141 | value: Apt 4B 142 | 143 | - description: All telecoms 144 | vars: 145 | resource: *patient_resource_json 146 | question: List all contact methods for the patient in the format "value (system), value (system), etc.". 147 | assert: 148 | - type: equals 149 | value: 555-123-4567 (phone), john.smith@example.com (email) 150 | 151 | - description: Marital status code 152 | vars: 153 | resource: *patient_resource_json 154 | question: What is the patient's marital status code? 155 | assert: 156 | - type: equals 157 | value: M 158 | 159 | - description: Is the patient a multiple birth? True or false? 160 | vars: 161 | resource: *patient_resource_json 162 | question: Was the patient part of a multiple birth? 163 | assert: 164 | - type: equals 165 | value: true 166 | 167 | - description: Gender value 168 | vars: 169 | resource: *patient_resource_json 170 | question: What is the patient's gender? 171 | assert: 172 | - type: equals 173 | value: other 174 | -------------------------------------------------------------------------------- /evals/extraction/tests/conditions.yaml: -------------------------------------------------------------------------------- 1 | - vars: 2 | resource: &condition_resource_json | 3 | { 4 | "resourceType": "Condition", 5 | "id": "example", 6 | "clinicalStatus": { 7 | "coding": [ 8 | { 9 | "system": "http://terminology.hl7.org/CodeSystem/condition-clinical", 10 | "code": "active", 11 | "display": "Active" 12 | } 13 | ] 14 | }, 15 | "verificationStatus": { 16 | "coding": [ 17 | { 18 | "system": "http://terminology.hl7.org/CodeSystem/condition-ver-status", 19 | "code": "confirmed", 20 | "display": "Confirmed" 21 | } 22 | ] 23 | }, 24 | "category": [ 25 | { 26 | "coding": [ 27 | { 28 | "system": "http://terminology.hl7.org/CodeSystem/condition-category", 29 | "code": "problem-list-item", 30 | "display": "Problem List Item" 31 | } 32 | ] 33 | } 34 | ], 35 | "severity": { 36 | "coding": [ 37 | { 38 | "system": "http://snomed.info/sct", 39 | "code": "24484000", 40 | "display": "Severe" 41 | } 42 | ] 43 | }, 44 | "code": { 45 | "coding": [ 46 | { 47 | "system": "http://snomed.info/sct", 48 | "code": "195967001", 49 | "display": "Asthma" 50 | } 51 | ] 52 | }, 53 | "onsetDateTime": "2020-03-15" 54 | } 55 | question: Is this an active severe condition, and if so, what is it? You must specify both the condition and the severity. 56 | assert: 57 | - type: factuality 58 | value: Yes, severe active asthma 59 | - vars: 60 | resource: *condition_resource_json 61 | question: When did the patient develop the condition? 62 | assert: 63 | - type: equals 64 | value: '2020-03-15' 65 | - vars: 66 | resource: *condition_resource_json 67 | question: What coding system is used to specify the condition's severity? 68 | assert: 69 | - type: equals 70 | value: http://snomed.info/sct 71 | - description: Condition with multiple codings for code 72 | vars: 73 | resource: | 74 | { 75 | "resourceType": "Condition", 76 | "clinicalStatus": { 77 | "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-clinical", "code": "active" } ] 78 | }, 79 | "code": { 80 | "coding": [ 81 | { "system": "http://snomed.info/sct", "code": "44054006", "display": "Diabetes mellitus type 2" }, 82 | { "system": "http://icd10who.org", "code": "E11", "display": "Type 2 diabetes mellitus" } 83 | ] 84 | }, 85 | "onsetDateTime": "2018-06-01" 86 | } 87 | question: What is the SNOMED code for the condition? 88 | assert: 89 | - type: equals 90 | value: 44054006 91 | - description: Condition with missing severity 92 | vars: 93 | resource: | 94 | { 95 | "resourceType": "Condition", 96 | "clinicalStatus": { 97 | "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-clinical", "code": "active" } ] 98 | }, 99 | "code": { 100 | "coding": [ { "system": "http://snomed.info/sct", "code": "38341003", "display": "Hypertension" } ] 101 | }, 102 | "onsetDateTime": "2019-01-01" 103 | } 104 | question: What is the severity of the condition? 105 | assert: 106 | - type: equals 107 | value: N/A 108 | - description: Condition with ambiguous severity (multiple codings) 109 | vars: 110 | resource: | 111 | { 112 | "resourceType": "Condition", 113 | "clinicalStatus": { 114 | "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-clinical", "code": "active" } ] 115 | }, 116 | "severity": { 117 | "coding": [ 118 | { "system": "http://snomed.info/sct", "code": "255604002", "display": "Mild" }, 119 | { "system": "http://snomed.info/sct", "code": "24484000", "display": "Severe" } 120 | ] 121 | }, 122 | "code": { 123 | "coding": [ { "system": "http://snomed.info/sct", "code": "195967001", "display": "Asthma" } ] 124 | }, 125 | "onsetDateTime": "2021-05-10" 126 | } 127 | question: List all severities coded for this condition. 128 | assert: 129 | - type: contains-any 130 | value: 131 | - Mild, Severe 132 | - Mild,Severe 133 | - "Mild\nSevere" 134 | - description: Condition with reason reference 135 | vars: 136 | resource: | 137 | { 138 | "resourceType": "Condition", 139 | "clinicalStatus": { 140 | "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-clinical", "code": "active" } ] 141 | }, 142 | "code": { 143 | "coding": [ { "system": "http://snomed.info/sct", "code": "195967001", "display": "Asthma" } ] 144 | }, 145 | "onsetDateTime": "2022-02-02", 146 | "evidence": [ 147 | { "detail": [ { "reference": "Observation/obs123" } ] } 148 | ] 149 | } 150 | question: What is the referenced evidence resource type? 151 | assert: 152 | - type: equals 153 | value: Observation 154 | - description: Ultimate Condition extraction challenge - Staging and Progression 155 | vars: 156 | resource: | 157 | { 158 | "resourceType": "Bundle", 159 | "type": "collection", 160 | "entry": [ 161 | { 162 | "fullUrl": "urn:uuid:cond-diabetes", 163 | "resource": { 164 | "resourceType": "Condition", 165 | "id": "cond-diabetes", 166 | "clinicalStatus": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-clinical", "code": "active" } ] }, 167 | "verificationStatus": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-ver-status", "code": "confirmed" } ] }, 168 | "code": { "coding": [ { "system": "http://snomed.info/sct", "code": "44054006", "display": "Diabetes mellitus type 2" } ], "text": "Diabetes mellitus type 2" }, 169 | "subject": { "reference": "Patient/example" }, 170 | "onsetDateTime": "2015-01-01" 171 | } 172 | }, 173 | { 174 | "fullUrl": "urn:uuid:cond-hypertension", 175 | "resource": { 176 | "resourceType": "Condition", 177 | "id": "cond-hypertension", 178 | "clinicalStatus": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-clinical", "code": "active" } ] }, 179 | "verificationStatus": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-ver-status", "code": "confirmed" } ] }, 180 | "code": { "coding": [ { "system": "http://snomed.info/sct", "code": "38341003", "display": "Hypertensive disorder, systemic arterial" } ], "text": "Hypertension" }, 181 | "subject": { "reference": "Patient/example" }, 182 | "onsetDateTime": "2016-01-01" 183 | } 184 | }, 185 | { 186 | "fullUrl": "urn:uuid:cond-ckd-stage2-resolved", 187 | "resource": { 188 | "resourceType": "Condition", 189 | "id": "cond-ckd-stage2-resolved", 190 | "clinicalStatus": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-clinical", "code": "resolved" } ] }, 191 | "verificationStatus": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-ver-status", "code": "confirmed" } ] }, 192 | "code": { "coding": [ { "system": "http://snomed.info/sct", "code": "431856001", "display": "Chronic kidney disease stage 2" } ], "text": "CKD Stage 2" }, 193 | "subject": { "reference": "Patient/example" }, 194 | "onsetDateTime": "2020-03-15", 195 | "abatementDateTime": "2021-01-10" 196 | } 197 | }, 198 | { 199 | "fullUrl": "urn:uuid:cond-ckd-stage3a-active", 200 | "resource": { 201 | "resourceType": "Condition", 202 | "id": "cond-ckd-stage3a-active", 203 | "clinicalStatus": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-clinical", "code": "active" } ] }, 204 | "verificationStatus": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-ver-status", "code": "confirmed" } ] }, 205 | "code": { "coding": [ { "system": "http://snomed.info/sct", "code": "441208003", "display": "Chronic kidney disease stage 3A" } ], "text": "CKD Stage 3a" }, 206 | "subject": { "reference": "Patient/example" }, 207 | "onsetDateTime": "2021-02-20" 208 | } 209 | }, 210 | { 211 | "fullUrl": "urn:uuid:cond-ckd-general-error", 212 | "resource": { 213 | "resourceType": "Condition", 214 | "id": "cond-ckd-general-error", 215 | "clinicalStatus": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-clinical", "code": "active" } ] }, 216 | "verificationStatus": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-ver-status", "code": "entered-in-error" } ] }, 217 | "code": { "coding": [ { "system": "http://snomed.info/sct", "code": "709044004", "display": "Chronic kidney disease" } ], "text": "CKD general" }, 218 | "subject": { "reference": "Patient/example" }, 219 | "onsetDateTime": "2021-01-05" 220 | } 221 | }, 222 | { 223 | "fullUrl": "urn:uuid:cond-ckd-stage1-refuted", 224 | "resource": { 225 | "resourceType": "Condition", 226 | "id": "cond-ckd-stage1-refuted", 227 | "clinicalStatus": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-clinical", "code": "active" } ] }, 228 | "verificationStatus": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-ver-status", "code": "refuted" } ] }, 229 | "code": { "coding": [ { "system": "http://snomed.info/sct", "code": "431855002", "display": "Chronic kidney disease stage 1" } ], "text": "CKD Stage 1" }, 230 | "subject": { "reference": "Patient/example" }, 231 | "onsetDateTime": "2019-11-01" 232 | } 233 | }, 234 | { 235 | "fullUrl": "urn:uuid:cond-ckd-stage2-differential", 236 | "resource": { 237 | "resourceType": "Condition", 238 | "id": "cond-ckd-stage2-differential", 239 | "clinicalStatus": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-clinical", "code": "active" } ] }, 240 | "verificationStatus": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-ver-status", "code": "differential" } ] }, 241 | "code": { "coding": [ { "system": "http://snomed.info/sct", "code": "431856001", "display": "Chronic kidney disease stage 2" } ], "text": "CKD Stage 2 (Differential)" }, 242 | "subject": { "reference": "Patient/example" }, 243 | "onsetDateTime": "2020-12-01" 244 | } 245 | }, 246 | { 247 | "fullUrl": "urn:uuid:cond-aki-resolved", 248 | "resource": { 249 | "resourceType": "Condition", 250 | "id": "cond-aki-resolved", 251 | "clinicalStatus": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-clinical", "code": "resolved" } ] }, 252 | "verificationStatus": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/condition-ver-status", "code": "confirmed" } ] }, 253 | "code": { "coding": [ { "system": "http://snomed.info/sct", "code": "66547009", "display": "Acute kidney injury" } ], "text": "Acute Kidney Injury" }, 254 | "subject": { "reference": "Patient/example" }, 255 | "onsetDateTime": "2022-06-01", 256 | "abatementDateTime": "2022-07-15" 257 | } 258 | } 259 | ] 260 | } 261 | question: What is the SNOMED CT code for the highest currently active and confirmed stage of Chronic Kidney Disease (CKD) based on the patient's record? 262 | assert: 263 | - type: equals 264 | value: 441208003 # CKD Stage 3a 265 | -------------------------------------------------------------------------------- /evals/extraction/tests/medication-requests.yaml: -------------------------------------------------------------------------------- 1 | - vars: 2 | resource: &medication_request_resource_json | 3 | { 4 | "resourceType": "MedicationRequest", 5 | "id": "example", 6 | "status": "active", 7 | "intent": "order", 8 | "medicationCodeableConcept": { 9 | "coding": [ 10 | { 11 | "system": "http://www.nlm.nih.gov/research/umls/rxnorm", 12 | "code": "1049502", 13 | "display": "Acetaminophen 325 MG" 14 | } 15 | ] 16 | }, 17 | "dosageInstruction": [ 18 | { 19 | "sequence": 1, 20 | "timing": { 21 | "repeat": { 22 | "frequency": 4, 23 | "period": 1, 24 | "periodUnit": "d" 25 | } 26 | }, 27 | "doseAndRate": [ 28 | { 29 | "type": { 30 | "coding": [ 31 | { 32 | "system": "http://terminology.hl7.org/CodeSystem/dose-rate-type", 33 | "code": "ordered", 34 | "display": "Ordered" 35 | } 36 | ] 37 | }, 38 | "doseQuantity": { 39 | "value": 1, 40 | "unit": "tablet" 41 | } 42 | } 43 | ] 44 | } 45 | ] 46 | } 47 | question: What is the daily frequency? 48 | assert: 49 | - type: equals 50 | value: 4 51 | - vars: 52 | resource: *medication_request_resource_json 53 | question: What is the medication name and strength? 54 | assert: 55 | - type: equals 56 | value: Acetaminophen 325 MG 57 | - vars: 58 | resource: *medication_request_resource_json 59 | question: What is the dose quantity? 60 | assert: 61 | - type: equals 62 | value: 1 tablet 63 | - vars: 64 | resource: *medication_request_resource_json 65 | question: What is the status of the medication request? 66 | assert: 67 | - type: equals 68 | value: active 69 | - vars: 70 | resource: *medication_request_resource_json 71 | question: Does the medication have an RxNorm code? If yes, what is it? 72 | assert: 73 | - type: contains 74 | value: '1049502' 75 | - description: MedicationRequest with multiple codings in medicationCodeableConcept 76 | vars: 77 | resource: | 78 | { 79 | "resourceType": "MedicationRequest", 80 | "status": "active", 81 | "intent": "order", 82 | "medicationCodeableConcept": { 83 | "coding": [ 84 | { "system": "http://www.nlm.nih.gov/research/umls/rxnorm", "code": "316074", "display": "Ibuprofen 200 MG" }, 85 | { "system": "http://snomed.info/sct", "code": "387207008", "display": "Ibuprofen" } 86 | ] 87 | }, 88 | "dosageInstruction": [ 89 | { 90 | "doseAndRate": [ 91 | { "doseQuantity": { "value": 2, "unit": "tablet" } } 92 | ] 93 | } 94 | ] 95 | } 96 | question: What is the RxNorm code for the medication? 97 | assert: 98 | - type: equals 99 | value: 316074 100 | - description: MedicationRequest with missing dose unit 101 | vars: 102 | resource: | 103 | { 104 | "resourceType": "MedicationRequest", 105 | "status": "active", 106 | "intent": "order", 107 | "medicationCodeableConcept": { 108 | "coding": [ 109 | { "system": "http://www.nlm.nih.gov/research/umls/rxnorm", "code": "197361", "display": "Lisinopril 10 MG" } 110 | ] 111 | }, 112 | "dosageInstruction": [ 113 | { 114 | "doseAndRate": [ 115 | { "doseQuantity": { "value": 1 } } 116 | ] 117 | } 118 | ] 119 | } 120 | question: What is the dose quantity (including unit, if available)? 121 | assert: 122 | - type: equals 123 | value: 1 124 | - description: MedicationRequest with dose as a range 125 | vars: 126 | resource: | 127 | { 128 | "resourceType": "MedicationRequest", 129 | "status": "active", 130 | "intent": "order", 131 | "medicationCodeableConcept": { 132 | "coding": [ 133 | { "system": "http://www.nlm.nih.gov/research/umls/rxnorm", "code": "617314", "display": "Metformin 500 MG" } 134 | ] 135 | }, 136 | "dosageInstruction": [ 137 | { 138 | "doseAndRate": [ 139 | { "doseRange": { "low": { "value": 1, "unit": "tablet" }, "high": { "value": 2, "unit": "tablet" } } } 140 | ] 141 | } 142 | ] 143 | } 144 | question: What is the dose range (including units)? 145 | assert: 146 | - type: contains-any 147 | value: 148 | - '1-2 tablet' 149 | - '1 tablet - 2 tablet' 150 | - '1-2 tablets' 151 | - '1 to 2 tablets' 152 | - '1 to 2 tablet' 153 | - 1 tablet-2 tablet 154 | - description: MedicationRequest with coded reason for prescription 155 | vars: 156 | resource: | 157 | { 158 | "resourceType": "MedicationRequest", 159 | "status": "active", 160 | "intent": "order", 161 | "medicationCodeableConcept": { 162 | "coding": [ 163 | { "system": "http://www.nlm.nih.gov/research/umls/rxnorm", "code": "1049630", "display": "Atorvastatin 20 MG" } 164 | ] 165 | }, 166 | "reasonCode": [ 167 | { "coding": [ { "system": "http://snomed.info/sct", "code": "13644009", "display": "Hypercholesterolemia" } ] } 168 | ], 169 | "dosageInstruction": [ 170 | { 171 | "doseAndRate": [ 172 | { "doseQuantity": { "value": 1, "unit": "tablet" } } 173 | ] 174 | } 175 | ] 176 | } 177 | question: What is the coded reason for this prescription? Please return the code only. 178 | assert: 179 | - type: equals 180 | value: 13644009 181 | - description: MedicationRequest with non-standard status 182 | vars: 183 | resource: | 184 | { 185 | "resourceType": "MedicationRequest", 186 | "status": "on-hold", 187 | "intent": "order", 188 | "medicationCodeableConcept": { 189 | "coding": [ 190 | { "system": "http://www.nlm.nih.gov/research/umls/rxnorm", "code": "617314", "display": "Metformin 500 MG" } 191 | ] 192 | }, 193 | "dosageInstruction": [ 194 | { 195 | "doseAndRate": [ 196 | { "doseQuantity": { "value": 1, "unit": "tablet" } } 197 | ] 198 | } 199 | ] 200 | } 201 | question: What is the status of this medication request? 202 | assert: 203 | - type: equals 204 | value: on-hold 205 | - description: Ultimate MedicationRequest extraction challenge 206 | vars: 207 | resource: | 208 | { 209 | "resourceType": "Bundle", 210 | "type": "collection", 211 | "entry": [ 212 | { 213 | "resource": { 214 | "resourceType": "MedicationRequest", 215 | "status": "active", 216 | "intent": "order", 217 | "medicationCodeableConcept": { 218 | "coding": [ 219 | { "system": "http://www.nlm.nih.gov/research/umls/rxnorm", "code": "860975", "display": "Ibuprofen 200 MG" }, 220 | { "system": "http://snomed.info/sct", "code": "387207008", "display": "Ibuprofen" } 221 | ] 222 | }, 223 | "dosageInstruction": [ 224 | { 225 | "doseAndRate": [ 226 | { "doseQuantity": { "value": 2, "unit": "tablet" } } 227 | ] 228 | } 229 | ], 230 | "reasonCode": [ 231 | { "coding": [ { "system": "http://snomed.info/sct", "code": "386661006", "display": "Fever" } ] } 232 | ], 233 | "extension": [ 234 | { 235 | "url": "http://example.org/fhir/StructureDefinition/medicationrequest-priority", 236 | "valueCode": "routine" 237 | } 238 | ] 239 | } 240 | }, 241 | { 242 | "resource": { 243 | "resourceType": "MedicationRequest", 244 | "status": "on-hold", 245 | "intent": "order", 246 | "medicationCodeableConcept": { 247 | "coding": [ 248 | { "system": "http://www.nlm.nih.gov/research/umls/rxnorm", "code": "617314", "display": "Metformin 500 MG" }, 249 | { "system": "http://snomed.info/sct", "code": "860975", "display": "Ibuprofen 200 MG" } 250 | ] 251 | }, 252 | "dosageInstruction": [ 253 | { 254 | "doseAndRate": [ 255 | { "doseRange": { "low": { "value": 1, "unit": "tablet" }, "high": { "value": 2, "unit": "tablet" } } } 256 | ] 257 | } 258 | ], 259 | "reasonCode": [ 260 | { "coding": [ { "system": "http://snomed.info/sct", "code": "44054006", "display": "Diabetes mellitus type 2" } ] } 261 | ] 262 | } 263 | }, 264 | { 265 | "resource": { 266 | "resourceType": "MedicationRequest", 267 | "status": "cancelled", 268 | "intent": "order", 269 | "medicationCodeableConcept": { 270 | "coding": [ 271 | { "system": "http://www.nlm.nih.gov/research/umls/rxnorm", "code": "1049630", "display": "Atorvastatin 20 MG" } 272 | ] 273 | }, 274 | "dosageInstruction": [ 275 | { 276 | "doseAndRate": [ 277 | { "doseQuantity": { "value": 1 } } 278 | ] 279 | } 280 | ], 281 | "reasonCode": [ 282 | { "coding": [ { "system": "http://snomed.info/sct", "code": "13644009", "display": "Hypercholesterolemia" } ] } 283 | ] 284 | } 285 | }, 286 | { 287 | "resource": { 288 | "resourceType": "MedicationRequest", 289 | "status": "active", 290 | "intent": "order", 291 | "medicationCodeableConcept": { 292 | "coding": [ 293 | { "system": "http://www.nlm.nih.gov/research/umls/rxnorm", "code": "197361", "display": "Lisinopril 10 MG" } 294 | ] 295 | }, 296 | "dosageInstruction": [ 297 | { 298 | "doseAndRate": [ 299 | { "doseQuantity": { "value": 1, "unit": "tablet" } } 300 | ] 301 | } 302 | ], 303 | "reasonCode": [ 304 | { "coding": [ { "system": "http://snomed.info/sct", "code": "38341003", "display": "Hypertension" } ] } 305 | ] 306 | } 307 | }, 308 | { 309 | "resource": { 310 | "resourceType": "MedicationRequest", 311 | "status": "active", 312 | "intent": "order", 313 | "medicationCodeableConcept": { 314 | "coding": [ 315 | { "system": "http://www.nlm.nih.gov/research/umls/rxnorm", "code": "617314", "display": "Metformin 500 MG" } 316 | ] 317 | }, 318 | "dosageInstruction": [ 319 | { 320 | "doseAndRate": [ 321 | { "doseQuantity": { "value": 1, "unit": "tablet" } } 322 | ] 323 | } 324 | ], 325 | "reasonCode": [ 326 | { "coding": [ { "system": "http://snomed.info/sct", "code": "44054006", "display": "Diabetes mellitus type 2" } ] } 327 | ] 328 | } 329 | } 330 | ] 331 | } 332 | question: What is the active medication for diabetes mellitus type 2 (RxNorm code)? 333 | assert: 334 | - type: equals 335 | value: 617314 336 | -------------------------------------------------------------------------------- /evals/extraction/tests/observations.yaml: -------------------------------------------------------------------------------- 1 | - description: Systolic blood pressure extraction 2 | vars: 3 | resource: &observation_resource_json | 4 | { 5 | "resourceType": "Observation", 6 | "id": "blood-pressure", 7 | "status": "final", 8 | "category": [ 9 | { 10 | "coding": [ 11 | { 12 | "system": "http://terminology.hl7.org/CodeSystem/observation-category", 13 | "code": "vital-signs", 14 | "display": "Vital Signs" 15 | } 16 | ] 17 | } 18 | ], 19 | "code": { 20 | "coding": [ 21 | { 22 | "system": "http://loinc.org", 23 | "code": "85354-9", 24 | "display": "Blood pressure panel" 25 | } 26 | ] 27 | }, 28 | "component": [ 29 | { 30 | "code": { 31 | "coding": [ 32 | { 33 | "system": "http://loinc.org", 34 | "code": "8480-6", 35 | "display": "Systolic blood pressure" 36 | } 37 | ] 38 | }, 39 | "valueQuantity": { 40 | "value": 120, 41 | "unit": "mmHg" 42 | } 43 | }, 44 | { 45 | "code": { 46 | "coding": [ 47 | { 48 | "system": "http://loinc.org", 49 | "code": "8462-4", 50 | "display": "Diastolic blood pressure" 51 | } 52 | ] 53 | }, 54 | "valueQuantity": { 55 | "value": 80, 56 | "unit": "mmHg" 57 | } 58 | } 59 | ] 60 | } 61 | question: What is the systolic blood pressure value (including unit)? 62 | assert: 63 | - type: equals 64 | value: 120 mmHg 65 | - description: Diastolic blood pressure extraction 66 | vars: 67 | resource: *observation_resource_json 68 | question: What is the diastolic blood pressure value (including unit)? 69 | assert: 70 | - type: equals 71 | value: 80 mmHg 72 | - description: Complete blood pressure reading in systolic/diastolic format 73 | vars: 74 | resource: *observation_resource_json 75 | question: What is the complete blood pressure reading in systolic/diastolic format? 76 | assert: 77 | - type: equals 78 | value: 120/80 mmHg 79 | - description: CodeableConcept extraction without display 80 | vars: 81 | resource: | 82 | { 83 | "resourceType": "Observation", 84 | "id": "dce7c80f-36fa-4693-bce8-75ca9d90a53c", 85 | "status": "final", 86 | "category": [ 87 | { 88 | "coding": [ 89 | { 90 | "system": "http://terminology.hl7.org/CodeSystem/observation-category", 91 | "code": "vital-signs", 92 | } 93 | ] 94 | } 95 | ], 96 | "code": { 97 | "coding": [ 98 | { 99 | "system": "http://loinc.org", 100 | "code": "85354-9", 101 | } 102 | ] 103 | }, 104 | "component": [ 105 | { 106 | "code": { 107 | "coding": [ 108 | { 109 | "system": "http://loinc.org", 110 | "code": "8480-6", 111 | } 112 | ] 113 | }, 114 | "valueQuantity": { 115 | "value": 120, 116 | "unit": "mmHg" 117 | } 118 | }, 119 | { 120 | "code": { 121 | "coding": [ 122 | { 123 | "system": "http://loinc.org", 124 | "code": "8462-4", 125 | } 126 | ] 127 | }, 128 | "valueQuantity": { 129 | "value": 80, 130 | "unit": "mmHg" 131 | } 132 | } 133 | ] 134 | } 135 | question: What is the diastolic blood pressure value (including unit)? 136 | assert: 137 | - type: equals 138 | value: 80 mmHg 139 | - description: Systolic blood pressure with missing unit 140 | vars: 141 | resource: | 142 | { 143 | "resourceType": "Observation", 144 | "id": "a1b2c3d4", 145 | "status": "final", 146 | "code": { 147 | "coding": [ 148 | { "system": "http://loinc.org", "code": "85354-9" } 149 | ] 150 | }, 151 | "component": [ 152 | { 153 | "code": { "coding": [ { "system": "http://loinc.org", "code": "8480-6" } ] }, 154 | "valueQuantity": { "value": 130 } 155 | } 156 | ] 157 | } 158 | question: What is the systolic blood pressure value? 159 | assert: 160 | - type: equals 161 | value: 130 162 | - description: Diastolic blood pressure as string value 163 | vars: 164 | resource: | 165 | { 166 | "resourceType": "Observation", 167 | "id": "e5f6g7h8", 168 | "status": "final", 169 | "code": { 170 | "coding": [ 171 | { "system": "http://loinc.org", "code": "85354-9" } 172 | ] 173 | }, 174 | "component": [ 175 | { 176 | "code": { "coding": [ { "system": "http://loinc.org", "code": "8462-4" } ] }, 177 | "valueString": "eighty-two mmHg" 178 | } 179 | ] 180 | } 181 | question: What is the diastolic blood pressure value? 182 | assert: 183 | - type: contains-any 184 | value: 185 | - eighty-two mmHg 186 | - 82 mmHg 187 | - description: Blood pressure with extra irrelevant component 188 | vars: 189 | resource: | 190 | { 191 | "resourceType": "Observation", 192 | "id": "i9j0k1l2", 193 | "status": "final", 194 | "code": { 195 | "coding": [ 196 | { "system": "http://loinc.org", "code": "85354-9" } 197 | ] 198 | }, 199 | "component": [ 200 | { 201 | "code": { "coding": [ { "system": "http://loinc.org", "code": "8480-6" } ] }, 202 | "valueQuantity": { "value": 110, "unit": "mmHg" } 203 | }, 204 | { 205 | "code": { "coding": [ { "system": "http://loinc.org", "code": "8462-4" } ] }, 206 | "valueQuantity": { "value": 70, "unit": "mmHg" } 207 | }, 208 | { 209 | "code": { "coding": [ { "system": "http://loinc.org", "code": "9999-9" } ] }, 210 | "valueQuantity": { "value": 999, "unit": "foo" } 211 | } 212 | ] 213 | } 214 | question: What is the complete blood pressure reading in systolic/diastolic format? 215 | assert: 216 | - type: contains-any 217 | value: 218 | - 110/70 mmHg 219 | - 110 mmHg/70 mmHg 220 | - description: Blood pressure with non-standard code 221 | vars: 222 | resource: | 223 | { 224 | "resourceType": "Observation", 225 | "id": "m3n4o5p6", 226 | "status": "final", 227 | "code": { 228 | "coding": [ 229 | { "system": "http://loinc.org", "code": "99999-9" } 230 | ] 231 | }, 232 | "component": [ 233 | { 234 | "code": { "coding": [ { "system": "http://loinc.org", "code": "8480-6" } ] }, 235 | "valueQuantity": { "value": 115, "unit": "mmHg" } 236 | }, 237 | { 238 | "code": { "coding": [ { "system": "http://loinc.org", "code": "8462-4" } ] }, 239 | "valueQuantity": { "value": 75, "unit": "mmHg" } 240 | } 241 | ] 242 | } 243 | question: What is the complete blood pressure reading in systolic/diastolic format? 244 | assert: 245 | - type: contains-any 246 | value: 247 | - 115/75 mmHg 248 | - 115 mmHg/75 mmHg 249 | - description: Potassium value only in extension 250 | vars: 251 | resource: | 252 | { 253 | "resourceType": "Observation", 254 | "id": "z1x2c3v4", 255 | "status": "final", 256 | "code": { 257 | "coding": [ 258 | { "system": "http://loinc.org", "code": "2823-3", "display": "Potassium [Moles/volume] in Serum or Plasma" } 259 | ] 260 | }, 261 | "extension": [ 262 | { 263 | "url": "http://example.org/fhir/StructureDefinition/observation-value", 264 | "valueQuantity": { "value": 4.2, "unit": "mmol/L" } 265 | } 266 | ] 267 | } 268 | question: What is the potassium value (including unit)? 269 | assert: 270 | - type: equals 271 | value: 4.2 mmol/L 272 | - description: Glucose with multiple value types 273 | vars: 274 | resource: | 275 | { 276 | "resourceType": "Observation", 277 | "id": "g5h6j7k8", 278 | "status": "final", 279 | "code": { 280 | "coding": [ 281 | { "system": "http://loinc.org", "code": "2345-7", "display": "Glucose [Mass/volume] in Blood" } 282 | ] 283 | }, 284 | "valueQuantity": { "value": 95, "unit": "mg/dL" }, 285 | "interpretation": [ 286 | { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/v3-ObservationInterpretation", "code": "N", "display": "Normal" } ] } 287 | ] 288 | } 289 | question: What is the glucose value (including unit)? 290 | assert: 291 | - type: equals 292 | value: 95 mg/dL 293 | - description: Sodium with coded interpretation only 294 | vars: 295 | resource: | 296 | { 297 | "resourceType": "Observation", 298 | "id": "s1o2d3i4", 299 | "status": "final", 300 | "code": { 301 | "coding": [ 302 | { "system": "http://loinc.org", "code": "2951-2", "display": "Sodium [Moles/volume] in Serum or Plasma" } 303 | ] 304 | }, 305 | "valueQuantity": { "value": 150, "unit": "mmol/L" }, 306 | "interpretation": [ 307 | { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/v3-ObservationInterpretation", "code": "H", "display": "High" } ] } 308 | ] 309 | } 310 | question: What is the sodium interpretation code? 311 | assert: 312 | - type: equals 313 | value: H 314 | - description: Cholesterol as a range 315 | vars: 316 | resource: | 317 | { 318 | "resourceType": "Observation", 319 | "id": "c1h2o3l4", 320 | "status": "final", 321 | "code": { 322 | "coding": [ 323 | { "system": "http://loinc.org", "code": "2093-3", "display": "Cholesterol [Mass/volume] in Serum or Plasma" } 324 | ] 325 | }, 326 | "valueRange": { 327 | "low": { "value": 180, "unit": "mg/dL" }, 328 | "high": { "value": 200, "unit": "mg/dL" } 329 | } 330 | } 331 | question: What is the cholesterol value range (including units)? 332 | assert: 333 | - type: contains-any 334 | value: 335 | - 180-200 mg/dL 336 | - 100 - 200 mg/dL 337 | - 180 mg/dL - 200 mg/dL 338 | - 180 mg/dL to 200 mg/dL 339 | - 180 - 200 mg/dL 340 | - description: Hemoglobin A1c with multiple codings 341 | vars: 342 | resource: | 343 | { 344 | "resourceType": "Observation", 345 | "id": "h1a2b3c4", 346 | "status": "final", 347 | "code": { 348 | "coding": [ 349 | { "system": "http://loinc.org", "code": "4548-4", "display": "Hemoglobin A1c/Hemoglobin.total in Blood" }, 350 | { "system": "http://snomed.info/sct", "code": "43396009", "display": "Hemoglobin A1c measurement" } 351 | ] 352 | }, 353 | "valueQuantity": { "value": 6.1, "unit": "%" } 354 | } 355 | question: What is the hemoglobin A1c value (including unit)? 356 | assert: 357 | - type: contains-any 358 | value: 359 | - 6.1 % 360 | - 6.1% 361 | - description: TSH value only in narrative 362 | vars: 363 | resource: | 364 | { 365 | "resourceType": "Observation", 366 | "id": "t1s2h3n4", 367 | "status": "final", 368 | "code": { 369 | "coding": [ 370 | { "system": "http://loinc.org", "code": "3016-3", "display": "Thyrotropin [Units/volume] in Serum or Plasma" } 371 | ] 372 | }, 373 | "text": { 374 | "status": "generated", 375 | "div": "
TSH: 2.5 mIU/L
" 376 | } 377 | } 378 | question: What is the TSH value (including unit)? 379 | assert: 380 | - type: equals 381 | value: 2.5 mIU/L 382 | - description: Creatinine value in referenced DiagnosticReport 383 | vars: 384 | resource: | 385 | { 386 | "resourceType": "Bundle", 387 | "type": "collection", 388 | "entry": [ 389 | { 390 | "resource": { 391 | "resourceType": "Observation", 392 | "id": "cr1e2a3t4", 393 | "status": "final", 394 | "code": { 395 | "coding": [ 396 | { "system": "http://loinc.org", "code": "2160-0", "display": "Creatinine [Mass/volume] in Serum or Plasma" } 397 | ] 398 | }, 399 | "derivedFrom": [ 400 | { "reference": "DiagnosticReport/dr1234" } 401 | ] 402 | } 403 | }, 404 | { 405 | "resource": { 406 | "resourceType": "DiagnosticReport", 407 | "id": "dr1234", 408 | "result": [ 409 | { "reference": "Observation/cr1e2a3t4-value" } 410 | ] 411 | } 412 | }, 413 | { 414 | "resource": { 415 | "resourceType": "Observation", 416 | "id": "cr1e2a3t4-value", 417 | "valueQuantity": { "value": 1.1, "unit": "mg/dL" } 418 | } 419 | } 420 | ] 421 | } 422 | question: What is the creatinine value (including unit)? 423 | assert: 424 | - type: equals 425 | value: 1.1 mg/dL 426 | - description: Vitamin D with localized display 427 | vars: 428 | resource: | 429 | { 430 | "resourceType": "Observation", 431 | "id": "v1d2e3f4", 432 | "status": "final", 433 | "code": { 434 | "coding": [ 435 | { "system": "http://loinc.org", "code": "1989-3", "display": "Vitamina D" } 436 | ] 437 | }, 438 | "valueQuantity": { "value": 30, "unit": "ng/mL" } 439 | } 440 | question: What is the vitamin D value (including unit)? 441 | assert: 442 | - type: equals 443 | value: 30 ng/mL 444 | - description: White blood cell count with conflicting values 445 | vars: 446 | resource: | 447 | { 448 | "resourceType": "Observation", 449 | "id": "w1b2c3c4", 450 | "status": "final", 451 | "code": { 452 | "coding": [ 453 | { "system": "http://loinc.org", "code": "6690-2", "display": "Leukocytes [#/volume] in Blood by Automated count" } 454 | ] 455 | }, 456 | "valueQuantity": { "value": 7.0, "unit": "10^3/uL" }, 457 | "component": [ 458 | { 459 | "code": { "coding": [ { "system": "http://loinc.org", "code": "6690-2" } ] }, 460 | "valueQuantity": { "value": 6.8, "unit": "10^3/uL" } 461 | } 462 | ] 463 | } 464 | question: What is the main white blood cell count value (including unit)? 465 | assert: 466 | - type: equals 467 | value: 7.0 10^3/uL 468 | - description: Calcium with unusual units 469 | vars: 470 | resource: | 471 | { 472 | "resourceType": "Observation", 473 | "id": "ca1l2c3i4", 474 | "status": "final", 475 | "code": { 476 | "coding": [ 477 | { "system": "http://loinc.org", "code": "17861-6", "display": "Calcium [Moles/volume] in Serum or Plasma" } 478 | ] 479 | }, 480 | "valueQuantity": { "value": 2.2, "unit": "mmol/L", "code": "mg/dL" } 481 | } 482 | question: What is the calcium value (including unit)? 483 | assert: 484 | - type: equals 485 | value: 2.2 mmol/L 486 | - description: Glucose - find the most recent value from a bundle of many observations (with out-of-order dates) 487 | vars: 488 | resource: | 489 | { 490 | "resourceType": "Bundle", 491 | "type": "collection", 492 | "entry": [ 493 | { "resource": { "resourceType": "Observation", "status": "final", "code": { "coding": [ { "system": "http://loinc.org", "code": "2345-7", "display": "Glucose [Mass/volume] in Blood" } ] }, "valueQuantity": { "value": 90, "unit": "mg/dL" }, "effectiveDateTime": "2022-01-01T08:00:00Z" } }, 494 | { "resource": { "resourceType": "Observation", "status": "final", "code": { "coding": [ { "system": "http://loinc.org", "code": "2345-7", "display": "Glucose [Mass/volume] in Blood" } ] }, "valueQuantity": { "value": 110, "unit": "mg/dL" }, "effectiveDateTime": "2022-02-01T08:00:00Z" } }, 495 | { "resource": { "resourceType": "Observation", "status": "final", "code": { "coding": [ { "system": "http://loinc.org", "code": "2345-7", "display": "Glucose [Mass/volume] in Blood" } ] }, "valueQuantity": { "value": 105, "unit": "mg/dL" }, "effectiveDateTime": "2022-01-15T08:00:00Z" } }, 496 | { "resource": { "resourceType": "Observation", "status": "final", "code": { "coding": [ { "system": "http://loinc.org", "code": "2345-7", "display": "Glucose [Mass/volume] in Blood" } ] }, "valueQuantity": { "value": 99, "unit": "mg/dL" }, "effectiveDateTime": "2021-12-31T08:00:00Z" } }, 497 | { "resource": { "resourceType": "Observation", "status": "final", "code": { "coding": [ { "system": "http://loinc.org", "code": "2345-7", "display": "Glucose [Mass/volume] in Blood" } ] }, "valueQuantity": { "value": 115, "unit": "mg/dL" }, "effectiveDateTime": "2022-03-01T08:00:00Z" } }, 498 | { "resource": { "resourceType": "Observation", "status": "final", "code": { "coding": [ { "system": "http://loinc.org", "code": "2345-7", "display": "Glucose [Mass/volume] in Blood" } ] }, "valueQuantity": { "value": 101, "unit": "mg/dL" }, "effectiveDateTime": "2022-02-28T23:59:59Z" } }, 499 | { "resource": { "resourceType": "Observation", "status": "final", "code": { "coding": [ { "system": "http://loinc.org", "code": "2345-7", "display": "Glucose [Mass/volume] in Blood" } ] }, "valueQuantity": { "value": 112, "unit": "mg/dL" }, "effectiveDateTime": "2022-02-01T07:59:59Z" } }, 500 | { "resource": { "resourceType": "Observation", "status": "final", "code": { "coding": [ { "system": "http://loinc.org", "code": "2345-7", "display": "Glucose [Mass/volume] in Blood" } ] }, "valueQuantity": { "value": 108, "unit": "mg/dL" }, "effectiveDateTime": "2022-03-01T07:59:59Z" } }, 501 | { "resource": { "resourceType": "Observation", "status": "final", "code": { "coding": [ { "system": "http://loinc.org", "code": "2345-7", "display": "Glucose [Mass/volume] in Blood" } ] }, "valueQuantity": { "value": 113, "unit": "mg/dL" }, "effectiveDateTime": "2022-02-01T08:00:01Z" } } 502 | ] 503 | } 504 | question: What is the most recent glucose value (including unit)? 505 | assert: 506 | - type: equals 507 | value: 115 mg/dL 508 | - description: Ultimate lab value extraction challenge 509 | vars: 510 | resource: | 511 | { 512 | "resourceType": "Bundle", 513 | "type": "collection", 514 | "entry": [ 515 | { 516 | "resource": { 517 | "resourceType": "Observation", 518 | "status": "final", 519 | "subject": { "reference": "Patient/123" }, 520 | "specimen": { "display": "Serum" }, 521 | "code": { 522 | "coding": [ 523 | { "system": "http://loinc.org", "code": "1558-6", "display": "Glucose [Mass/volume] in Serum or Plasma --Fasting" }, 524 | { "system": "http://snomed.info/sct", "code": "43396009", "display": "Fasting glucose" } 525 | ] 526 | }, 527 | "valueQuantity": { "value": 98, "unit": "mg/dL" }, 528 | "effectiveDateTime": "2022-04-01T09:00:00Z" 529 | } 530 | }, 531 | { 532 | "resource": { 533 | "resourceType": "Observation", 534 | "status": "final", 535 | "subject": { "reference": "Patient/123" }, 536 | "specimen": { "display": "Serum" }, 537 | "code": { 538 | "coding": [ 539 | { "system": "http://loinc.org", "code": "1558-6", "display": "Glucose [Mass/volume] in Serum or Plasma --Fasting" } 540 | ] 541 | }, 542 | "extension": [ 543 | { 544 | "url": "http://example.org/fhir/StructureDefinition/observation-value", 545 | "valueQuantity": { "value": 102, "unit": "mg/dL" } 546 | } 547 | ], 548 | "effectiveDateTime": "2022-05-01T09:00:00Z" 549 | } 550 | }, 551 | { 552 | "resource": { 553 | "resourceType": "Observation", 554 | "status": "entered-in-error", 555 | "subject": { "reference": "Patient/123" }, 556 | "specimen": { "display": "Serum" }, 557 | "code": { 558 | "coding": [ 559 | { "system": "http://loinc.org", "code": "1558-6", "display": "Glucose [Mass/volume] in Serum or Plasma --Fasting" } 560 | ] 561 | }, 562 | "valueQuantity": { "value": 200, "unit": "mg/dL" }, 563 | "effectiveDateTime": "2022-06-01T09:00:00Z" 564 | } 565 | }, 566 | { 567 | "resource": { 568 | "resourceType": "Observation", 569 | "status": "final", 570 | "subject": { "reference": "Patient/456" }, 571 | "specimen": { "display": "Serum" }, 572 | "code": { 573 | "coding": [ 574 | { "system": "http://loinc.org", "code": "1558-6", "display": "Glucose [Mass/volume] in Serum or Plasma --Fasting" } 575 | ] 576 | }, 577 | "valueQuantity": { "value": 99, "unit": "mg/dL" }, 578 | "effectiveDateTime": "2022-07-01T09:00:00Z" 579 | } 580 | }, 581 | { 582 | "resource": { 583 | "resourceType": "Observation", 584 | "status": "final", 585 | "subject": { "reference": "Patient/123" }, 586 | "specimen": { "display": "Plasma" }, 587 | "code": { 588 | "coding": [ 589 | { "system": "http://loinc.org", "code": "1558-6", "display": "Glucose [Mass/volume] in Serum or Plasma --Fasting" } 590 | ] 591 | }, 592 | "valueQuantity": { "value": 100, "unit": "mg/dL" }, 593 | "effectiveDateTime": "2022-05-02T09:00:00Z" 594 | } 595 | }, 596 | { 597 | "resource": { 598 | "resourceType": "Observation", 599 | "status": "final", 600 | "subject": { "reference": "Patient/123" }, 601 | "specimen": { "display": "Serum" }, 602 | "code": { 603 | "coding": [ 604 | { "system": "http://loinc.org", "code": "1558-6", "display": "Glucose [Mass/volume] in Serum or Plasma --Fasting" } 605 | ] 606 | }, 607 | "valueString": "one hundred and five mg/dL", 608 | "effectiveDateTime": "2022-05-03T09:00:00Z" 609 | } 610 | }, 611 | { 612 | "resource": { 613 | "resourceType": "Observation", 614 | "status": "final", 615 | "subject": { "reference": "Patient/123" }, 616 | "specimen": { "display": "Serum" }, 617 | "code": { 618 | "coding": [ 619 | { "system": "http://loinc.org", "code": "1558-6", "display": "Glucose [Mass/volume] in Serum or Plasma --Fasting" } 620 | ] 621 | }, 622 | "valueQuantity": { "value": 5.8, "unit": "mmol/L" }, 623 | "effectiveDateTime": "2022-05-04T09:00:00Z" 624 | } 625 | }, 626 | { 627 | "resource": { 628 | "resourceType": "Observation", 629 | "status": "final", 630 | "subject": { "reference": "Patient/123" }, 631 | "specimen": { "display": "Serum" }, 632 | "code": { 633 | "coding": [ 634 | { "system": "http://loinc.org", "code": "1558-6", "display": "Glucose [Mass/volume] in Serum or Plasma --Fasting" } 635 | ] 636 | }, 637 | "text": { 638 | "status": "generated", 639 | "div": "
Glucose: 106 mg/dL
" 640 | }, 641 | "effectiveDateTime": "2022-05-05T09:00:00Z" 642 | } 643 | } 644 | ] 645 | } 646 | question: What is the most recent, valid, fasting serum glucose value (in mg/dL) for patient "Patient/123", considering all available information, and normalizing units if necessary? 647 | assert: 648 | - type: equals 649 | value: 106 mg/dL 650 | -------------------------------------------------------------------------------- /evals/extraction/tests/patient-history.yaml: -------------------------------------------------------------------------------- 1 | - description: Martial status 2 | vars: 3 | resource: 4 | - file://tests/patient-history.json 5 | question: What is the patient's recorded marital status (just the code)? 6 | assert: 7 | - type: equals 8 | value: D 9 | - description: Clinical event 10 | vars: 11 | resource: 12 | - file://tests/patient-history.json 13 | question: Which SNOMED CT code identifies the procedure documented in the April 18 1979 encounter? 14 | assert: 15 | - type: equals 16 | value: 162673000 17 | - description: BMI 18 | vars: 19 | resource: 20 | - file://tests/patient-history.json 21 | question: What was the patient's Body-mass index (BMI) value (with units) measured in May 2024? 22 | assert: 23 | - type: equals 24 | value: 28.14 kg/m2 25 | - description: Hard-mode 26 | vars: 27 | resource: 28 | - file://tests/patient-history.json 29 | question: On which exact date did the patient transition from full-time to part-time employment, and which insurer was billed for the encounter where that change was first documented? 30 | assert: 31 | - type: factuality 32 | value: May 8 2024 and Anthem 33 | - description: Age calculation and temporal reasoning 34 | vars: 35 | resource: 36 | - file://tests/patient-history.json 37 | question: How old was the patient when they received their higher education finding, and how many years passed before they had their first lipid panel performed? 38 | assert: 39 | - type: factuality 40 | value: 18 years old when received higher education finding, and 46 years passed before first lipid panel in May 2025 41 | - description: Insurance transitions and coverage gaps 42 | vars: 43 | resource: 44 | - file://tests/patient-history.json 45 | question: List all the insurance providers the patient has had in chronological order, including when they had no insurance, and identify the longest continuous period with the same coverage. 46 | assert: 47 | - type: factuality 48 | value: NO_INSURANCE (1979), Aetna (2001), Cigna Health (2017), Anthem (2023-2025). Longest continuous coverage was with Anthem from 2023-2025. 49 | - description: Substance use screening interpretation 50 | vars: 51 | resource: 52 | - file://tests/patient-history.json 53 | question: What substance use screening tools were administered in May 2024, what were the scores? 54 | assert: 55 | - type: factuality 56 | value: PHQ-2 (score 0) and DAST-10 (score 1) were administered. 57 | - description: Social determinants and clinical correlations 58 | vars: 59 | resource: 60 | - file://tests/patient-history.json 61 | question: According to the PRAPARE assessment, what was the patient's employment status, education level, and stress level? 62 | assert: 63 | - type: factuality 64 | value: Part-time employment, completed more than high school education, and reported no stress. 65 | - description: Vital signs trend analysis 66 | vars: 67 | resource: 68 | - file://tests/patient-history.json 69 | question: Compare the patient's blood pressure readings between May 2024 and May 2025, and determine if there was improvement or deterioration. What was the pain score trend during the same period? 70 | assert: 71 | - type: factuality 72 | value: Blood pressure improved from 102/84 mmHg (May 2024) to 104/79 mmHg (May 2025) - diastolic decreased by 5 mmHg. Pain score worsened from 2/10 to 4/10. 73 | - description: Complex condition timeline 74 | vars: 75 | resource: 76 | - file://tests/patient-history.json 77 | question: What conditions were documented as resolved during the patient's care, when were they first diagnosed, and what was the duration of each condition? 78 | assert: 79 | - type: factuality 80 | value: Three conditions resolved - Full-time employment (May 3, 2023 to May 8, 2024, duration 370 days), Social isolation (May 3, 2023 to May 8, 2024, duration 370 days), and Medication review due (May 8, 2024 to May 14, 2025, duration 371 days) 81 | -------------------------------------------------------------------------------- /evals/generation/config-multi-turn-tool-use.js: -------------------------------------------------------------------------------- 1 | import validateFhirBundle from '../../tools/validateFhirBundle.mjs'; 2 | 3 | const openAItools = { 4 | tools: [ 5 | { 6 | type: 'function', 7 | name: 'validate_fhir_bundle', 8 | description: 9 | 'Validate a FHIR bundle - you should use this tool recursively to fix errors, using it again after you have called it to ensure that FHIR resources are fully valid after making changes', 10 | parameters: { 11 | type: 'object', 12 | properties: { 13 | bundle: { 14 | type: 'object', 15 | description: 'The FHIR bundle to validate', 16 | }, 17 | }, 18 | required: ['bundle'], 19 | }, 20 | }, 21 | ], 22 | functionToolCallbacks: { 23 | validate_fhir_bundle: validateFhirBundle, 24 | }, 25 | }; 26 | 27 | const anthropicTools = { 28 | tools: [ 29 | { 30 | name: 'validate_fhir_bundle', 31 | description: 32 | 'Validate a FHIR bundle - you should use this tool recursively to fix errors, using it again after you have called it to ensure that FHIR resources are fully valid after making changes', 33 | input_schema: { 34 | type: 'object', 35 | properties: { 36 | bundle: { 37 | type: 'object', 38 | description: 'The FHIR bundle to validate', 39 | }, 40 | }, 41 | required: ['bundle'], 42 | }, 43 | }, 44 | ], 45 | functionToolCallbacks: { 46 | validate_fhir_bundle: validateFhirBundle, 47 | }, 48 | }; 49 | 50 | const anthropicModel = { 51 | id: 'file://../../providers/AnthropicMessagesWithRecursiveToolCallsProvider.ts', 52 | transform: 'file://./markdown-transformer.js', 53 | }; 54 | 55 | const anthropicModelConfig = { 56 | max_tokens: 8092, 57 | // tool_choice: 'auto', 58 | max_tool_calls: 10, 59 | ...anthropicTools, 60 | }; 61 | 62 | const openAIModel = { 63 | id: 'file://../../providers/OpenAiResponsesWithRecursiveToolCallsProvider.ts', 64 | transform: 'file://./markdown-transformer.js', 65 | }; 66 | 67 | const openAIModelConfig = { 68 | max_output_tokens: 16184, 69 | tool_choice: 'auto', 70 | max_tool_calls: 10, 71 | ...openAItools, 72 | }; 73 | 74 | /** @type {import('promptfoo').TestSuiteConfig} */ 75 | const config = { 76 | description: 'FHIR Bundle Generation (Multi Turn Tool Use)', 77 | 78 | providers: [ 79 | { 80 | ...openAIModel, 81 | config: { 82 | ...openAIModelConfig, 83 | model: 'gpt-3.5-turbo', 84 | }, 85 | label: 'openai-gpt-3.5-turbo', 86 | }, 87 | { 88 | ...openAIModel, 89 | config: { 90 | ...openAIModelConfig, 91 | model: 'gpt-4.1', 92 | }, 93 | label: 'openai-gpt-4.1', 94 | }, 95 | { 96 | ...openAIModel, 97 | config: { 98 | ...openAIModelConfig, 99 | model: 'o3', 100 | reasoning_effort: 'low', 101 | }, 102 | label: 'openai-o3-low', 103 | }, 104 | { 105 | ...openAIModel, 106 | config: { 107 | ...openAIModelConfig, 108 | model: 'o3', 109 | reasoning_effort: 'high', 110 | }, 111 | label: 'openai-o3-high', 112 | }, 113 | { 114 | ...anthropicModel, 115 | config: { 116 | ...anthropicModelConfig, 117 | model: 'claude-3-5-haiku-20241022', 118 | }, 119 | label: 'anthropic-claude-3-5-haiku-20241022', 120 | }, 121 | { 122 | ...anthropicModel, 123 | config: { 124 | ...anthropicModelConfig, 125 | model: 'claude-3-5-sonnet-20241022', 126 | }, 127 | label: 'anthropic-claude-3-5-sonnet-202410224', 128 | }, 129 | { 130 | ...anthropicModel, 131 | config: { 132 | ...anthropicModelConfig, 133 | model: 'claude-sonnet-4-20250514', 134 | }, 135 | label: 'anthropic-claude-sonnet-4-20250514', 136 | }, 137 | { 138 | ...anthropicModel, 139 | config: { 140 | ...anthropicModelConfig, 141 | model: 'claude-opus-4-20250514', 142 | }, 143 | label: 'anthropic-claude-opus-4-20250514', 144 | }, 145 | ], 146 | 147 | prompts: [ 148 | { 149 | label: 'Unstructured Note to FHIR', 150 | raw: `You are a health informaticist expert in FHIR. 151 | You will receive unstructured notes and you need to structure them into FHIR resources. 152 | You must only include data that is present in the note. 153 | You must only return a valid FHIR JSON Bundle, with the appropriate resources, with no additional explanation. 154 | You may include multiple resources in the bundle. 155 | You must follow the FHIR R4 specification. 156 | You mut not include a meta element in the resources. 157 | When generating a CodeableConcept, you must include a coding element with a system, code, and display. 158 | When generating a CodeableConcept, you must use a display matching what is expected by the CodeSystem. 159 | Each entry in a Bundle must have a fullUrl which is the identity of the resource in the entry. 160 | The id of a resource must be a valid UUID in lowercase. 161 | 162 | You have access to a validator tool that will validate the FHIR bundle. 163 | You should use this tool recursively to fix errors, using it again after you have called it to ensure that FHIR resources are fully valid after making changes. 164 | 165 | Include the FHIR JSON bundle in your final response. 166 | 167 | {{note}} 168 | `, 169 | }, 170 | ], 171 | 172 | defaultTest: { 173 | assert: [ 174 | { type: 'is-json' }, 175 | { type: 'javascript', value: 'file://../../assertions/isBundle.mjs' }, 176 | { type: 'javascript', value: 'file://../../assertions/metaElementMissing.mjs' }, 177 | { type: 'javascript', value: 'file://../../assertions/validateOperation.mjs' }, 178 | ], 179 | }, 180 | 181 | tests: ['file://tests.yaml'], 182 | }; 183 | 184 | export default config; 185 | -------------------------------------------------------------------------------- /evals/generation/config-zero-shot-bundle.yaml: -------------------------------------------------------------------------------- 1 | # yaml-language-server: $schema=https://promptfoo.dev/config-schema.json 2 | description: 'FHIR Bundle Generation (Zero Shot)' 3 | 4 | providers: 5 | - file://providers.yaml 6 | 7 | prompts: 8 | - label: 'Unstructured Note to FHIR' 9 | raw: | 10 | You are a health informaticist expert in FHIR. 11 | You will receive unstructured notes and you need to structure them into FHIR resources. 12 | You must only include data that is present in the note. 13 | You must only return a valid FHIR JSON Bundle, with the appropriate resources, with no additional explanation. 14 | You may include multiple resources in the bundle. 15 | You must follow the FHIR R4 specification. 16 | You mut not include a meta element in the resources. 17 | When generateing a CodeableConcept, you must include a coding element with a system, code, and display. 18 | When generating a CodeableConcept, you must use a display matching what is expected by the CodeSystem. 19 | Each entry in a Bundle must have a fullUrl which is the identity of the resource in the entry. 20 | The id of a resource must be a valid UUID in lowercase. 21 | 22 | You must only return JSON with no additional markup or explanation. 23 | 24 | 25 | {{note}} 26 | 27 | 28 | defaultTest: 29 | assert: 30 | - type: is-json 31 | - type: javascript 32 | value: file://../../assertions/isBundle.mjs 33 | - type: javascript 34 | value: file://../../assertions/metaElementMissing.mjs 35 | - type: javascript 36 | value: file://../../assertions/validateOperation.mjs 37 | 38 | tests: 39 | - file://tests.yaml 40 | -------------------------------------------------------------------------------- /evals/generation/markdown-transformer.js: -------------------------------------------------------------------------------- 1 | module.exports = (output, _context) => { 2 | // If the output contains a fenced JSON code block, extract and return the JSON inside the fence. 3 | if (typeof output === 'string') { 4 | // Look for ```json ... ``` first 5 | const jsonFenceMatch = output.match(/```json\s*([\s\S]*?)```/i); 6 | if (jsonFenceMatch && jsonFenceMatch[1]) { 7 | return jsonFenceMatch[1].trim(); 8 | } 9 | 10 | const alternateFenchMatch = output.match(/```\s*([\s\S]*?)```/i); 11 | if (alternateFenchMatch && alternateFenchMatch[1]) { 12 | return alternateFenchMatch[1].trim(); 13 | } 14 | } 15 | 16 | // Default: return the original output untouched. 17 | return output; 18 | }; 19 | -------------------------------------------------------------------------------- /evals/generation/providers.yaml: -------------------------------------------------------------------------------- 1 | - label: openai-gpt-3.5-turbo 2 | id: openai:chat:gpt-3.5-turbo 3 | config: 4 | max_output_tokens: 16184 5 | - label: openai-gpt-4.1 6 | id: openai:responses:gpt-4.1 7 | config: 8 | max_output_tokens: 16184 9 | - label: openai-o3-low 10 | id: openai:responses:o3 11 | config: 12 | max_output_tokens: 16184 13 | reasoning_effort: 'low' 14 | - label: openai-o3-high 15 | id: openai:responses:o3 16 | config: 17 | max_output_tokens: 16184 18 | reasoning_effort: 'high' 19 | - label: claude-3-5-haiku-20241022 20 | id: anthropic:messages:claude-3-5-haiku-20241022 21 | config: 22 | max_output_tokens: 8192 23 | - label: anthropic-claude-3-5-sonnet-20241022 24 | id: anthropic:messages:claude-3-5-sonnet-20241022 25 | config: 26 | max_tokens: 8192 27 | - label: anthropic-claude-sonnet-4-20250514 28 | id: anthropic:messages:claude-sonnet-4-20250514 29 | config: 30 | max_tokens: 8192 31 | transform: | 32 | output = output.replace(/^```json\n/, '').replace(/\n```$/, '').trim(); 33 | return output; 34 | - label: anthropic-claude-opus-4-20250514 35 | id: anthropic:messages:claude-opus-4-20250514 36 | config: 37 | max_tokens: 8192 38 | transform: | 39 | output = output.replace(/^```json\n/, '').replace(/\n```$/, '').trim(); 40 | return output; 41 | - label: google-gemini-2.0-flash 42 | id: google:gemini-2.0-flash 43 | config: 44 | max_output_tokens: 16184 45 | generationConfig: 46 | response_mime_type: 'application/json' 47 | - label: google-gemini-2.5-flash-preview-05-20 48 | id: google:gemini-2.5-flash-preview-05-20 49 | config: 50 | max_output_tokens: 16184 51 | generationConfig: 52 | response_mime_type: 'application/json' 53 | - label: google-gemini-2.5-pro-preview-05-06 54 | id: google:gemini-2.5-pro-preview-05-06 55 | config: 56 | max_output_tokens: 16184 57 | generationConfig: 58 | response_mime_type: 'application/json' 59 | - label: ii-medical-8b 60 | id: openai:chat:II-Medical-8B 61 | config: 62 | max_output_tokens: 16184 63 | apiBaseUrl: https://g6ifi04b81u9oza5.us-east-1.aws.endpoints.huggingface.cloud/v1 64 | showThinking: false 65 | transform: | 66 | output = output.replace(/.*<\/think>/gis, '').trim(); 67 | output = output.replace(/<\s*\/?\s*Answer\s*>/gi, '').trim(); 68 | return output; 69 | - label: medgemma-4b-it 70 | id: openai:chat:medgemma-4b-it 71 | config: 72 | max_output_tokens: 16184 73 | apiBaseUrl: https://a6pf0b0uqcuajaua.us-east-1.aws.endpoints.huggingface.cloud/v1 74 | showThinking: false 75 | transform: | 76 | output = output.replace(/<\s*\/?\s*Answer\s*>/gi, '').trim(); 77 | output = output.replace(/```json\s*([\s\S]*?)```/i, '$1').trim(); 78 | return output; 79 | - label: medgemma-27b-text-it 80 | id: openai:chat:medgemma-27b-text-it 81 | config: 82 | max_output_tokens: 16184 83 | apiBaseUrl: https://i7n97jz1el3l39h5.us-east-1.aws.endpoints.huggingface.cloud/v1 84 | showThinking: false 85 | transform: | 86 | output = output.replace(/<\s*\/?\s*Answer\s*>/gi, '').trim(); 87 | output = output.replace(/```json\s*([\s\S]*?)```/i, '$1').trim(); 88 | return output; 89 | -------------------------------------------------------------------------------- /evals/generation/tests.yaml: -------------------------------------------------------------------------------- 1 | - description: Basic patient resource 2 | vars: 3 | note: | 4 | Patient Marie Curie (DOB: 1867-11-07) 5 | assert: 6 | - type: javascript 7 | value: | 8 | const result = JSON.parse(output); 9 | return ( 10 | result.entry.some(e => e.resource.resourceType === 'Patient') && 11 | result.entry.some(e => e.resource.name.some(n => n.given && n.given.includes('Marie') && n.family && n.family.includes('Curie'))) && 12 | result.entry.some(e => e.resource.birthDate === '1867-11-07') 13 | ); 14 | 15 | - vars: 16 | note: | 17 | Patient: Emily Chen, born 2010-11-05, received influenza vaccine on 2023-10-15. 18 | assert: 19 | - type: javascript 20 | value: | 21 | const result = JSON.parse(output); 22 | return ( 23 | result.entry.some(e => 24 | e.resource.resourceType === 'Immunization' && 25 | e.resource.vaccineCode?.coding?.some(c => c.display?.toLowerCase().includes('influenza')) 26 | ) 27 | ); 28 | 29 | - vars: 30 | note: | 31 | Patient John Smith (DOB: 1990-01-15) presented with fever (39.2°C) and cough on 2024-03-15. 32 | BP was 120/80. History of asthma. Prescribed azithromycin 500mg daily for 3 days. 33 | assert: 34 | - type: javascript 35 | value: | 36 | const result = JSON.parse(output); 37 | return ( 38 | result.entry.some(e => e.resource.resourceType === 'Patient') && 39 | result.entry.some(e => e.resource.resourceType === 'Observation') && 40 | result.entry.some(e => e.resource.resourceType === 'MedicationRequest') 41 | ); 42 | 43 | - description: Blood chemistry diagnostic report 44 | vars: 45 | note: | 46 | Patient: Carlos Ramirez (DOB: 1972-02-09) had routine blood work on 2024-04-20 showing elevated LDL cholesterol (160 mg/dL). 47 | assert: 48 | - type: javascript 49 | value: | 50 | const result = JSON.parse(output); 51 | return ( 52 | result.entry.some(e => e.resource.resourceType === 'Patient') && 53 | result.entry.some(e => e.resource.resourceType === 'DiagnosticReport') && 54 | result.entry.some(e => e.resource.resourceType === 'Observation' && e.resource.code?.coding?.[0]?.display?.toLowerCase().includes('ldl')) 55 | ); 56 | 57 | - description: Documented peanut allergy 58 | vars: 59 | note: | 60 | Patient Sarah Johnson (DOB: 1985-07-22) has a severe peanut allergy resulting in anaphylaxis. 61 | assert: 62 | - type: javascript 63 | value: | 64 | const result = JSON.parse(output); 65 | return ( 66 | result.entry.some(e => e.resource.resourceType === 'Patient') && 67 | result.entry.some(e => e.resource.resourceType === 'AllergyIntolerance' && e.resource.code?.coding?.[0]?.display?.toLowerCase().includes('peanut')) 68 | ); 69 | 70 | - description: Outpatient encounter with blood pressure observation 71 | vars: 72 | note: | 73 | Patient Michael Lee (DOB: 1998-12-01) had an outpatient visit on 2024-05-02 with blood pressure 118/76 mmHg. 74 | assert: 75 | - type: javascript 76 | value: | 77 | const result = JSON.parse(output); 78 | return ( 79 | result.entry.some(e => e.resource.resourceType === 'Encounter') && 80 | result.entry.some(e => e.resource.resourceType === 'Observation' && (e.resource.code?.text?.toLowerCase().includes('blood pressure') || e.resource.code?.coding?.some(c => c.display?.toLowerCase().includes('blood pressure')))) 81 | ); 82 | 83 | - description: Chronic condition diabetes mellitus type 2 84 | vars: 85 | note: | 86 | Patient Olivia Nguyen (DOB: 1960-06-14) diagnosed with Type 2 diabetes mellitus on 2015-08-10. 87 | assert: 88 | - type: javascript 89 | value: | 90 | const result = JSON.parse(output); 91 | return ( 92 | result.entry.some(e => e.resource.resourceType === 'Condition' && (e.resource.code?.text?.toLowerCase().includes('diabetes') || e.resource.code?.coding?.some(c => c.display?.toLowerCase().includes('diabetes')))) 93 | ); 94 | 95 | - description: Appendectomy procedure 96 | vars: 97 | note: | 98 | Patient Liam Patel (DOB: 2002-03-30) underwent an emergency appendectomy on 2024-01-12. 99 | assert: 100 | - type: javascript 101 | value: | 102 | const result = JSON.parse(output); 103 | return ( 104 | result.entry.some(e => e.resource.resourceType === 'Procedure' && (e.resource.code?.text?.toLowerCase().includes('appendectomy') || e.resource.code?.coding?.some(c => c.display?.toLowerCase().includes('appendectomy')))) 105 | ); 106 | 107 | - description: Medication statement – metformin therapy 108 | vars: 109 | note: | 110 | Patient Noah Kim (DOB: 1975-11-19) takes metformin 500mg twice daily for Type 2 diabetes. 111 | assert: 112 | - type: javascript 113 | value: | 114 | const result = JSON.parse(output); 115 | return ( 116 | result.entry.some(e => e.resource.resourceType === 'MedicationStatement' && (e.resource.medicationCodeableConcept?.text?.toLowerCase().includes('metformin') || e.resource.medicationCodeableConcept?.coding?.some(c => c.display?.toLowerCase().includes('metformin')))) 117 | ); 118 | 119 | - description: Healthcare organization details 120 | vars: 121 | note: | 122 | Redwood Medical Center located at 456 Elm St, Springfield, phone 555-6789. 123 | assert: 124 | - type: javascript 125 | value: | 126 | const result = JSON.parse(output); 127 | return ( 128 | result.entry.some(e => e.resource.resourceType === 'Organization' && e.resource.name?.includes('Redwood Medical Center')) 129 | ); 130 | 131 | - description: Practitioner profile cardiologist 132 | vars: 133 | note: | 134 | Cardiologist Dr. Jane Taylor, National Provider Identifier (NPI): 1234567890. 135 | assert: 136 | - type: javascript 137 | value: | 138 | const result = JSON.parse(output); 139 | return ( 140 | result.entry.some(e => e.resource.resourceType === 'Practitioner' && e.resource.name?.some(n => n.family?.includes('Taylor'))) && 141 | result.entry.some(e => e.resource.resourceType === 'Practitioner' && e.resource.identifier?.some(id => id.value === '1234567890')) 142 | ); 143 | 144 | - description: Insurance plan coverage period 145 | vars: 146 | note: | 147 | UnitedHealthcare Gold Plan (policy ID 987654) valid from 2024-01-01 through 2024-12-31 for Daniel Williams (DOB: 1980-05-15). 148 | assert: 149 | - type: javascript 150 | value: | 151 | const result = JSON.parse(output); 152 | return ( 153 | result.entry.some(e => e.resource.resourceType === 'Coverage') && 154 | result.entry.some(e => e.resource.resourceType === 'Patient') 155 | ); 156 | 157 | - description: Outpatient claim for office visit 158 | vars: 159 | note: | 160 | Claim of $123.45 for CPT code 99213 submitted on 2024-04-10. 161 | assert: 162 | - type: javascript 163 | value: | 164 | const result = JSON.parse(output); 165 | return ( 166 | result.entry.some(e => e.resource.resourceType === 'Claim' && e.resource.total?.value === 123.45) && 167 | result.entry.some(e => e.resource.resourceType === 'Claim' && e.resource.item?.some(i => i.productOrService?.coding?.some(c => c.code === '99213'))) 168 | ); 169 | 170 | - description: Complex patient case 171 | vars: 172 | note: | 173 | Patient John Smith (DOB: 1945-08-20) has hypertension and type 2 diabetes. Blood pressure reading on 2024-03-15 was 140/90 mmHg. A1C measured 7.2% on 2024-03-15. 174 | assert: 175 | - type: javascript 176 | value: | 177 | const result = JSON.parse(output); 178 | return ( 179 | result.entry.some(e => e.resource.resourceType === 'Patient' && e.resource.name?.some(n => n.family === 'Smith')) && 180 | result.entry.some(e => e.resource.resourceType === 'Condition' && e.resource.code?.coding?.some(c => c.code === '44054006')) && // Type 2 diabetes 181 | result.entry.some(e => e.resource.resourceType === 'Condition' && e.resource.code?.coding?.some(c => c.code === '38341003')) && // Hypertension 182 | result.entry.some(e => e.resource.resourceType === 'Observation' && e.resource.code?.coding?.some(c => c.code === '85354-9')) && // Blood pressure 183 | result.entry.some(e => e.resource.resourceType === 'Observation' && e.resource.code?.coding?.some(c => c.code === '4548-4')) // Hemoglobin A1c 184 | ); 185 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@flexpa/llm-fhir-eval", 3 | "module": "index.ts", 4 | "version": "0.0.4", 5 | "scripts": { 6 | "eval": "promptfoo eval" 7 | }, 8 | "devDependencies": { 9 | "promptfoo": "0.113.3" 10 | }, 11 | "resolutions": { 12 | "promptfoo@0.113.3": "patch:promptfoo@npm%3A0.113.3#./.yarn/patches/promptfoo-npm-0.113.3-239bf96f0e.patch" 13 | }, 14 | "peerDependencies": { 15 | "typescript": "^5.0.0" 16 | }, 17 | "flexpa": { 18 | "publishableRepo": true 19 | }, 20 | "packageManager": "yarn@4.9.1" 21 | } 22 | -------------------------------------------------------------------------------- /providers/AnthropicMessagesWithRecursiveToolCallsProvider.ts: -------------------------------------------------------------------------------- 1 | import { type EnvOverrides, type ProviderResponse } from 'promptfoo'; 2 | import type Anthropic from '@anthropic-ai/sdk'; 3 | import { APIError } from '@anthropic-ai/sdk'; 4 | import { getEnvInt, getEnvFloat } from 'promptfoo/dist/src/envars'; 5 | import logger from 'promptfoo/dist/src/logger'; 6 | import { maybeLoadToolsFromExternalFile } from 'promptfoo/dist/src/util'; 7 | import { AnthropicGenericProvider } from 'promptfoo/dist/src/providers/anthropic/generic'; 8 | import type { AnthropicMessageOptions } from 'promptfoo/dist/src/providers/anthropic/types'; 9 | import { 10 | outputFromMessage, 11 | parseMessages, 12 | calculateAnthropicCost, 13 | getTokenUsage, 14 | ANTHROPIC_MODELS, 15 | } from 'promptfoo/dist/src/providers/anthropic/util'; 16 | 17 | /* eslint-disable @typescript-eslint/no-explicit-any */ 18 | export default class AnthropicMessagesProvider extends AnthropicGenericProvider { 19 | declare config: AnthropicMessageOptions; 20 | private initializationPromise: Promise | null = null; 21 | 22 | static ANTHROPIC_MODELS = ANTHROPIC_MODELS; 23 | 24 | static ANTHROPIC_MODELS_NAMES = ANTHROPIC_MODELS.map((model) => model.id); 25 | 26 | constructor(options: { id?: string; config?: AnthropicMessageOptions; env?: EnvOverrides } = {}) { 27 | if (!AnthropicMessagesProvider.ANTHROPIC_MODELS_NAMES.includes(options.config!.model!)) { 28 | logger.warn(`Using unknown Anthropic model: ${options.config?.model}`); 29 | } 30 | super(options.config?.model || 'claude-3-5-sonnet-20241022', options); 31 | const { id } = options; 32 | this.id = id ? () => id : this.id; 33 | } 34 | 35 | async cleanup(): Promise {} 36 | 37 | toString(): string { 38 | if (!this.modelName) { 39 | throw new Error('Anthropic model name is not set. Please provide a valid model name.'); 40 | } 41 | return `[Anthropic Messages Provider ${this.modelName}]`; 42 | } 43 | 44 | private extractToolUses(message: any): { 45 | toolUses: Array<{ id: string; name: string; arguments: any }>; 46 | resultText: string; 47 | } { 48 | const toolUses: Array<{ id: string; name: string; arguments: any }> = []; 49 | let resultText = ''; 50 | 51 | // The assistant message content is an array of blocks. Iterate over them to find tool calls. 52 | if (Array.isArray((message as any).content)) { 53 | for (const block of (message as any).content) { 54 | if (block.type === 'tool_use') { 55 | toolUses.push({ id: block.id, name: block.name, arguments: block.input }); 56 | } else if (block.type === 'text') { 57 | resultText += block.text; 58 | } 59 | } 60 | } else if (typeof (message as any).content === 'string') { 61 | // Fallback: if the content is a simple string, treat it as text. 62 | resultText += (message as any).content; 63 | } 64 | 65 | return { toolUses, resultText }; 66 | } 67 | 68 | private async executeToolCall( 69 | toolUse: { id: string; name: string; arguments: any }, 70 | // eslint-disable-next-line @typescript-eslint/no-explicit-any 71 | callbacks: Record any>, 72 | ): Promise<{ role: 'user'; content: Array<{ type: 'tool_result'; tool_use_id: string; content: string }> }> { 73 | const { name, id, arguments: args } = toolUse; 74 | 75 | if (!callbacks[name]) { 76 | logger.warn(`No callback configured for tool '${name}'. Returning placeholder result.`); 77 | return { 78 | role: 'user', 79 | content: [ 80 | { 81 | type: 'tool_result', 82 | tool_use_id: id, 83 | content: `No callback configured for tool '${name}'.`, 84 | }, 85 | ], 86 | } as any; 87 | } 88 | 89 | try { 90 | const toolResult = await callbacks[name](typeof args === 'string' ? args : JSON.stringify(args)); 91 | return { 92 | role: 'user', 93 | content: [ 94 | { 95 | type: 'tool_result', 96 | tool_use_id: id, 97 | content: typeof toolResult === 'string' ? toolResult : JSON.stringify(toolResult), 98 | }, 99 | ], 100 | } as any; 101 | } catch (error) { 102 | logger.error(`Error executing tool '${name}': ${error}`); 103 | return { 104 | role: 'user', 105 | content: [ 106 | { 107 | type: 'tool_result', 108 | tool_use_id: id, 109 | content: `error: ${error instanceof Error ? error.message : String(error)}`, 110 | }, 111 | ], 112 | } as any; 113 | } 114 | } 115 | 116 | async callApi(prompt: string): Promise { 117 | // Wait for MCP initialization if it's in progress 118 | if (this.initializationPromise) { 119 | await this.initializationPromise; 120 | } 121 | 122 | if (!this.apiKey) { 123 | throw new Error( 124 | 'Anthropic API key is not set. Set the ANTHROPIC_API_KEY environment variable or add `apiKey` to the provider config.', 125 | ); 126 | } 127 | 128 | if (!this.modelName) { 129 | throw new Error('Anthropic model name is not set. Please provide a valid model name.'); 130 | } 131 | 132 | const { system, extractedMessages, thinking } = parseMessages(prompt); 133 | 134 | // Get MCP tools if client is initialized 135 | const fileTools = maybeLoadToolsFromExternalFile(this.config.tools) || []; 136 | const allTools = [...fileTools]; 137 | 138 | const maxDepth = (this.config as any)?.maxDepth ?? 10; 139 | const toolCallbacks: Record any> | undefined = (this.config as any) 140 | ?.functionToolCallbacks; 141 | 142 | let depth = 0; 143 | let messages: any[] = extractedMessages as any[]; 144 | 145 | let accumTokenUsage = { prompt: 0, completion: 0, total: 0 } as { 146 | prompt: number; 147 | completion: number; 148 | total: number; 149 | }; 150 | let accumCost = 0; 151 | 152 | const headers: Record = { 153 | ...(this.config.headers || {}), 154 | }; 155 | 156 | // Add beta features header if specified 157 | if (this.config.beta?.length) { 158 | headers['anthropic-beta'] = this.config.beta.join(','); 159 | } 160 | 161 | while (depth <= maxDepth) { 162 | const params: Anthropic.MessageCreateParams = { 163 | model: this.modelName, 164 | ...(system ? { system } : {}), 165 | max_tokens: 166 | this.config?.max_tokens || getEnvInt('ANTHROPIC_MAX_TOKENS', this.config.thinking || thinking ? 2048 : 1024), 167 | messages, 168 | stream: false, 169 | temperature: 170 | this.config.thinking || thinking 171 | ? this.config.temperature 172 | : this.config.temperature || getEnvFloat('ANTHROPIC_TEMPERATURE', 0), 173 | ...(allTools.length > 0 ? { tools: allTools } : {}), 174 | ...(this.config.tool_choice ? { tool_choice: this.config.tool_choice } : {}), 175 | ...(this.config.thinking || thinking ? { thinking: this.config.thinking || thinking } : {}), 176 | ...(typeof this.config?.extra_body === 'object' && this.config.extra_body ? this.config.extra_body : {}), 177 | }; 178 | 179 | logger.debug(`Calling Anthropic Messages API (depth ${depth}): ${JSON.stringify(params)}`); 180 | 181 | let response: any; 182 | try { 183 | response = await this.anthropic.messages.create(params, { 184 | ...(Object.keys(headers).length > 0 ? { headers } : {}), 185 | }); 186 | } catch (err) { 187 | logger.error(`Anthropic Messages API call error: ${err instanceof Error ? err.message : String(err)}`); 188 | if (err instanceof APIError && err.error) { 189 | const errorDetails = err.error as { error: { message: string; type: string } }; 190 | return { 191 | error: `API call error: ${errorDetails.error.message}, status ${err.status}, type ${errorDetails.error.type}`, 192 | }; 193 | } 194 | return { 195 | error: `API call error: ${err instanceof Error ? err.message : String(err)}`, 196 | }; 197 | } 198 | 199 | // Aggregate token usage and cost 200 | const tokenUsage = getTokenUsage(response, false); 201 | accumTokenUsage = { 202 | prompt: (accumTokenUsage.prompt || 0) + (tokenUsage.prompt || 0), 203 | completion: (accumTokenUsage.completion || 0) + (tokenUsage.completion || 0), 204 | total: (accumTokenUsage.total || 0) + (tokenUsage.total || 0), 205 | }; 206 | 207 | const callCost = calculateAnthropicCost( 208 | this.modelName, 209 | this.config, 210 | (response as any).usage?.input_tokens, 211 | (response as any).usage?.output_tokens, 212 | ); 213 | accumCost += callCost || 0; 214 | 215 | // Push assistant response to history for next iteration (if any) 216 | messages.push({ role: 'assistant', content: (response as any).content } as any); 217 | 218 | // Extract tool uses from this response 219 | const { toolUses } = this.extractToolUses(response); 220 | 221 | if (toolUses.length === 0 || depth >= maxDepth) { 222 | return { 223 | output: outputFromMessage(response, this.config.showThinking ?? true), 224 | tokenUsage: accumTokenUsage, 225 | cost: accumCost, 226 | metadata: { 227 | depth, 228 | }, 229 | }; 230 | } 231 | 232 | // Execute tools and append their results to history 233 | const toolResultMessages: any[] = []; 234 | for (const toolUse of toolUses) { 235 | const toolMessage = await this.executeToolCall(toolUse, toolCallbacks || {}); 236 | toolResultMessages.push(toolMessage); 237 | } 238 | 239 | messages = [...messages, ...toolResultMessages]; 240 | 241 | depth += 1; 242 | } 243 | 244 | // If loop exits without returning, it means we exceeded maxDepth with pending tool calls 245 | return { 246 | output: 'Reached maximum recursion depth without completing all tool calls.', 247 | tokenUsage: accumTokenUsage, 248 | cost: accumCost, 249 | metadata: { 250 | depth, 251 | reachedMaxDepth: true, 252 | }, 253 | }; 254 | } 255 | } 256 | -------------------------------------------------------------------------------- /providers/OpenAiResponsesWithRecursiveToolCallsProvider.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/no-explicit-any */ 2 | import promptfoo from 'promptfoo'; 3 | import { OpenAiResponsesProvider } from 'promptfoo/dist/src/providers/openai/responses'; 4 | import { getTokenUsage, calculateOpenAICost, formatOpenAiError } from 'promptfoo/dist/src/providers/openai/util'; 5 | import logger from 'promptfoo/dist/src/logger'; 6 | import type { CallApiContextParams, CallApiOptionsParams, ProviderResponse, EnvOverrides } from 'promptfoo'; 7 | import type { OpenAiCompletionOptions } from 'promptfoo/dist/src/providers/openai/types'; 8 | 9 | const fetchWithCache = promptfoo.cache.fetchWithCache; 10 | 11 | const REQUEST_TIMEOUT_MS = 1200000; // 30 seconds 12 | 13 | type ApiResponse = { 14 | data: any; 15 | cached: boolean; 16 | status: number; 17 | statusText: string; 18 | }; 19 | 20 | type FunctionCall = { 21 | call_id: string; 22 | name: string; 23 | input?: any; 24 | arguments?: any; 25 | }; 26 | 27 | type OpenAiResponsesWithRecursiveToolCallsProviderOptions = OpenAiCompletionOptions & { 28 | model: string; 29 | }; 30 | 31 | /** 32 | * Extended version of OpenAI responses provider that recursively handles function calls 33 | * until no more functions need to be called or max depth is reached. 34 | */ 35 | export default class OpenAiResponsesWithRecursiveToolCallsProvider extends OpenAiResponsesProvider { 36 | constructor( 37 | options: { config?: OpenAiResponsesWithRecursiveToolCallsProviderOptions; id?: string; env?: EnvOverrides } = {}, 38 | ) { 39 | super(options.config?.model || 'o3', options); 40 | } 41 | 42 | private async makeApiCall(body: any, config: any): Promise { 43 | return await fetchWithCache( 44 | `${this.getApiUrl()}/responses`, 45 | { 46 | method: 'POST', 47 | headers: { 48 | 'Content-Type': 'application/json', 49 | Authorization: `Bearer ${this.getApiKey()}`, 50 | ...(this.getOrganization() ? { 'OpenAI-Organization': this.getOrganization() } : {}), 51 | ...config.headers, 52 | }, 53 | body: JSON.stringify(body), 54 | }, 55 | REQUEST_TIMEOUT_MS, 56 | ); 57 | } 58 | 59 | private extractFunctionCalls(output: any[]): { 60 | functionCalls: FunctionCall[]; 61 | result: string; 62 | refusal: string | null; 63 | } { 64 | let result = ''; 65 | let refusal = null; 66 | const functionCalls: FunctionCall[] = []; 67 | 68 | for (const item of output) { 69 | if (item.type === 'function_call') { 70 | functionCalls.push(item); 71 | result = JSON.stringify(item); 72 | } else if (item.type === 'message' && item.role === 'assistant') { 73 | if (item.content) { 74 | for (const contentItem of item.content) { 75 | if (contentItem.type === 'output_text') { 76 | result += contentItem.text; 77 | } else if (contentItem.type === 'tool_use' || contentItem.type === 'function_call') { 78 | functionCalls.push(contentItem); 79 | result = JSON.stringify(contentItem); 80 | } else if (contentItem.type === 'refusal') { 81 | refusal = contentItem.refusal; 82 | } 83 | } 84 | } else if (item.refusal) { 85 | refusal = item.refusal; 86 | } 87 | } else if (item.type === 'tool_result') { 88 | result = JSON.stringify(item); 89 | } 90 | } 91 | 92 | return { functionCalls, result, refusal }; 93 | } 94 | 95 | private async executeFunctionCall( 96 | functionCall: FunctionCall, 97 | // eslint-disable-next-line @typescript-eslint/no-unsafe-function-type 98 | callbacks: Record, 99 | ): Promise<{ type: string; call_id: string; output: any }> { 100 | const functionName = functionCall.name; 101 | const functionArgs = functionCall.input || functionCall.arguments; 102 | 103 | try { 104 | const functionResult = await callbacks[functionName]( 105 | typeof functionArgs === 'string' ? functionArgs : JSON.stringify(functionArgs), 106 | ); 107 | 108 | return { 109 | type: 'function_call_output', 110 | call_id: functionCall.call_id, 111 | output: functionResult, 112 | }; 113 | } catch (error) { 114 | logger.error(`Error executing function ${functionName}: ${error}`); 115 | return { 116 | type: 'function_call_output', 117 | call_id: functionCall.call_id, 118 | output: `error: ${error}`, 119 | }; 120 | } 121 | } 122 | 123 | private async processResponse( 124 | initialBody: any, 125 | response: ApiResponse, 126 | config: any, 127 | depth: number = 0, 128 | maxDepth: number = 10, 129 | accumTokenUsage: { prompt: number; completion: number; total: number } = { prompt: 0, completion: 0, total: 0 }, 130 | accumCost: number = 0, 131 | ): Promise { 132 | const { data, cached } = response; 133 | 134 | if (data.error) { 135 | await data.deleteFromCache?.(); 136 | return { error: formatOpenAiError(data) }; 137 | } 138 | 139 | const { functionCalls, result, refusal } = this.extractFunctionCalls(data.output || []); 140 | 141 | // Handle refusal 142 | const currentTokenUsage = getTokenUsage(data, cached); 143 | const currentCost = calculateOpenAICost( 144 | this.modelName, 145 | config, 146 | data.usage?.input_tokens, 147 | data.usage?.output_tokens, 148 | 0, 149 | 0, 150 | ); 151 | const totalTokenUsage = { 152 | prompt: (accumTokenUsage.prompt || 0) + (currentTokenUsage.prompt || 0), 153 | completion: (accumTokenUsage.completion || 0) + (currentTokenUsage.completion || 0), 154 | total: (accumTokenUsage.total || 0) + (currentTokenUsage.total || 0), 155 | }; 156 | const totalCost = (accumCost || 0) + (currentCost || 0); 157 | 158 | if (refusal) { 159 | return { 160 | output: refusal, 161 | tokenUsage: totalTokenUsage, 162 | isRefusal: true, 163 | cached, 164 | cost: totalCost, 165 | raw: data, 166 | metadata: { 167 | depth, 168 | }, 169 | }; 170 | } 171 | 172 | // If no function calls or reached max depth, return current result 173 | if (functionCalls.length === 0 || depth >= maxDepth) { 174 | return { 175 | output: result, 176 | tokenUsage: totalTokenUsage, 177 | cached, 178 | cost: totalCost, 179 | raw: data, 180 | metadata: { 181 | depth, 182 | }, 183 | }; 184 | } 185 | 186 | // Execute functions and update conversation history 187 | if (config.functionToolCallbacks) { 188 | const newHistory = []; 189 | 190 | // Execute functions and add results to history 191 | for (const functionCall of functionCalls) { 192 | if (config.functionToolCallbacks[functionCall.name]) { 193 | const result = await this.executeFunctionCall(functionCall, config.functionToolCallbacks); 194 | newHistory.push(result); 195 | } 196 | } 197 | 198 | // Make next recursive call 199 | const nextBody = { 200 | ...initialBody, 201 | input: newHistory, 202 | previous_response_id: response.data.id, 203 | }; 204 | 205 | try { 206 | const nextResponse = await this.makeApiCall(nextBody, config); 207 | 208 | if (nextResponse.status < 200 || nextResponse.status >= 300) { 209 | logger.info(`API ERROR: ${JSON.stringify(nextResponse.data)}`); 210 | return { 211 | error: `API error: ${nextResponse.status} ${nextResponse.statusText}`, 212 | output: result, 213 | tokenUsage: totalTokenUsage, 214 | cached: false, 215 | cost: totalCost, 216 | raw: data, 217 | metadata: { 218 | depth, 219 | }, 220 | }; 221 | } 222 | 223 | // Process next response recursively 224 | const nextResult = await this.processResponse( 225 | nextBody, 226 | nextResponse, 227 | config, 228 | depth + 1, 229 | maxDepth, 230 | totalTokenUsage, 231 | totalCost, 232 | ); 233 | 234 | return { 235 | ...nextResult, 236 | cached: false, 237 | }; 238 | } catch (err) { 239 | logger.error(`API call error in recursive call: ${err}`); 240 | return { 241 | error: `API call error in recursive call: ${err}`, 242 | output: result, 243 | tokenUsage: totalTokenUsage, 244 | cached: false, 245 | cost: totalCost, 246 | raw: data, 247 | metadata: { 248 | depth, 249 | }, 250 | }; 251 | } 252 | } 253 | 254 | // If no callbacks configured, return current result 255 | return { 256 | output: result, 257 | tokenUsage: totalTokenUsage, 258 | cached, 259 | cost: totalCost, 260 | raw: data, 261 | metadata: { 262 | depth, 263 | }, 264 | }; 265 | } 266 | 267 | async callApi( 268 | prompt: string, 269 | context?: CallApiContextParams, 270 | callApiOptions?: CallApiOptionsParams, 271 | ): Promise { 272 | if (!this.getApiKey()) { 273 | throw new Error( 274 | 'OpenAI API key is not set. Set the OPENAI_API_KEY environment variable or add `apiKey` to the provider config.', 275 | ); 276 | } 277 | 278 | const { body: initialBody, config } = this.getOpenAiBody(prompt, context, callApiOptions); 279 | logger.debug(`Calling OpenAI Responses API: ${JSON.stringify(initialBody)}`); 280 | 281 | try { 282 | const initialResponse = await this.makeApiCall(initialBody, config); 283 | 284 | if (initialResponse.status < 200 || initialResponse.status >= 300) { 285 | return { 286 | error: `API error: ${initialResponse.status} ${initialResponse.statusText}\n${ 287 | typeof initialResponse.data === 'string' ? initialResponse.data : JSON.stringify(initialResponse.data) 288 | }`, 289 | }; 290 | } 291 | 292 | // Start recursive processing 293 | return await this.processResponse(initialBody, initialResponse, config); 294 | } catch (err) { 295 | logger.error(`Initial API call error: ${err}`); 296 | return { 297 | error: `Initial API call error: ${err}`, 298 | }; 299 | } 300 | } 301 | } 302 | -------------------------------------------------------------------------------- /tools/validateFhirBundle.mjs: -------------------------------------------------------------------------------- 1 | import { validate } from '../assertions/validateOperation.mjs'; 2 | 3 | export default async function validateFhirBundle(bundle) { 4 | const response = await validate(JSON.stringify(JSON.parse(bundle).bundle)); 5 | if (response.length > 0) { 6 | return JSON.stringify(response); 7 | } 8 | return `No errors found. Here is the bundle: ${JSON.stringify(JSON.parse(bundle).bundle)}`; 9 | } 10 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | // Enable latest features 4 | "lib": ["ESNext", "DOM"], 5 | "target": "ESNext", 6 | "module": "ESNext", 7 | "moduleDetection": "force", 8 | "jsx": "react-jsx", 9 | "allowJs": true, 10 | 11 | // Bundler mode 12 | "moduleResolution": "node", 13 | "allowImportingTsExtensions": true, 14 | "verbatimModuleSyntax": true, 15 | "noEmit": true, 16 | 17 | // Best practices 18 | "strict": true, 19 | "skipLibCheck": true, 20 | "noFallthroughCasesInSwitch": true, 21 | 22 | // Some stricter flags (disabled by default) 23 | "noUnusedLocals": false, 24 | "noUnusedParameters": false, 25 | "noPropertyAccessFromIndexSignature": false 26 | } 27 | } 28 | --------------------------------------------------------------------------------