├── .gitignore ├── README.md ├── data ├── datasets │ └── .gitignore └── prompt_templates │ ├── binary_entailment │ └── entailment_binary6.tmpl │ ├── fact_decomposition │ ├── prompt1.tmpl │ ├── prompt1_icl.tmpl │ ├── prompt2.tmpl │ └── prompt2_icl.tmpl │ └── fact_decomposition_delimiter │ └── prompt1_icl.tmpl ├── docs ├── dataset_summary.md ├── experiments.md ├── nlp_benchmarks.md ├── release_files.md └── runtimes_and_costs.md ├── pyproject.toml ├── scripts ├── annotations │ ├── annotation_scratch.R │ ├── entailment_annotation_analysis.R │ └── irr_calc.R ├── build_docbin_dataset.py ├── build_fact_decomp_prompted_dataset.py ├── build_nli_prompted_datasets.py ├── build_prompt_templates.py ├── combine_notes.py ├── datasets │ ├── download_hf_datasets.py │ ├── download_hf_datasets.sh │ ├── download_physionet_datasets.sh │ └── download_shc_datasets.sh ├── downsample_jsonl.sh ├── experiments │ ├── analyze_outputs.py │ ├── create_entailment_file_from_fact_decomp.sh │ ├── run_inference_client.sh │ └── run_nli_prompt_tuning_experiment.sh ├── get_entailment_pairs.py ├── get_sentence_splits.py ├── hotfixes │ └── get_note_provenance.py ├── init_all_datasets.sh ├── init_nli_datasets.sh ├── manuscript │ └── create_latex_tables.py ├── merge_fact_decomps.py ├── openai_inference_config.json ├── run_azure_openai_fact_decomp_jobs.sh ├── run_openai_client.sh ├── run_transformers_client.sh ├── sample_for_annotation.py ├── sample_source_datasets.py ├── score_entailment_pairs.py ├── slurm │ ├── fact_decomp_h100.sh │ └── test_nigam_h100.sh ├── test_transformers.py └── transformers_inference_config.json ├── src └── factehr │ ├── __init__.py │ ├── clients │ ├── azure_openai_api.py │ ├── azure_openai_api_example.py │ ├── azure_openai_api_parallel.py │ ├── generation_params.json │ ├── transformers_api.py │ ├── vertex_api.py │ ├── vertex_api_batch.py │ └── vertex_api_batch_subprocess.py │ ├── evaluation │ ├── entailment.py │ └── parse_nli_entailment.py │ ├── nlp │ ├── __init__.py │ ├── sbd.py │ └── tokenizer.py │ └── utils │ ├── __init__.py │ ├── compute_entailment_stats.py │ ├── core.py │ ├── estimate_llm_api_cost.py │ ├── get_intermediate_outputs.py │ ├── make_entailment_csv.py │ └── parse_facts.py └── tests ├── test_entailment.py └── test_split_facts.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/README.md -------------------------------------------------------------------------------- /data/datasets/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/data/datasets/.gitignore -------------------------------------------------------------------------------- /data/prompt_templates/binary_entailment/entailment_binary6.tmpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/data/prompt_templates/binary_entailment/entailment_binary6.tmpl -------------------------------------------------------------------------------- /data/prompt_templates/fact_decomposition/prompt1.tmpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/data/prompt_templates/fact_decomposition/prompt1.tmpl -------------------------------------------------------------------------------- /data/prompt_templates/fact_decomposition/prompt1_icl.tmpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/data/prompt_templates/fact_decomposition/prompt1_icl.tmpl -------------------------------------------------------------------------------- /data/prompt_templates/fact_decomposition/prompt2.tmpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/data/prompt_templates/fact_decomposition/prompt2.tmpl -------------------------------------------------------------------------------- /data/prompt_templates/fact_decomposition/prompt2_icl.tmpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/data/prompt_templates/fact_decomposition/prompt2_icl.tmpl -------------------------------------------------------------------------------- /data/prompt_templates/fact_decomposition_delimiter/prompt1_icl.tmpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/data/prompt_templates/fact_decomposition_delimiter/prompt1_icl.tmpl -------------------------------------------------------------------------------- /docs/dataset_summary.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/docs/dataset_summary.md -------------------------------------------------------------------------------- /docs/experiments.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/docs/experiments.md -------------------------------------------------------------------------------- /docs/nlp_benchmarks.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/docs/nlp_benchmarks.md -------------------------------------------------------------------------------- /docs/release_files.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/docs/release_files.md -------------------------------------------------------------------------------- /docs/runtimes_and_costs.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/docs/runtimes_and_costs.md -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/pyproject.toml -------------------------------------------------------------------------------- /scripts/annotations/annotation_scratch.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/annotations/annotation_scratch.R -------------------------------------------------------------------------------- /scripts/annotations/entailment_annotation_analysis.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/annotations/entailment_annotation_analysis.R -------------------------------------------------------------------------------- /scripts/annotations/irr_calc.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/annotations/irr_calc.R -------------------------------------------------------------------------------- /scripts/build_docbin_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/build_docbin_dataset.py -------------------------------------------------------------------------------- /scripts/build_fact_decomp_prompted_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/build_fact_decomp_prompted_dataset.py -------------------------------------------------------------------------------- /scripts/build_nli_prompted_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/build_nli_prompted_datasets.py -------------------------------------------------------------------------------- /scripts/build_prompt_templates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/build_prompt_templates.py -------------------------------------------------------------------------------- /scripts/combine_notes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/combine_notes.py -------------------------------------------------------------------------------- /scripts/datasets/download_hf_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/datasets/download_hf_datasets.py -------------------------------------------------------------------------------- /scripts/datasets/download_hf_datasets.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/datasets/download_hf_datasets.sh -------------------------------------------------------------------------------- /scripts/datasets/download_physionet_datasets.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/datasets/download_physionet_datasets.sh -------------------------------------------------------------------------------- /scripts/datasets/download_shc_datasets.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/datasets/download_shc_datasets.sh -------------------------------------------------------------------------------- /scripts/downsample_jsonl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/downsample_jsonl.sh -------------------------------------------------------------------------------- /scripts/experiments/analyze_outputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/experiments/analyze_outputs.py -------------------------------------------------------------------------------- /scripts/experiments/create_entailment_file_from_fact_decomp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/experiments/create_entailment_file_from_fact_decomp.sh -------------------------------------------------------------------------------- /scripts/experiments/run_inference_client.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/experiments/run_inference_client.sh -------------------------------------------------------------------------------- /scripts/experiments/run_nli_prompt_tuning_experiment.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/experiments/run_nli_prompt_tuning_experiment.sh -------------------------------------------------------------------------------- /scripts/get_entailment_pairs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/get_entailment_pairs.py -------------------------------------------------------------------------------- /scripts/get_sentence_splits.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/get_sentence_splits.py -------------------------------------------------------------------------------- /scripts/hotfixes/get_note_provenance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/hotfixes/get_note_provenance.py -------------------------------------------------------------------------------- /scripts/init_all_datasets.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/init_all_datasets.sh -------------------------------------------------------------------------------- /scripts/init_nli_datasets.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/init_nli_datasets.sh -------------------------------------------------------------------------------- /scripts/manuscript/create_latex_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/manuscript/create_latex_tables.py -------------------------------------------------------------------------------- /scripts/merge_fact_decomps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/merge_fact_decomps.py -------------------------------------------------------------------------------- /scripts/openai_inference_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/openai_inference_config.json -------------------------------------------------------------------------------- /scripts/run_azure_openai_fact_decomp_jobs.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/run_azure_openai_fact_decomp_jobs.sh -------------------------------------------------------------------------------- /scripts/run_openai_client.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/run_openai_client.sh -------------------------------------------------------------------------------- /scripts/run_transformers_client.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/run_transformers_client.sh -------------------------------------------------------------------------------- /scripts/sample_for_annotation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/sample_for_annotation.py -------------------------------------------------------------------------------- /scripts/sample_source_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/sample_source_datasets.py -------------------------------------------------------------------------------- /scripts/score_entailment_pairs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/score_entailment_pairs.py -------------------------------------------------------------------------------- /scripts/slurm/fact_decomp_h100.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/slurm/fact_decomp_h100.sh -------------------------------------------------------------------------------- /scripts/slurm/test_nigam_h100.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/slurm/test_nigam_h100.sh -------------------------------------------------------------------------------- /scripts/test_transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/test_transformers.py -------------------------------------------------------------------------------- /scripts/transformers_inference_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/scripts/transformers_inference_config.json -------------------------------------------------------------------------------- /src/factehr/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.0" 2 | -------------------------------------------------------------------------------- /src/factehr/clients/azure_openai_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/clients/azure_openai_api.py -------------------------------------------------------------------------------- /src/factehr/clients/azure_openai_api_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/clients/azure_openai_api_example.py -------------------------------------------------------------------------------- /src/factehr/clients/azure_openai_api_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/clients/azure_openai_api_parallel.py -------------------------------------------------------------------------------- /src/factehr/clients/generation_params.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/clients/generation_params.json -------------------------------------------------------------------------------- /src/factehr/clients/transformers_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/clients/transformers_api.py -------------------------------------------------------------------------------- /src/factehr/clients/vertex_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/clients/vertex_api.py -------------------------------------------------------------------------------- /src/factehr/clients/vertex_api_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/clients/vertex_api_batch.py -------------------------------------------------------------------------------- /src/factehr/clients/vertex_api_batch_subprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/clients/vertex_api_batch_subprocess.py -------------------------------------------------------------------------------- /src/factehr/evaluation/entailment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/evaluation/entailment.py -------------------------------------------------------------------------------- /src/factehr/evaluation/parse_nli_entailment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/evaluation/parse_nli_entailment.py -------------------------------------------------------------------------------- /src/factehr/nlp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/factehr/nlp/sbd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/nlp/sbd.py -------------------------------------------------------------------------------- /src/factehr/nlp/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/nlp/tokenizer.py -------------------------------------------------------------------------------- /src/factehr/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import * 2 | -------------------------------------------------------------------------------- /src/factehr/utils/compute_entailment_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/utils/compute_entailment_stats.py -------------------------------------------------------------------------------- /src/factehr/utils/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/utils/core.py -------------------------------------------------------------------------------- /src/factehr/utils/estimate_llm_api_cost.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/utils/estimate_llm_api_cost.py -------------------------------------------------------------------------------- /src/factehr/utils/get_intermediate_outputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/utils/get_intermediate_outputs.py -------------------------------------------------------------------------------- /src/factehr/utils/make_entailment_csv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/utils/make_entailment_csv.py -------------------------------------------------------------------------------- /src/factehr/utils/parse_facts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/src/factehr/utils/parse_facts.py -------------------------------------------------------------------------------- /tests/test_entailment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/tests/test_entailment.py -------------------------------------------------------------------------------- /tests/test_split_facts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/som-shahlab/factehr/HEAD/tests/test_split_facts.py --------------------------------------------------------------------------------