├── .github └── workflows │ ├── docs.yml │ ├── lint_checks.yml │ └── unit_test_coverage.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .root ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── SECURITY.md ├── THIRD-PARTY-LICENSES ├── devtool ├── examples ├── bedrock-claude-factual-knowledge.ipynb ├── bedrock-claude-summarization-accuracy.ipynb ├── byo-model-outputs.ipynb ├── crows-pairs_sample.jsonl ├── custom_model_runner_chat_gpt.ipynb ├── custom_model_runner_hf.ipynb ├── example_results │ ├── huggingface-llm-falcon-7b-bf16.json │ ├── huggingface-llm-falcon-7b-instruct-bf16.json │ └── radarplot.pdf ├── gigaword_sample.jsonl ├── jumpstart-falcon-stereotyping.ipynb ├── model-comparison.ipynb ├── real_toxicity_sample.jsonl └── trex_sample.jsonl ├── poetry.lock ├── pyproject.toml ├── setup.cfg ├── src ├── __init__.py └── fmeval │ ├── __init__.py │ ├── constants.py │ ├── data_loaders │ ├── __init__.py │ ├── data_config.py │ ├── data_sources.py │ ├── jmespath_util.py │ ├── json_data_loader.py │ ├── json_parser.py │ └── util.py │ ├── eval.py │ ├── eval_algo_mapping.py │ ├── eval_algorithms │ ├── __init__.py │ ├── classification_accuracy.py │ ├── classification_accuracy_semantic_robustness.py │ ├── common.py │ ├── eval_algorithm.py │ ├── factual_knowledge.py │ ├── general_semantic_robustness.py │ ├── helper_models │ │ ├── __init__.py │ │ └── helper_model.py │ ├── prompt_stereotyping.py │ ├── qa_accuracy.py │ ├── qa_accuracy_semantic_robustness.py │ ├── qa_toxicity.py │ ├── save_strategy.py │ ├── semantic_perturbation_utils.py │ ├── semantic_robustness_utils.py │ ├── summarization_accuracy.py │ ├── summarization_accuracy_semantic_robustness.py │ ├── summarization_toxicity.py │ ├── toxicity.py │ └── util.py │ ├── exceptions.py │ ├── model_runners │ ├── __init__.py │ ├── bedrock_model_runner.py │ ├── composers │ │ ├── __init__.py │ │ ├── composers.py │ │ ├── jumpstart_composer.py │ │ └── template.py │ ├── extractors │ │ ├── __init__.py │ │ ├── extractor.py │ │ ├── json_extractor.py │ │ └── jumpstart_extractor.py │ ├── model_runner.py │ ├── sm_jumpstart_model_runner.py │ ├── sm_model_runner.py │ └── util.py │ ├── perf_util.py │ ├── reporting │ ├── __init__.py │ ├── cells.py │ ├── constants.py │ ├── eval_output_cells.py │ └── util.py │ ├── transforms │ ├── __init__.py │ ├── batched_transform.py │ ├── common.py │ ├── semantic_perturbations.py │ ├── semantic_robustness_metrics.py │ ├── summarization_accuracy_metrics.py │ ├── transform.py │ ├── transform_pipeline.py │ └── util.py │ └── util.py └── test ├── __init__.py ├── integration ├── __init__.py ├── conftest.py ├── datasets │ ├── gigaword_sample.jsonl │ ├── real_toxicity_sample.jsonl │ ├── trex_sample.jsonl │ ├── trex_sample_small.jsonl │ ├── triviaQA_sample.jsonl │ └── triviaQA_sample_small.jsonl ├── models │ ├── __init__.py │ ├── hf_model_runner.py │ └── model_runners.py ├── test_classification_accuracy.py ├── test_classification_accuracy_semantic_robustness.py ├── test_create_extractor.py ├── test_factual_knowledge.py ├── test_general_semantic_robustness.py ├── test_prompt_stereotyping.py ├── test_qa_accuracy.py ├── test_qa_accuracy_semantic_robustness.py ├── test_summarization_accuracy.py ├── test_summarization_accuracy_semantic_robustness.py ├── test_toxicity.py ├── test_util.py └── transforms │ └── test_transform_pipeline.py └── unit ├── __init__.py ├── conftest.py ├── data_loaders ├── __init__.py ├── test_data_config.py ├── test_data_sources.py ├── test_jmespath_util.py ├── test_json_data_loader.py ├── test_json_parser.py └── test_util.py ├── eval_algorithms ├── __init__.py ├── test_classification_accuracy.py ├── test_classification_accuracy_semantic_robustness.py ├── test_common.py ├── test_dataclasses.py ├── test_eval_algorithm.py ├── test_factual_knowledge.py ├── test_general_semantic_robustness.py ├── test_helper_model.py ├── test_prompt_stereotyping.py ├── test_qa_accuracy.py ├── test_qa_accuracy_semantic_robustness.py ├── test_qa_toxicity.py ├── test_save_strategy.py ├── test_semantic_perturbation_utils.py ├── test_summarization_accuracy.py ├── test_summarization_accuracy_semantic_robustness.py ├── test_summarization_toxicity.py ├── test_task_eval_mapping.py ├── test_toxicity.py └── test_util.py ├── example_notebooks ├── __init__.py └── test_example_notebooks.py ├── model_runners ├── __init__.py ├── composers │ ├── __init__.py │ ├── test_composers.py │ ├── test_create_content_composer.py │ ├── test_jumpstart_composer.py │ └── test_vanilla_template.py ├── extractors │ ├── __init__.py │ ├── test_create_extractor.py │ ├── test_json_extractor.py │ └── test_jumpstart_extractor.py ├── test_bedrock_model_runner.py ├── test_model_runner.py ├── test_sm_jumpstart_model_runner.py ├── test_sm_model_runner.py └── test_util.py ├── reporting ├── __init__.py ├── test_cells.py ├── test_eval_output_cells.py └── test_util.py ├── test_eval_algo_mapping.py ├── test_util.py └── transforms ├── test_common.py ├── test_semantic_perturbations.py ├── test_semantic_robustness_metrics.py ├── test_summarization_accuracy_metrics.py ├── test_transform.py ├── test_transform_pipeline.py └── test_util.py /.github/workflows/docs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/.github/workflows/docs.yml -------------------------------------------------------------------------------- /.github/workflows/lint_checks.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/.github/workflows/lint_checks.yml -------------------------------------------------------------------------------- /.github/workflows/unit_test_coverage.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/.github/workflows/unit_test_coverage.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.root: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/LICENSE -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/README.md -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/SECURITY.md -------------------------------------------------------------------------------- /THIRD-PARTY-LICENSES: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/THIRD-PARTY-LICENSES -------------------------------------------------------------------------------- /devtool: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/devtool -------------------------------------------------------------------------------- /examples/bedrock-claude-factual-knowledge.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/examples/bedrock-claude-factual-knowledge.ipynb -------------------------------------------------------------------------------- /examples/bedrock-claude-summarization-accuracy.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/examples/bedrock-claude-summarization-accuracy.ipynb -------------------------------------------------------------------------------- /examples/byo-model-outputs.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/examples/byo-model-outputs.ipynb -------------------------------------------------------------------------------- /examples/crows-pairs_sample.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/examples/crows-pairs_sample.jsonl -------------------------------------------------------------------------------- /examples/custom_model_runner_chat_gpt.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/examples/custom_model_runner_chat_gpt.ipynb -------------------------------------------------------------------------------- /examples/custom_model_runner_hf.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/examples/custom_model_runner_hf.ipynb -------------------------------------------------------------------------------- /examples/example_results/huggingface-llm-falcon-7b-bf16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/examples/example_results/huggingface-llm-falcon-7b-bf16.json -------------------------------------------------------------------------------- /examples/example_results/huggingface-llm-falcon-7b-instruct-bf16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/examples/example_results/huggingface-llm-falcon-7b-instruct-bf16.json -------------------------------------------------------------------------------- /examples/example_results/radarplot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/examples/example_results/radarplot.pdf -------------------------------------------------------------------------------- /examples/gigaword_sample.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/examples/gigaword_sample.jsonl -------------------------------------------------------------------------------- /examples/jumpstart-falcon-stereotyping.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/examples/jumpstart-falcon-stereotyping.ipynb -------------------------------------------------------------------------------- /examples/model-comparison.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/examples/model-comparison.ipynb -------------------------------------------------------------------------------- /examples/real_toxicity_sample.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/examples/real_toxicity_sample.jsonl -------------------------------------------------------------------------------- /examples/trex_sample.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/examples/trex_sample.jsonl -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/poetry.lock -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/pyproject.toml -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/setup.cfg -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/fmeval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/fmeval/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/constants.py -------------------------------------------------------------------------------- /src/fmeval/data_loaders/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/fmeval/data_loaders/data_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/data_loaders/data_config.py -------------------------------------------------------------------------------- /src/fmeval/data_loaders/data_sources.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/data_loaders/data_sources.py -------------------------------------------------------------------------------- /src/fmeval/data_loaders/jmespath_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/data_loaders/jmespath_util.py -------------------------------------------------------------------------------- /src/fmeval/data_loaders/json_data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/data_loaders/json_data_loader.py -------------------------------------------------------------------------------- /src/fmeval/data_loaders/json_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/data_loaders/json_parser.py -------------------------------------------------------------------------------- /src/fmeval/data_loaders/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/data_loaders/util.py -------------------------------------------------------------------------------- /src/fmeval/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval.py -------------------------------------------------------------------------------- /src/fmeval/eval_algo_mapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algo_mapping.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/__init__.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/classification_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/classification_accuracy.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/classification_accuracy_semantic_robustness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/classification_accuracy_semantic_robustness.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/common.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/eval_algorithm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/eval_algorithm.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/factual_knowledge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/factual_knowledge.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/general_semantic_robustness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/general_semantic_robustness.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/helper_models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/helper_models/helper_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/helper_models/helper_model.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/prompt_stereotyping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/prompt_stereotyping.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/qa_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/qa_accuracy.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/qa_accuracy_semantic_robustness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/qa_accuracy_semantic_robustness.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/qa_toxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/qa_toxicity.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/save_strategy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/save_strategy.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/semantic_perturbation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/semantic_perturbation_utils.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/semantic_robustness_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/semantic_robustness_utils.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/summarization_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/summarization_accuracy.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/summarization_accuracy_semantic_robustness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/summarization_accuracy_semantic_robustness.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/summarization_toxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/summarization_toxicity.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/toxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/toxicity.py -------------------------------------------------------------------------------- /src/fmeval/eval_algorithms/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/eval_algorithms/util.py -------------------------------------------------------------------------------- /src/fmeval/exceptions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/exceptions.py -------------------------------------------------------------------------------- /src/fmeval/model_runners/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/fmeval/model_runners/bedrock_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/model_runners/bedrock_model_runner.py -------------------------------------------------------------------------------- /src/fmeval/model_runners/composers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/model_runners/composers/__init__.py -------------------------------------------------------------------------------- /src/fmeval/model_runners/composers/composers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/model_runners/composers/composers.py -------------------------------------------------------------------------------- /src/fmeval/model_runners/composers/jumpstart_composer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/model_runners/composers/jumpstart_composer.py -------------------------------------------------------------------------------- /src/fmeval/model_runners/composers/template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/model_runners/composers/template.py -------------------------------------------------------------------------------- /src/fmeval/model_runners/extractors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/model_runners/extractors/__init__.py -------------------------------------------------------------------------------- /src/fmeval/model_runners/extractors/extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/model_runners/extractors/extractor.py -------------------------------------------------------------------------------- /src/fmeval/model_runners/extractors/json_extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/model_runners/extractors/json_extractor.py -------------------------------------------------------------------------------- /src/fmeval/model_runners/extractors/jumpstart_extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/model_runners/extractors/jumpstart_extractor.py -------------------------------------------------------------------------------- /src/fmeval/model_runners/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/model_runners/model_runner.py -------------------------------------------------------------------------------- /src/fmeval/model_runners/sm_jumpstart_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/model_runners/sm_jumpstart_model_runner.py -------------------------------------------------------------------------------- /src/fmeval/model_runners/sm_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/model_runners/sm_model_runner.py -------------------------------------------------------------------------------- /src/fmeval/model_runners/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/model_runners/util.py -------------------------------------------------------------------------------- /src/fmeval/perf_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/perf_util.py -------------------------------------------------------------------------------- /src/fmeval/reporting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/fmeval/reporting/cells.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/reporting/cells.py -------------------------------------------------------------------------------- /src/fmeval/reporting/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/reporting/constants.py -------------------------------------------------------------------------------- /src/fmeval/reporting/eval_output_cells.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/reporting/eval_output_cells.py -------------------------------------------------------------------------------- /src/fmeval/reporting/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/reporting/util.py -------------------------------------------------------------------------------- /src/fmeval/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/fmeval/transforms/batched_transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/transforms/batched_transform.py -------------------------------------------------------------------------------- /src/fmeval/transforms/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/transforms/common.py -------------------------------------------------------------------------------- /src/fmeval/transforms/semantic_perturbations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/transforms/semantic_perturbations.py -------------------------------------------------------------------------------- /src/fmeval/transforms/semantic_robustness_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/transforms/semantic_robustness_metrics.py -------------------------------------------------------------------------------- /src/fmeval/transforms/summarization_accuracy_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/transforms/summarization_accuracy_metrics.py -------------------------------------------------------------------------------- /src/fmeval/transforms/transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/transforms/transform.py -------------------------------------------------------------------------------- /src/fmeval/transforms/transform_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/transforms/transform_pipeline.py -------------------------------------------------------------------------------- /src/fmeval/transforms/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/transforms/util.py -------------------------------------------------------------------------------- /src/fmeval/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/src/fmeval/util.py -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/conftest.py -------------------------------------------------------------------------------- /test/integration/datasets/gigaword_sample.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/datasets/gigaword_sample.jsonl -------------------------------------------------------------------------------- /test/integration/datasets/real_toxicity_sample.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/datasets/real_toxicity_sample.jsonl -------------------------------------------------------------------------------- /test/integration/datasets/trex_sample.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/datasets/trex_sample.jsonl -------------------------------------------------------------------------------- /test/integration/datasets/trex_sample_small.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/datasets/trex_sample_small.jsonl -------------------------------------------------------------------------------- /test/integration/datasets/triviaQA_sample.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/datasets/triviaQA_sample.jsonl -------------------------------------------------------------------------------- /test/integration/datasets/triviaQA_sample_small.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/datasets/triviaQA_sample_small.jsonl -------------------------------------------------------------------------------- /test/integration/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/models/hf_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/models/hf_model_runner.py -------------------------------------------------------------------------------- /test/integration/models/model_runners.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/models/model_runners.py -------------------------------------------------------------------------------- /test/integration/test_classification_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/test_classification_accuracy.py -------------------------------------------------------------------------------- /test/integration/test_classification_accuracy_semantic_robustness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/test_classification_accuracy_semantic_robustness.py -------------------------------------------------------------------------------- /test/integration/test_create_extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/test_create_extractor.py -------------------------------------------------------------------------------- /test/integration/test_factual_knowledge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/test_factual_knowledge.py -------------------------------------------------------------------------------- /test/integration/test_general_semantic_robustness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/test_general_semantic_robustness.py -------------------------------------------------------------------------------- /test/integration/test_prompt_stereotyping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/test_prompt_stereotyping.py -------------------------------------------------------------------------------- /test/integration/test_qa_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/test_qa_accuracy.py -------------------------------------------------------------------------------- /test/integration/test_qa_accuracy_semantic_robustness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/test_qa_accuracy_semantic_robustness.py -------------------------------------------------------------------------------- /test/integration/test_summarization_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/test_summarization_accuracy.py -------------------------------------------------------------------------------- /test/integration/test_summarization_accuracy_semantic_robustness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/test_summarization_accuracy_semantic_robustness.py -------------------------------------------------------------------------------- /test/integration/test_toxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/test_toxicity.py -------------------------------------------------------------------------------- /test/integration/test_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/test_util.py -------------------------------------------------------------------------------- /test/integration/transforms/test_transform_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/integration/transforms/test_transform_pipeline.py -------------------------------------------------------------------------------- /test/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/conftest.py -------------------------------------------------------------------------------- /test/unit/data_loaders/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/data_loaders/test_data_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/data_loaders/test_data_config.py -------------------------------------------------------------------------------- /test/unit/data_loaders/test_data_sources.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/data_loaders/test_data_sources.py -------------------------------------------------------------------------------- /test/unit/data_loaders/test_jmespath_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/data_loaders/test_jmespath_util.py -------------------------------------------------------------------------------- /test/unit/data_loaders/test_json_data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/data_loaders/test_json_data_loader.py -------------------------------------------------------------------------------- /test/unit/data_loaders/test_json_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/data_loaders/test_json_parser.py -------------------------------------------------------------------------------- /test/unit/data_loaders/test_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/data_loaders/test_util.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_classification_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_classification_accuracy.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_classification_accuracy_semantic_robustness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_classification_accuracy_semantic_robustness.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_common.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_dataclasses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_dataclasses.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_eval_algorithm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_eval_algorithm.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_factual_knowledge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_factual_knowledge.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_general_semantic_robustness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_general_semantic_robustness.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_helper_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_helper_model.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_prompt_stereotyping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_prompt_stereotyping.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_qa_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_qa_accuracy.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_qa_accuracy_semantic_robustness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_qa_accuracy_semantic_robustness.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_qa_toxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_qa_toxicity.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_save_strategy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_save_strategy.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_semantic_perturbation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_semantic_perturbation_utils.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_summarization_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_summarization_accuracy.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_summarization_accuracy_semantic_robustness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_summarization_accuracy_semantic_robustness.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_summarization_toxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_summarization_toxicity.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_task_eval_mapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_task_eval_mapping.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_toxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_toxicity.py -------------------------------------------------------------------------------- /test/unit/eval_algorithms/test_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/eval_algorithms/test_util.py -------------------------------------------------------------------------------- /test/unit/example_notebooks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/example_notebooks/test_example_notebooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/example_notebooks/test_example_notebooks.py -------------------------------------------------------------------------------- /test/unit/model_runners/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/model_runners/composers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/model_runners/composers/test_composers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/model_runners/composers/test_composers.py -------------------------------------------------------------------------------- /test/unit/model_runners/composers/test_create_content_composer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/model_runners/composers/test_create_content_composer.py -------------------------------------------------------------------------------- /test/unit/model_runners/composers/test_jumpstart_composer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/model_runners/composers/test_jumpstart_composer.py -------------------------------------------------------------------------------- /test/unit/model_runners/composers/test_vanilla_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/model_runners/composers/test_vanilla_template.py -------------------------------------------------------------------------------- /test/unit/model_runners/extractors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/model_runners/extractors/test_create_extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/model_runners/extractors/test_create_extractor.py -------------------------------------------------------------------------------- /test/unit/model_runners/extractors/test_json_extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/model_runners/extractors/test_json_extractor.py -------------------------------------------------------------------------------- /test/unit/model_runners/extractors/test_jumpstart_extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/model_runners/extractors/test_jumpstart_extractor.py -------------------------------------------------------------------------------- /test/unit/model_runners/test_bedrock_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/model_runners/test_bedrock_model_runner.py -------------------------------------------------------------------------------- /test/unit/model_runners/test_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/model_runners/test_model_runner.py -------------------------------------------------------------------------------- /test/unit/model_runners/test_sm_jumpstart_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/model_runners/test_sm_jumpstart_model_runner.py -------------------------------------------------------------------------------- /test/unit/model_runners/test_sm_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/model_runners/test_sm_model_runner.py -------------------------------------------------------------------------------- /test/unit/model_runners/test_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/model_runners/test_util.py -------------------------------------------------------------------------------- /test/unit/reporting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/reporting/test_cells.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/reporting/test_cells.py -------------------------------------------------------------------------------- /test/unit/reporting/test_eval_output_cells.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/reporting/test_eval_output_cells.py -------------------------------------------------------------------------------- /test/unit/reporting/test_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/reporting/test_util.py -------------------------------------------------------------------------------- /test/unit/test_eval_algo_mapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/test_eval_algo_mapping.py -------------------------------------------------------------------------------- /test/unit/test_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/test_util.py -------------------------------------------------------------------------------- /test/unit/transforms/test_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/transforms/test_common.py -------------------------------------------------------------------------------- /test/unit/transforms/test_semantic_perturbations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/transforms/test_semantic_perturbations.py -------------------------------------------------------------------------------- /test/unit/transforms/test_semantic_robustness_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/transforms/test_semantic_robustness_metrics.py -------------------------------------------------------------------------------- /test/unit/transforms/test_summarization_accuracy_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/transforms/test_summarization_accuracy_metrics.py -------------------------------------------------------------------------------- /test/unit/transforms/test_transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/transforms/test_transform.py -------------------------------------------------------------------------------- /test/unit/transforms/test_transform_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/transforms/test_transform_pipeline.py -------------------------------------------------------------------------------- /test/unit/transforms/test_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws/fmeval/HEAD/test/unit/transforms/test_util.py --------------------------------------------------------------------------------