├── .devcontainer ├── Dockerfile └── devcontainer.json ├── .env.sample ├── .github ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yaml └── workflows │ ├── azure-dev.yaml │ ├── bicep-audit.yml │ └── python.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .vscode ├── launch.json └── settings.json ├── LICENSE.md ├── README.md ├── azure.yaml ├── docs ├── screenshot_compare.png └── screenshot_summary.png ├── dontknows.config.json ├── example_config.json ├── example_input ├── prompt_ignoresources.txt ├── prompt_nomarkdownmention.txt ├── prompt_piglatin.txt ├── prompt_refined.txt ├── prompt_refined_trimmed.txt ├── prompt_ungrounded.txt ├── prompt_weak.txt ├── qa.jsonl └── qa_dontknows.jsonl ├── example_results ├── baseline │ ├── config.json │ ├── eval_results.jsonl │ ├── evaluate_parameters.json │ └── summary.json ├── baseline2 │ ├── config.json │ ├── eval_results.jsonl │ ├── evaluate_parameters.json │ └── summary.json ├── prompt_nomarkdownmention │ ├── config.json │ ├── eval_results.jsonl │ ├── evaluate_parameters.json │ └── summary.json └── prompt_nomarkdownmention2 │ ├── config.json │ ├── eval_results.jsonl │ ├── evaluate_parameters.json │ └── summary.json ├── infra ├── core │ ├── ai │ │ └── cognitiveservices.bicep │ └── security │ │ └── role.bicep ├── main.bicep └── main.parameters.json ├── pyproject.toml ├── src └── evaltools │ ├── __init__.py │ ├── __main__.py │ ├── cli.py │ ├── eval │ ├── __init__.py │ ├── evaluate.py │ └── evaluate_metrics │ │ ├── __init__.py │ │ ├── base_metric.py │ │ ├── builtin_metrics.py │ │ ├── code_metrics.py │ │ ├── prompt_metrics.py │ │ └── prompts │ │ ├── dontknowness.prompty │ │ ├── mycoherence.prompty │ │ ├── mygroundedness.prompty │ │ └── myrelevance.prompty │ ├── gen │ ├── __init__.py │ └── generate.py │ ├── review │ ├── __init__.py │ ├── answers.html │ ├── diff_app.py │ ├── diff_app.tcss │ ├── diff_markdown.py │ ├── parameters_screen.tcss │ ├── requirements.txt │ ├── summary_app.py │ ├── summary_app.tcss │ ├── summary_markdown.py │ └── utils.py │ └── service_setup.py └── tests ├── test_evaluate.py └── test_evaluate_metrics.py /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/.devcontainer/Dockerfile -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/.devcontainer/devcontainer.json -------------------------------------------------------------------------------- /.env.sample: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/.env.sample -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/.github/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/.github/CONTRIBUTING.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/.github/ISSUE_TEMPLATE.md -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.github/dependabot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/.github/dependabot.yaml -------------------------------------------------------------------------------- /.github/workflows/azure-dev.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/.github/workflows/azure-dev.yaml -------------------------------------------------------------------------------- /.github/workflows/bicep-audit.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/.github/workflows/bicep-audit.yml -------------------------------------------------------------------------------- /.github/workflows/python.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/.github/workflows/python.yaml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/.vscode/launch.json -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/LICENSE.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/README.md -------------------------------------------------------------------------------- /azure.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/azure.yaml -------------------------------------------------------------------------------- /docs/screenshot_compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/docs/screenshot_compare.png -------------------------------------------------------------------------------- /docs/screenshot_summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/docs/screenshot_summary.png -------------------------------------------------------------------------------- /dontknows.config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/dontknows.config.json -------------------------------------------------------------------------------- /example_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_config.json -------------------------------------------------------------------------------- /example_input/prompt_ignoresources.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_input/prompt_ignoresources.txt -------------------------------------------------------------------------------- /example_input/prompt_nomarkdownmention.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_input/prompt_nomarkdownmention.txt -------------------------------------------------------------------------------- /example_input/prompt_piglatin.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_input/prompt_piglatin.txt -------------------------------------------------------------------------------- /example_input/prompt_refined.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_input/prompt_refined.txt -------------------------------------------------------------------------------- /example_input/prompt_refined_trimmed.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_input/prompt_refined_trimmed.txt -------------------------------------------------------------------------------- /example_input/prompt_ungrounded.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_input/prompt_ungrounded.txt -------------------------------------------------------------------------------- /example_input/prompt_weak.txt: -------------------------------------------------------------------------------- 1 | You are a helpful assistant. 2 | -------------------------------------------------------------------------------- /example_input/qa.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_input/qa.jsonl -------------------------------------------------------------------------------- /example_input/qa_dontknows.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_input/qa_dontknows.jsonl -------------------------------------------------------------------------------- /example_results/baseline/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_results/baseline/config.json -------------------------------------------------------------------------------- /example_results/baseline/eval_results.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_results/baseline/eval_results.jsonl -------------------------------------------------------------------------------- /example_results/baseline/evaluate_parameters.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_results/baseline/evaluate_parameters.json -------------------------------------------------------------------------------- /example_results/baseline/summary.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_results/baseline/summary.json -------------------------------------------------------------------------------- /example_results/baseline2/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_results/baseline2/config.json -------------------------------------------------------------------------------- /example_results/baseline2/eval_results.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_results/baseline2/eval_results.jsonl -------------------------------------------------------------------------------- /example_results/baseline2/evaluate_parameters.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_results/baseline2/evaluate_parameters.json -------------------------------------------------------------------------------- /example_results/baseline2/summary.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_results/baseline2/summary.json -------------------------------------------------------------------------------- /example_results/prompt_nomarkdownmention/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_results/prompt_nomarkdownmention/config.json -------------------------------------------------------------------------------- /example_results/prompt_nomarkdownmention/eval_results.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_results/prompt_nomarkdownmention/eval_results.jsonl -------------------------------------------------------------------------------- /example_results/prompt_nomarkdownmention/evaluate_parameters.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_results/prompt_nomarkdownmention/evaluate_parameters.json -------------------------------------------------------------------------------- /example_results/prompt_nomarkdownmention/summary.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_results/prompt_nomarkdownmention/summary.json -------------------------------------------------------------------------------- /example_results/prompt_nomarkdownmention2/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_results/prompt_nomarkdownmention2/config.json -------------------------------------------------------------------------------- /example_results/prompt_nomarkdownmention2/eval_results.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_results/prompt_nomarkdownmention2/eval_results.jsonl -------------------------------------------------------------------------------- /example_results/prompt_nomarkdownmention2/evaluate_parameters.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_results/prompt_nomarkdownmention2/evaluate_parameters.json -------------------------------------------------------------------------------- /example_results/prompt_nomarkdownmention2/summary.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/example_results/prompt_nomarkdownmention2/summary.json -------------------------------------------------------------------------------- /infra/core/ai/cognitiveservices.bicep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/infra/core/ai/cognitiveservices.bicep -------------------------------------------------------------------------------- /infra/core/security/role.bicep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/infra/core/security/role.bicep -------------------------------------------------------------------------------- /infra/main.bicep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/infra/main.bicep -------------------------------------------------------------------------------- /infra/main.parameters.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/infra/main.parameters.json -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/pyproject.toml -------------------------------------------------------------------------------- /src/evaltools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/evaltools/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/__main__.py -------------------------------------------------------------------------------- /src/evaltools/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/cli.py -------------------------------------------------------------------------------- /src/evaltools/eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/evaltools/eval/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/eval/evaluate.py -------------------------------------------------------------------------------- /src/evaltools/eval/evaluate_metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/eval/evaluate_metrics/__init__.py -------------------------------------------------------------------------------- /src/evaltools/eval/evaluate_metrics/base_metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/eval/evaluate_metrics/base_metric.py -------------------------------------------------------------------------------- /src/evaltools/eval/evaluate_metrics/builtin_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/eval/evaluate_metrics/builtin_metrics.py -------------------------------------------------------------------------------- /src/evaltools/eval/evaluate_metrics/code_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/eval/evaluate_metrics/code_metrics.py -------------------------------------------------------------------------------- /src/evaltools/eval/evaluate_metrics/prompt_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/eval/evaluate_metrics/prompt_metrics.py -------------------------------------------------------------------------------- /src/evaltools/eval/evaluate_metrics/prompts/dontknowness.prompty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/eval/evaluate_metrics/prompts/dontknowness.prompty -------------------------------------------------------------------------------- /src/evaltools/eval/evaluate_metrics/prompts/mycoherence.prompty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/eval/evaluate_metrics/prompts/mycoherence.prompty -------------------------------------------------------------------------------- /src/evaltools/eval/evaluate_metrics/prompts/mygroundedness.prompty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/eval/evaluate_metrics/prompts/mygroundedness.prompty -------------------------------------------------------------------------------- /src/evaltools/eval/evaluate_metrics/prompts/myrelevance.prompty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/eval/evaluate_metrics/prompts/myrelevance.prompty -------------------------------------------------------------------------------- /src/evaltools/gen/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/evaltools/gen/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/gen/generate.py -------------------------------------------------------------------------------- /src/evaltools/review/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/evaltools/review/answers.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/review/answers.html -------------------------------------------------------------------------------- /src/evaltools/review/diff_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/review/diff_app.py -------------------------------------------------------------------------------- /src/evaltools/review/diff_app.tcss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/review/diff_app.tcss -------------------------------------------------------------------------------- /src/evaltools/review/diff_markdown.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/review/diff_markdown.py -------------------------------------------------------------------------------- /src/evaltools/review/parameters_screen.tcss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/review/parameters_screen.tcss -------------------------------------------------------------------------------- /src/evaltools/review/requirements.txt: -------------------------------------------------------------------------------- 1 | textual 2 | typer 3 | -------------------------------------------------------------------------------- /src/evaltools/review/summary_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/review/summary_app.py -------------------------------------------------------------------------------- /src/evaltools/review/summary_app.tcss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/review/summary_app.tcss -------------------------------------------------------------------------------- /src/evaltools/review/summary_markdown.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/review/summary_markdown.py -------------------------------------------------------------------------------- /src/evaltools/review/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/review/utils.py -------------------------------------------------------------------------------- /src/evaltools/service_setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/src/evaltools/service_setup.py -------------------------------------------------------------------------------- /tests/test_evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/tests/test_evaluate.py -------------------------------------------------------------------------------- /tests/test_evaluate_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-rag-chat-evaluator/HEAD/tests/test_evaluate_metrics.py --------------------------------------------------------------------------------