├── .gitignore ├── LICENSE ├── README.md ├── figs ├── ragas_metrics.png └── score_distribs.png ├── rag-data ├── amnesty_qa-new-01.jsonl ├── amnesty_qa-new-01.tsv ├── amnesty_qa-new-02.jsonl ├── more-q-01.jsonl └── output-01.tsv ├── requirements.txt ├── resources ├── configs │ ├── answer_correctness-best.json │ ├── answer_relevance-best.json │ ├── context_precision-best.json │ ├── context_recall-best.json │ ├── context_relevance-best.json │ └── faithfulness-best.json ├── demodata │ └── amnesty_qa.jsonl ├── prompts │ ├── answer_correctness_1.txt │ ├── answer_relevance_1.txt │ ├── answer_relevance_2.txt │ ├── context_precision_1.txt │ ├── context_recall_1.txt │ ├── context_relevance_1.txt │ ├── faithfulness_1.txt │ ├── faithfulness_2.txt │ └── more_questions.txt └── reports │ ├── dspy-reports │ ├── answer_correctness_report.tsv │ ├── answer_relevance_report.tsv │ ├── context_precision_report.tsv │ ├── context_recall_report.tsv │ ├── context_relevance_report.tsv │ ├── context_utilization_report.tsv │ └── faithfulness_report.tsv │ └── lcel-reports │ ├── answer_correctness_report.tsv │ ├── answer_relevance_report.tsv │ ├── answer_similarity_report.tsv │ ├── context_precision_report.tsv │ ├── context_recall_report.tsv │ ├── context_relevance_report.tsv │ ├── context_utilization_report.tsv │ └── faithfulness_report.tsv └── src ├── convert_amnestyQA_to_desired_format.ipynb ├── generate_datasets.py ├── learned ├── __init__.py ├── answer_correctness.py ├── answer_relevance.py ├── context_precision.py ├── context_recall.py ├── context_relevance.py ├── faithfulness.py └── learning_utils.py ├── make_more_questions.py ├── metrics.py ├── prompted ├── __init__.py ├── answer_correctness.py ├── answer_relevance.py ├── answer_similarity.py ├── context_precision.py ├── context_recall.py ├── context_relevance.py ├── faithfulness.py ├── more_questions.py └── prompt_utils.py ├── run_learned_metrics.py ├── run_prompted_metrics.py ├── score_distributions.py ├── streamlit_app.py └── test_nltk.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/README.md -------------------------------------------------------------------------------- /figs/ragas_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/figs/ragas_metrics.png -------------------------------------------------------------------------------- /figs/score_distribs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/figs/score_distribs.png -------------------------------------------------------------------------------- /rag-data/amnesty_qa-new-01.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/rag-data/amnesty_qa-new-01.jsonl -------------------------------------------------------------------------------- /rag-data/amnesty_qa-new-01.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/rag-data/amnesty_qa-new-01.tsv -------------------------------------------------------------------------------- /rag-data/amnesty_qa-new-02.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/rag-data/amnesty_qa-new-02.jsonl -------------------------------------------------------------------------------- /rag-data/more-q-01.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/rag-data/more-q-01.jsonl -------------------------------------------------------------------------------- /rag-data/output-01.tsv: -------------------------------------------------------------------------------- 1 | #QID CONTEXT_RECALL 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/requirements.txt -------------------------------------------------------------------------------- /resources/configs/answer_correctness-best.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/configs/answer_correctness-best.json -------------------------------------------------------------------------------- /resources/configs/answer_relevance-best.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/configs/answer_relevance-best.json -------------------------------------------------------------------------------- /resources/configs/context_precision-best.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/configs/context_precision-best.json -------------------------------------------------------------------------------- /resources/configs/context_recall-best.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/configs/context_recall-best.json -------------------------------------------------------------------------------- /resources/configs/context_relevance-best.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/configs/context_relevance-best.json -------------------------------------------------------------------------------- /resources/configs/faithfulness-best.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/configs/faithfulness-best.json -------------------------------------------------------------------------------- /resources/demodata/amnesty_qa.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/demodata/amnesty_qa.jsonl -------------------------------------------------------------------------------- /resources/prompts/answer_correctness_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/prompts/answer_correctness_1.txt -------------------------------------------------------------------------------- /resources/prompts/answer_relevance_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/prompts/answer_relevance_1.txt -------------------------------------------------------------------------------- /resources/prompts/answer_relevance_2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/prompts/answer_relevance_2.txt -------------------------------------------------------------------------------- /resources/prompts/context_precision_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/prompts/context_precision_1.txt -------------------------------------------------------------------------------- /resources/prompts/context_recall_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/prompts/context_recall_1.txt -------------------------------------------------------------------------------- /resources/prompts/context_relevance_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/prompts/context_relevance_1.txt -------------------------------------------------------------------------------- /resources/prompts/faithfulness_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/prompts/faithfulness_1.txt -------------------------------------------------------------------------------- /resources/prompts/faithfulness_2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/prompts/faithfulness_2.txt -------------------------------------------------------------------------------- /resources/prompts/more_questions.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/prompts/more_questions.txt -------------------------------------------------------------------------------- /resources/reports/dspy-reports/answer_correctness_report.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/reports/dspy-reports/answer_correctness_report.tsv -------------------------------------------------------------------------------- /resources/reports/dspy-reports/answer_relevance_report.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/reports/dspy-reports/answer_relevance_report.tsv -------------------------------------------------------------------------------- /resources/reports/dspy-reports/context_precision_report.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/reports/dspy-reports/context_precision_report.tsv -------------------------------------------------------------------------------- /resources/reports/dspy-reports/context_recall_report.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/reports/dspy-reports/context_recall_report.tsv -------------------------------------------------------------------------------- /resources/reports/dspy-reports/context_relevance_report.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/reports/dspy-reports/context_relevance_report.tsv -------------------------------------------------------------------------------- /resources/reports/dspy-reports/context_utilization_report.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/reports/dspy-reports/context_utilization_report.tsv -------------------------------------------------------------------------------- /resources/reports/dspy-reports/faithfulness_report.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/reports/dspy-reports/faithfulness_report.tsv -------------------------------------------------------------------------------- /resources/reports/lcel-reports/answer_correctness_report.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/reports/lcel-reports/answer_correctness_report.tsv -------------------------------------------------------------------------------- /resources/reports/lcel-reports/answer_relevance_report.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/reports/lcel-reports/answer_relevance_report.tsv -------------------------------------------------------------------------------- /resources/reports/lcel-reports/answer_similarity_report.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/reports/lcel-reports/answer_similarity_report.tsv -------------------------------------------------------------------------------- /resources/reports/lcel-reports/context_precision_report.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/reports/lcel-reports/context_precision_report.tsv -------------------------------------------------------------------------------- /resources/reports/lcel-reports/context_recall_report.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/reports/lcel-reports/context_recall_report.tsv -------------------------------------------------------------------------------- /resources/reports/lcel-reports/context_relevance_report.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/reports/lcel-reports/context_relevance_report.tsv -------------------------------------------------------------------------------- /resources/reports/lcel-reports/context_utilization_report.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/reports/lcel-reports/context_utilization_report.tsv -------------------------------------------------------------------------------- /resources/reports/lcel-reports/faithfulness_report.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/resources/reports/lcel-reports/faithfulness_report.tsv -------------------------------------------------------------------------------- /src/convert_amnestyQA_to_desired_format.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/convert_amnestyQA_to_desired_format.ipynb -------------------------------------------------------------------------------- /src/generate_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/generate_datasets.py -------------------------------------------------------------------------------- /src/learned/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/learned/answer_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/learned/answer_correctness.py -------------------------------------------------------------------------------- /src/learned/answer_relevance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/learned/answer_relevance.py -------------------------------------------------------------------------------- /src/learned/context_precision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/learned/context_precision.py -------------------------------------------------------------------------------- /src/learned/context_recall.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/learned/context_recall.py -------------------------------------------------------------------------------- /src/learned/context_relevance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/learned/context_relevance.py -------------------------------------------------------------------------------- /src/learned/faithfulness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/learned/faithfulness.py -------------------------------------------------------------------------------- /src/learned/learning_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/learned/learning_utils.py -------------------------------------------------------------------------------- /src/make_more_questions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/make_more_questions.py -------------------------------------------------------------------------------- /src/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/metrics.py -------------------------------------------------------------------------------- /src/prompted/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/prompted/answer_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/prompted/answer_correctness.py -------------------------------------------------------------------------------- /src/prompted/answer_relevance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/prompted/answer_relevance.py -------------------------------------------------------------------------------- /src/prompted/answer_similarity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/prompted/answer_similarity.py -------------------------------------------------------------------------------- /src/prompted/context_precision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/prompted/context_precision.py -------------------------------------------------------------------------------- /src/prompted/context_recall.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/prompted/context_recall.py -------------------------------------------------------------------------------- /src/prompted/context_relevance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/prompted/context_relevance.py -------------------------------------------------------------------------------- /src/prompted/faithfulness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/prompted/faithfulness.py -------------------------------------------------------------------------------- /src/prompted/more_questions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/prompted/more_questions.py -------------------------------------------------------------------------------- /src/prompted/prompt_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/prompted/prompt_utils.py -------------------------------------------------------------------------------- /src/run_learned_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/run_learned_metrics.py -------------------------------------------------------------------------------- /src/run_prompted_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/run_prompted_metrics.py -------------------------------------------------------------------------------- /src/score_distributions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/score_distributions.py -------------------------------------------------------------------------------- /src/streamlit_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/streamlit_app.py -------------------------------------------------------------------------------- /src/test_nltk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sujitpal/llm-rag-eval/HEAD/src/test_nltk.py --------------------------------------------------------------------------------