├── .dockerignore ├── .github └── workflows │ ├── docker-publish.yml │ └── python-publish.yml ├── .gitignore ├── Dockerfile ├── Dockerfile.lambda ├── LICENSE.md ├── Makefile ├── README.md ├── docs ├── .gitignore ├── .vscode │ └── settings.json ├── README.md ├── api-reference │ ├── endpoint │ │ ├── azure-jailbreak-detection.mdx │ │ ├── azure-prompt-injection-detection.mdx │ │ ├── competitor-blocklist.mdx │ │ ├── competitor-detection-llm.mdx │ │ ├── content-safety.mdx │ │ ├── google-cloud-dlp-pii-detection.mdx │ │ ├── haystack-faithfulness.mdx │ │ ├── lingua-language-detection.mdx │ │ ├── llama-guard.mdx │ │ ├── llm-basic-evaluator.mdx │ │ ├── llm-boolean-evaluator.mdx │ │ ├── llm-score-evaluator.mdx │ │ ├── llm-similarity-evaluator.mdx │ │ ├── off-topic-detection.mdx │ │ ├── openai-moderation.mdx │ │ ├── ragas-answer-relevancy.mdx │ │ ├── ragas-context-precision.mdx │ │ ├── ragas-context-recall.mdx │ │ ├── ragas-context-relevancy.mdx │ │ ├── ragas-context-utilization.mdx │ │ └── ragas-faithfulness.mdx │ ├── introduction.mdx │ └── openapi.json ├── documentation │ ├── API-example.mdx │ ├── batch-evaluation.mdx │ ├── evaluators.mdx │ ├── getting-started.mdx │ ├── introduction.mdx │ ├── modular-architecture │ │ ├── base-evaluator.mdx │ │ └── contributing.mdx │ ├── plan.mdx │ ├── quickstart.mdx │ └── unit-tests.mdx ├── evaluators │ ├── lingua.mdx │ └── openai-moderation.mdx ├── favicon.svg ├── how-to-choose-your-evaluator.mdx ├── introduction.mdx ├── logo │ ├── dark.svg │ └── light.svg ├── mint.json ├── status.mdx ├── style.css └── tutorials │ ├── ci-cd-pipeline-evaluation.mdx │ ├── extensive-unit-testing.mdx │ └── rag-evaluation.mdx ├── evaluators ├── azure │ ├── langevals_azure │ │ ├── content_safety.py │ │ ├── jailbreak.py │ │ └── prompt_injection.py │ ├── poetry.lock │ ├── poetry.toml │ ├── pyproject.toml │ └── tests │ │ ├── test_content_safety.py │ │ ├── test_jailbreak.py │ │ └── test_prompt_injection.py ├── example │ ├── langevals_example │ │ └── word_count.py │ ├── poetry.lock │ ├── poetry.toml │ ├── pyproject.toml │ └── tests │ │ └── test_word_count.py ├── huggingface │ ├── langevals_huggingface │ │ └── llama_guard.py │ ├── poetry.lock │ ├── poetry.toml │ ├── pyproject.toml │ └── tests │ │ └── test_llama_guard.py ├── langevals │ ├── langevals_langevals │ │ ├── basic.py │ │ ├── competitor_blocklist.py │ │ ├── competitor_llm.py │ │ ├── competitor_llm_function_call.py │ │ ├── exact_match.py │ │ ├── llm_answer_match.py │ │ ├── llm_boolean.py │ │ ├── llm_category.py │ │ ├── llm_score.py │ │ ├── models │ │ │ └── product_sentiment_polarity_openai_experiment_gpt-3.5-turbo_cunning-private-pronghorn_train_82.67_dev_84.0_manually_adjusted.json │ │ ├── off_topic.py │ │ ├── query_resolution.py │ │ ├── similarity.py │ │ └── valid_format.py │ ├── poetry.lock │ ├── poetry.toml │ ├── pyproject.toml │ └── tests │ │ ├── test_basic.py │ │ ├── test_competitor_blocklist.py │ │ ├── test_competitor_llm.py │ │ ├── test_competitor_llm_function_call.py │ │ ├── test_exact_match.py │ │ ├── test_llm_answer_match.py │ │ ├── test_llm_boolean.py │ │ ├── test_llm_category.py │ │ ├── test_llm_score.py │ │ ├── test_off_topic.py │ │ ├── test_query_resolution.py │ │ ├── test_similarity.py │ │ └── test_valid_format.py ├── legacy │ ├── langevals_legacy │ │ ├── lib │ │ │ └── setup_legacy_packages.py │ │ ├── ragas_answer_correctness.py │ │ ├── ragas_answer_relevancy.py │ │ ├── ragas_context_precision.py │ │ ├── ragas_context_recall.py │ │ ├── ragas_context_relevancy.py │ │ ├── ragas_context_utilization.py │ │ ├── ragas_faithfulness.py │ │ ├── ragas_lib │ │ │ ├── common.py │ │ │ └── model_to_langchain.py │ │ └── vendor │ │ │ └── legacy_ragas │ │ │ ├── __init__.py │ │ │ ├── _analytics.py │ │ │ ├── _version.py │ │ │ ├── adaptation.py │ │ │ ├── async_utils.py │ │ │ ├── callbacks.py │ │ │ ├── embeddings │ │ │ ├── __init__.py │ │ │ └── base.py │ │ │ ├── evaluation.py │ │ │ ├── exceptions.py │ │ │ ├── executor.py │ │ │ ├── integrations │ │ │ ├── __init__.py │ │ │ ├── langchain.py │ │ │ └── langsmith.py │ │ │ ├── llms │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── json_load.py │ │ │ ├── output_parser.py │ │ │ └── prompt.py │ │ │ ├── metrics │ │ │ ├── __init__.py │ │ │ ├── _answer_correctness.py │ │ │ ├── _answer_relevance.py │ │ │ ├── _answer_similarity.py │ │ │ ├── _context_entities_recall.py │ │ │ ├── _context_precision.py │ │ │ ├── _context_recall.py │ │ │ ├── _context_relevancy.py │ │ │ ├── _faithfulness.py │ │ │ ├── base.py │ │ │ └── critique.py │ │ │ ├── run_config.py │ │ │ ├── testset │ │ │ ├── __init__.py │ │ │ ├── docstore.py │ │ │ ├── evolutions.py │ │ │ ├── extractor.py │ │ │ ├── filters.py │ │ │ ├── generator.py │ │ │ ├── prompts.py │ │ │ └── utils.py │ │ │ ├── utils.py │ │ │ └── validation.py │ ├── poetry.lock │ ├── poetry.toml │ ├── pyproject.toml │ └── tests │ │ └── test_ragas.py ├── lingua │ ├── langevals_lingua │ │ └── language_detection.py │ ├── poetry.lock │ ├── poetry.toml │ ├── pyproject.toml │ └── tests │ │ └── test_language_detection.py ├── openai │ ├── langevals_openai │ │ └── moderation.py │ ├── poetry.lock │ ├── poetry.toml │ ├── pyproject.toml │ └── tests │ │ └── test_moderation.py ├── presidio │ ├── .gitignore │ ├── langevals_presidio │ │ └── pii_detection.py │ ├── poetry.lock │ ├── poetry.toml │ ├── pyproject.toml │ └── tests │ │ └── test_pii_detection.py └── ragas │ ├── langevals_ragas │ ├── bleu_score.py │ ├── context_f1.py │ ├── context_precision.py │ ├── context_recall.py │ ├── factual_correctness.py │ ├── faithfulness.py │ ├── lib │ │ ├── common.py │ │ └── model_to_langchain.py │ ├── response_context_precision.py │ ├── response_context_recall.py │ ├── response_relevancy.py │ ├── rouge_score.py │ ├── rubrics_based_scoring.py │ ├── sql_query_equivalence.py │ └── summarization_score.py │ ├── poetry.lock │ ├── poetry.toml │ ├── pyproject.toml │ └── tests │ └── test_ragas.py ├── langevals ├── __init__.py ├── evaluation.py ├── expect.py ├── pytest_plugins │ ├── __init__.py │ └── pass_rate_plugin.py ├── server.py └── utils.py ├── langevals_core ├── langevals_core │ ├── base_evaluator.py │ ├── litellm_patch.py │ └── utils.py ├── poetry.lock ├── poetry.toml └── pyproject.toml ├── notebooks ├── data │ └── .gitkeep ├── intent_recognition.ipynb ├── poetry.lock ├── poetry.toml ├── presidio.ipynb ├── product_sentiment_polarity │ ├── data │ │ ├── product-sentiment-dataset.csv │ │ ├── smartphone-labeled-transformed.csv │ │ ├── smartphone-questions-responses.csv │ │ ├── smartphone-questions.csv │ │ └── unrelated-questions.csv │ ├── product_dataset_builder.ipynb │ ├── product_sentiment_experiment_gpt-3.5-turbo.ipynb │ ├── product_sentiment_polarity_anthropic.ipynb │ ├── product_sentiment_polarity_openai.ipynb │ └── results │ │ ├── product_sentiment_polarity_anthropic_claude-3-haiku_merry-thankful-spaniel_train_56.0_dev_65.85.json │ │ ├── product_sentiment_polarity_openai_experiment_gpt-3.5-turbo_cunning-private-pronghorn_train_82.67_dev_80.0.json │ │ ├── product_sentiment_polarity_openai_experiment_gpt-3.5-turbo_cunning-private-pronghorn_train_82.67_dev_84.0_manually_adjusted.json │ │ └── product_sentiment_polarity_openai_gpt-3.5-turbo_dashing-eccentric-chowchow_train_86.0_dev_87.8.json ├── pyproject.toml └── tutorials │ ├── dspy_rag.ipynb │ ├── extensive_unit_testing.ipynb │ └── rag_evaluation.ipynb ├── poetry.lock ├── poetry.toml ├── pyproject.toml ├── scripts ├── check_version_bump.sh ├── generate_evaluator_dependencies.py ├── generate_evaluator_descriptions.py ├── generate_evaluators_ts.py ├── generate_workspace.py └── replace_develop_dependencies.py ├── tests ├── test_azure_evaluation.py ├── test_evaluation.py ├── test_llm_as_judge.py ├── test_out_of_the_box_evaluators.py └── test_simple_assertion.py └── ts-integration └── evaluators.generated.ts /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/.dockerignore -------------------------------------------------------------------------------- /.github/workflows/docker-publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/.github/workflows/docker-publish.yml -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/.github/workflows/python-publish.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/.gitignore -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/Dockerfile -------------------------------------------------------------------------------- /Dockerfile.lambda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/Dockerfile.lambda -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/LICENSE.md -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/README.md -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | !openapi.json -------------------------------------------------------------------------------- /docs/.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/.vscode/settings.json -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/api-reference/endpoint/azure-jailbreak-detection.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/azure-jailbreak-detection.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/azure-prompt-injection-detection.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/azure-prompt-injection-detection.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/competitor-blocklist.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/competitor-blocklist.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/competitor-detection-llm.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/competitor-detection-llm.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/content-safety.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/content-safety.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/google-cloud-dlp-pii-detection.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/google-cloud-dlp-pii-detection.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/haystack-faithfulness.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/haystack-faithfulness.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/lingua-language-detection.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/lingua-language-detection.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/llama-guard.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/llama-guard.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/llm-basic-evaluator.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/llm-basic-evaluator.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/llm-boolean-evaluator.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/llm-boolean-evaluator.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/llm-score-evaluator.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/llm-score-evaluator.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/llm-similarity-evaluator.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/llm-similarity-evaluator.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/off-topic-detection.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/off-topic-detection.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/openai-moderation.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/openai-moderation.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/ragas-answer-relevancy.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/ragas-answer-relevancy.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/ragas-context-precision.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/ragas-context-precision.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/ragas-context-recall.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/ragas-context-recall.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/ragas-context-relevancy.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/ragas-context-relevancy.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/ragas-context-utilization.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/ragas-context-utilization.mdx -------------------------------------------------------------------------------- /docs/api-reference/endpoint/ragas-faithfulness.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/endpoint/ragas-faithfulness.mdx -------------------------------------------------------------------------------- /docs/api-reference/introduction.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/introduction.mdx -------------------------------------------------------------------------------- /docs/api-reference/openapi.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/api-reference/openapi.json -------------------------------------------------------------------------------- /docs/documentation/API-example.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/documentation/API-example.mdx -------------------------------------------------------------------------------- /docs/documentation/batch-evaluation.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/documentation/batch-evaluation.mdx -------------------------------------------------------------------------------- /docs/documentation/evaluators.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/documentation/evaluators.mdx -------------------------------------------------------------------------------- /docs/documentation/getting-started.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/documentation/getting-started.mdx -------------------------------------------------------------------------------- /docs/documentation/introduction.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/documentation/introduction.mdx -------------------------------------------------------------------------------- /docs/documentation/modular-architecture/base-evaluator.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/documentation/modular-architecture/base-evaluator.mdx -------------------------------------------------------------------------------- /docs/documentation/modular-architecture/contributing.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/documentation/modular-architecture/contributing.mdx -------------------------------------------------------------------------------- /docs/documentation/plan.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/documentation/plan.mdx -------------------------------------------------------------------------------- /docs/documentation/quickstart.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/documentation/quickstart.mdx -------------------------------------------------------------------------------- /docs/documentation/unit-tests.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/documentation/unit-tests.mdx -------------------------------------------------------------------------------- /docs/evaluators/lingua.mdx: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/evaluators/openai-moderation.mdx: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/favicon.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/favicon.svg -------------------------------------------------------------------------------- /docs/how-to-choose-your-evaluator.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/how-to-choose-your-evaluator.mdx -------------------------------------------------------------------------------- /docs/introduction.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/introduction.mdx -------------------------------------------------------------------------------- /docs/logo/dark.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/logo/dark.svg -------------------------------------------------------------------------------- /docs/logo/light.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/logo/light.svg -------------------------------------------------------------------------------- /docs/mint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/mint.json -------------------------------------------------------------------------------- /docs/status.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/status.mdx -------------------------------------------------------------------------------- /docs/style.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/style.css -------------------------------------------------------------------------------- /docs/tutorials/ci-cd-pipeline-evaluation.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/tutorials/ci-cd-pipeline-evaluation.mdx -------------------------------------------------------------------------------- /docs/tutorials/extensive-unit-testing.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/tutorials/extensive-unit-testing.mdx -------------------------------------------------------------------------------- /docs/tutorials/rag-evaluation.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/docs/tutorials/rag-evaluation.mdx -------------------------------------------------------------------------------- /evaluators/azure/langevals_azure/content_safety.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/azure/langevals_azure/content_safety.py -------------------------------------------------------------------------------- /evaluators/azure/langevals_azure/jailbreak.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/azure/langevals_azure/jailbreak.py -------------------------------------------------------------------------------- /evaluators/azure/langevals_azure/prompt_injection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/azure/langevals_azure/prompt_injection.py -------------------------------------------------------------------------------- /evaluators/azure/poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/azure/poetry.lock -------------------------------------------------------------------------------- /evaluators/azure/poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /evaluators/azure/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/azure/pyproject.toml -------------------------------------------------------------------------------- /evaluators/azure/tests/test_content_safety.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/azure/tests/test_content_safety.py -------------------------------------------------------------------------------- /evaluators/azure/tests/test_jailbreak.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/azure/tests/test_jailbreak.py -------------------------------------------------------------------------------- /evaluators/azure/tests/test_prompt_injection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/azure/tests/test_prompt_injection.py -------------------------------------------------------------------------------- /evaluators/example/langevals_example/word_count.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/example/langevals_example/word_count.py -------------------------------------------------------------------------------- /evaluators/example/poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/example/poetry.lock -------------------------------------------------------------------------------- /evaluators/example/poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /evaluators/example/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/example/pyproject.toml -------------------------------------------------------------------------------- /evaluators/example/tests/test_word_count.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/example/tests/test_word_count.py -------------------------------------------------------------------------------- /evaluators/huggingface/langevals_huggingface/llama_guard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/huggingface/langevals_huggingface/llama_guard.py -------------------------------------------------------------------------------- /evaluators/huggingface/poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/huggingface/poetry.lock -------------------------------------------------------------------------------- /evaluators/huggingface/poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /evaluators/huggingface/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/huggingface/pyproject.toml -------------------------------------------------------------------------------- /evaluators/huggingface/tests/test_llama_guard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/huggingface/tests/test_llama_guard.py -------------------------------------------------------------------------------- /evaluators/langevals/langevals_langevals/basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/langevals_langevals/basic.py -------------------------------------------------------------------------------- /evaluators/langevals/langevals_langevals/competitor_blocklist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/langevals_langevals/competitor_blocklist.py -------------------------------------------------------------------------------- /evaluators/langevals/langevals_langevals/competitor_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/langevals_langevals/competitor_llm.py -------------------------------------------------------------------------------- /evaluators/langevals/langevals_langevals/competitor_llm_function_call.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/langevals_langevals/competitor_llm_function_call.py -------------------------------------------------------------------------------- /evaluators/langevals/langevals_langevals/exact_match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/langevals_langevals/exact_match.py -------------------------------------------------------------------------------- /evaluators/langevals/langevals_langevals/llm_answer_match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/langevals_langevals/llm_answer_match.py -------------------------------------------------------------------------------- /evaluators/langevals/langevals_langevals/llm_boolean.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/langevals_langevals/llm_boolean.py -------------------------------------------------------------------------------- /evaluators/langevals/langevals_langevals/llm_category.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/langevals_langevals/llm_category.py -------------------------------------------------------------------------------- /evaluators/langevals/langevals_langevals/llm_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/langevals_langevals/llm_score.py -------------------------------------------------------------------------------- /evaluators/langevals/langevals_langevals/models/product_sentiment_polarity_openai_experiment_gpt-3.5-turbo_cunning-private-pronghorn_train_82.67_dev_84.0_manually_adjusted.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/langevals_langevals/models/product_sentiment_polarity_openai_experiment_gpt-3.5-turbo_cunning-private-pronghorn_train_82.67_dev_84.0_manually_adjusted.json -------------------------------------------------------------------------------- /evaluators/langevals/langevals_langevals/off_topic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/langevals_langevals/off_topic.py -------------------------------------------------------------------------------- /evaluators/langevals/langevals_langevals/query_resolution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/langevals_langevals/query_resolution.py -------------------------------------------------------------------------------- /evaluators/langevals/langevals_langevals/similarity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/langevals_langevals/similarity.py -------------------------------------------------------------------------------- /evaluators/langevals/langevals_langevals/valid_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/langevals_langevals/valid_format.py -------------------------------------------------------------------------------- /evaluators/langevals/poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/poetry.lock -------------------------------------------------------------------------------- /evaluators/langevals/poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /evaluators/langevals/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/pyproject.toml -------------------------------------------------------------------------------- /evaluators/langevals/tests/test_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/tests/test_basic.py -------------------------------------------------------------------------------- /evaluators/langevals/tests/test_competitor_blocklist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/tests/test_competitor_blocklist.py -------------------------------------------------------------------------------- /evaluators/langevals/tests/test_competitor_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/tests/test_competitor_llm.py -------------------------------------------------------------------------------- /evaluators/langevals/tests/test_competitor_llm_function_call.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/tests/test_competitor_llm_function_call.py -------------------------------------------------------------------------------- /evaluators/langevals/tests/test_exact_match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/tests/test_exact_match.py -------------------------------------------------------------------------------- /evaluators/langevals/tests/test_llm_answer_match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/tests/test_llm_answer_match.py -------------------------------------------------------------------------------- /evaluators/langevals/tests/test_llm_boolean.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/tests/test_llm_boolean.py -------------------------------------------------------------------------------- /evaluators/langevals/tests/test_llm_category.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/tests/test_llm_category.py -------------------------------------------------------------------------------- /evaluators/langevals/tests/test_llm_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/tests/test_llm_score.py -------------------------------------------------------------------------------- /evaluators/langevals/tests/test_off_topic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/tests/test_off_topic.py -------------------------------------------------------------------------------- /evaluators/langevals/tests/test_query_resolution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/tests/test_query_resolution.py -------------------------------------------------------------------------------- /evaluators/langevals/tests/test_similarity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/tests/test_similarity.py -------------------------------------------------------------------------------- /evaluators/langevals/tests/test_valid_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/langevals/tests/test_valid_format.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/lib/setup_legacy_packages.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/lib/setup_legacy_packages.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/ragas_answer_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/ragas_answer_correctness.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/ragas_answer_relevancy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/ragas_answer_relevancy.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/ragas_context_precision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/ragas_context_precision.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/ragas_context_recall.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/ragas_context_recall.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/ragas_context_relevancy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/ragas_context_relevancy.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/ragas_context_utilization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/ragas_context_utilization.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/ragas_faithfulness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/ragas_faithfulness.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/ragas_lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/ragas_lib/common.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/ragas_lib/model_to_langchain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/ragas_lib/model_to_langchain.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/__init__.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/_analytics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/_analytics.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/_version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/_version.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/adaptation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/adaptation.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/async_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/async_utils.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/callbacks.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/embeddings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/embeddings/__init__.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/embeddings/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/embeddings/base.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/evaluation.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/exceptions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/exceptions.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/executor.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/integrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/integrations/langchain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/integrations/langchain.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/integrations/langsmith.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/integrations/langsmith.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/llms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/llms/__init__.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/llms/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/llms/base.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/llms/json_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/llms/json_load.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/llms/output_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/llms/output_parser.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/llms/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/llms/prompt.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/__init__.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/_answer_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/_answer_correctness.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/_answer_relevance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/_answer_relevance.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/_answer_similarity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/_answer_similarity.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/_context_entities_recall.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/_context_entities_recall.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/_context_precision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/_context_precision.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/_context_recall.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/_context_recall.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/_context_relevancy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/_context_relevancy.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/_faithfulness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/_faithfulness.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/base.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/critique.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/metrics/critique.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/run_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/run_config.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/testset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/testset/__init__.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/testset/docstore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/testset/docstore.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/testset/evolutions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/testset/evolutions.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/testset/extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/testset/extractor.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/testset/filters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/testset/filters.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/testset/generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/testset/generator.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/testset/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/testset/prompts.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/testset/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/testset/utils.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/utils.py -------------------------------------------------------------------------------- /evaluators/legacy/langevals_legacy/vendor/legacy_ragas/validation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/langevals_legacy/vendor/legacy_ragas/validation.py -------------------------------------------------------------------------------- /evaluators/legacy/poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/poetry.lock -------------------------------------------------------------------------------- /evaluators/legacy/poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /evaluators/legacy/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/pyproject.toml -------------------------------------------------------------------------------- /evaluators/legacy/tests/test_ragas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/legacy/tests/test_ragas.py -------------------------------------------------------------------------------- /evaluators/lingua/langevals_lingua/language_detection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/lingua/langevals_lingua/language_detection.py -------------------------------------------------------------------------------- /evaluators/lingua/poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/lingua/poetry.lock -------------------------------------------------------------------------------- /evaluators/lingua/poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /evaluators/lingua/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/lingua/pyproject.toml -------------------------------------------------------------------------------- /evaluators/lingua/tests/test_language_detection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/lingua/tests/test_language_detection.py -------------------------------------------------------------------------------- /evaluators/openai/langevals_openai/moderation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/openai/langevals_openai/moderation.py -------------------------------------------------------------------------------- /evaluators/openai/poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/openai/poetry.lock -------------------------------------------------------------------------------- /evaluators/openai/poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /evaluators/openai/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/openai/pyproject.toml -------------------------------------------------------------------------------- /evaluators/openai/tests/test_moderation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/openai/tests/test_moderation.py -------------------------------------------------------------------------------- /evaluators/presidio/.gitignore: -------------------------------------------------------------------------------- 1 | .cache -------------------------------------------------------------------------------- /evaluators/presidio/langevals_presidio/pii_detection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/presidio/langevals_presidio/pii_detection.py -------------------------------------------------------------------------------- /evaluators/presidio/poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/presidio/poetry.lock -------------------------------------------------------------------------------- /evaluators/presidio/poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /evaluators/presidio/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/presidio/pyproject.toml -------------------------------------------------------------------------------- /evaluators/presidio/tests/test_pii_detection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/presidio/tests/test_pii_detection.py -------------------------------------------------------------------------------- /evaluators/ragas/langevals_ragas/bleu_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/langevals_ragas/bleu_score.py -------------------------------------------------------------------------------- /evaluators/ragas/langevals_ragas/context_f1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/langevals_ragas/context_f1.py -------------------------------------------------------------------------------- /evaluators/ragas/langevals_ragas/context_precision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/langevals_ragas/context_precision.py -------------------------------------------------------------------------------- /evaluators/ragas/langevals_ragas/context_recall.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/langevals_ragas/context_recall.py -------------------------------------------------------------------------------- /evaluators/ragas/langevals_ragas/factual_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/langevals_ragas/factual_correctness.py -------------------------------------------------------------------------------- /evaluators/ragas/langevals_ragas/faithfulness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/langevals_ragas/faithfulness.py -------------------------------------------------------------------------------- /evaluators/ragas/langevals_ragas/lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/langevals_ragas/lib/common.py -------------------------------------------------------------------------------- /evaluators/ragas/langevals_ragas/lib/model_to_langchain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/langevals_ragas/lib/model_to_langchain.py -------------------------------------------------------------------------------- /evaluators/ragas/langevals_ragas/response_context_precision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/langevals_ragas/response_context_precision.py -------------------------------------------------------------------------------- /evaluators/ragas/langevals_ragas/response_context_recall.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/langevals_ragas/response_context_recall.py -------------------------------------------------------------------------------- /evaluators/ragas/langevals_ragas/response_relevancy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/langevals_ragas/response_relevancy.py -------------------------------------------------------------------------------- /evaluators/ragas/langevals_ragas/rouge_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/langevals_ragas/rouge_score.py -------------------------------------------------------------------------------- /evaluators/ragas/langevals_ragas/rubrics_based_scoring.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/langevals_ragas/rubrics_based_scoring.py -------------------------------------------------------------------------------- /evaluators/ragas/langevals_ragas/sql_query_equivalence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/langevals_ragas/sql_query_equivalence.py -------------------------------------------------------------------------------- /evaluators/ragas/langevals_ragas/summarization_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/langevals_ragas/summarization_score.py -------------------------------------------------------------------------------- /evaluators/ragas/poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/poetry.lock -------------------------------------------------------------------------------- /evaluators/ragas/poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /evaluators/ragas/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/pyproject.toml -------------------------------------------------------------------------------- /evaluators/ragas/tests/test_ragas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/evaluators/ragas/tests/test_ragas.py -------------------------------------------------------------------------------- /langevals/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/langevals/__init__.py -------------------------------------------------------------------------------- /langevals/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/langevals/evaluation.py -------------------------------------------------------------------------------- /langevals/expect.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/langevals/expect.py -------------------------------------------------------------------------------- /langevals/pytest_plugins/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /langevals/pytest_plugins/pass_rate_plugin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/langevals/pytest_plugins/pass_rate_plugin.py -------------------------------------------------------------------------------- /langevals/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/langevals/server.py -------------------------------------------------------------------------------- /langevals/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/langevals/utils.py -------------------------------------------------------------------------------- /langevals_core/langevals_core/base_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/langevals_core/langevals_core/base_evaluator.py -------------------------------------------------------------------------------- /langevals_core/langevals_core/litellm_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/langevals_core/langevals_core/litellm_patch.py -------------------------------------------------------------------------------- /langevals_core/langevals_core/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/langevals_core/langevals_core/utils.py -------------------------------------------------------------------------------- /langevals_core/poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/langevals_core/poetry.lock -------------------------------------------------------------------------------- /langevals_core/poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /langevals_core/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/langevals_core/pyproject.toml -------------------------------------------------------------------------------- /notebooks/data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/intent_recognition.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/intent_recognition.ipynb -------------------------------------------------------------------------------- /notebooks/poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/poetry.lock -------------------------------------------------------------------------------- /notebooks/poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /notebooks/presidio.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/presidio.ipynb -------------------------------------------------------------------------------- /notebooks/product_sentiment_polarity/data/product-sentiment-dataset.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/product_sentiment_polarity/data/product-sentiment-dataset.csv -------------------------------------------------------------------------------- /notebooks/product_sentiment_polarity/data/smartphone-labeled-transformed.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/product_sentiment_polarity/data/smartphone-labeled-transformed.csv -------------------------------------------------------------------------------- /notebooks/product_sentiment_polarity/data/smartphone-questions-responses.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/product_sentiment_polarity/data/smartphone-questions-responses.csv -------------------------------------------------------------------------------- /notebooks/product_sentiment_polarity/data/smartphone-questions.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/product_sentiment_polarity/data/smartphone-questions.csv -------------------------------------------------------------------------------- /notebooks/product_sentiment_polarity/data/unrelated-questions.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/product_sentiment_polarity/data/unrelated-questions.csv -------------------------------------------------------------------------------- /notebooks/product_sentiment_polarity/product_dataset_builder.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/product_sentiment_polarity/product_dataset_builder.ipynb -------------------------------------------------------------------------------- /notebooks/product_sentiment_polarity/product_sentiment_experiment_gpt-3.5-turbo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/product_sentiment_polarity/product_sentiment_experiment_gpt-3.5-turbo.ipynb -------------------------------------------------------------------------------- /notebooks/product_sentiment_polarity/product_sentiment_polarity_anthropic.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/product_sentiment_polarity/product_sentiment_polarity_anthropic.ipynb -------------------------------------------------------------------------------- /notebooks/product_sentiment_polarity/product_sentiment_polarity_openai.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/product_sentiment_polarity/product_sentiment_polarity_openai.ipynb -------------------------------------------------------------------------------- /notebooks/product_sentiment_polarity/results/product_sentiment_polarity_anthropic_claude-3-haiku_merry-thankful-spaniel_train_56.0_dev_65.85.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/product_sentiment_polarity/results/product_sentiment_polarity_anthropic_claude-3-haiku_merry-thankful-spaniel_train_56.0_dev_65.85.json -------------------------------------------------------------------------------- /notebooks/product_sentiment_polarity/results/product_sentiment_polarity_openai_experiment_gpt-3.5-turbo_cunning-private-pronghorn_train_82.67_dev_80.0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/product_sentiment_polarity/results/product_sentiment_polarity_openai_experiment_gpt-3.5-turbo_cunning-private-pronghorn_train_82.67_dev_80.0.json -------------------------------------------------------------------------------- /notebooks/product_sentiment_polarity/results/product_sentiment_polarity_openai_experiment_gpt-3.5-turbo_cunning-private-pronghorn_train_82.67_dev_84.0_manually_adjusted.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/product_sentiment_polarity/results/product_sentiment_polarity_openai_experiment_gpt-3.5-turbo_cunning-private-pronghorn_train_82.67_dev_84.0_manually_adjusted.json -------------------------------------------------------------------------------- /notebooks/product_sentiment_polarity/results/product_sentiment_polarity_openai_gpt-3.5-turbo_dashing-eccentric-chowchow_train_86.0_dev_87.8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/product_sentiment_polarity/results/product_sentiment_polarity_openai_gpt-3.5-turbo_dashing-eccentric-chowchow_train_86.0_dev_87.8.json -------------------------------------------------------------------------------- /notebooks/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/pyproject.toml -------------------------------------------------------------------------------- /notebooks/tutorials/dspy_rag.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/tutorials/dspy_rag.ipynb -------------------------------------------------------------------------------- /notebooks/tutorials/extensive_unit_testing.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/tutorials/extensive_unit_testing.ipynb -------------------------------------------------------------------------------- /notebooks/tutorials/rag_evaluation.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/notebooks/tutorials/rag_evaluation.ipynb -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/poetry.lock -------------------------------------------------------------------------------- /poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/pyproject.toml -------------------------------------------------------------------------------- /scripts/check_version_bump.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/scripts/check_version_bump.sh -------------------------------------------------------------------------------- /scripts/generate_evaluator_dependencies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/scripts/generate_evaluator_dependencies.py -------------------------------------------------------------------------------- /scripts/generate_evaluator_descriptions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/scripts/generate_evaluator_descriptions.py -------------------------------------------------------------------------------- /scripts/generate_evaluators_ts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/scripts/generate_evaluators_ts.py -------------------------------------------------------------------------------- /scripts/generate_workspace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/scripts/generate_workspace.py -------------------------------------------------------------------------------- /scripts/replace_develop_dependencies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/scripts/replace_develop_dependencies.py -------------------------------------------------------------------------------- /tests/test_azure_evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/tests/test_azure_evaluation.py -------------------------------------------------------------------------------- /tests/test_evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/tests/test_evaluation.py -------------------------------------------------------------------------------- /tests/test_llm_as_judge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/tests/test_llm_as_judge.py -------------------------------------------------------------------------------- /tests/test_out_of_the_box_evaluators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/tests/test_out_of_the_box_evaluators.py -------------------------------------------------------------------------------- /tests/test_simple_assertion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/tests/test_simple_assertion.py -------------------------------------------------------------------------------- /ts-integration/evaluators.generated.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langwatch/langevals/HEAD/ts-integration/evaluators.generated.ts --------------------------------------------------------------------------------