├── .coverage ├── .github └── workflows │ ├── build-tag-and-publish.yml │ ├── e2e-cloud-gpu.yml │ ├── e2e-local.yml │ └── test-and-lint.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── examples ├── 1_quickstart.ipynb ├── 2_custom_evaluation_criteria.ipynb ├── 3_evaluation_strategies.ipynb ├── 4_llama_index_evaluators.ipynb ├── 5_evaluate_haystack_rag_pipeline.ipynb ├── 6_langchain_evaluators.ipynb ├── 7_baseten_async_quickstart.ipynb └── sample_data │ └── csr_assistant.json ├── flow_judge ├── __init__.py ├── eval_data_types.py ├── flow_judge.py ├── integrations │ ├── __init__.py │ ├── haystack.py │ ├── langchain.py │ └── llama_index.py ├── metrics │ ├── __init__.py │ ├── metric.py │ └── presets.py ├── models │ ├── __init__.py │ ├── adapters │ │ ├── base.py │ │ └── baseten │ │ │ ├── README.md │ │ │ ├── adapter.py │ │ │ ├── api_auth.py │ │ │ ├── data_io.py │ │ │ ├── deploy.py │ │ │ ├── deployment │ │ │ ├── config.yaml │ │ │ └── model │ │ │ │ ├── __init__.py │ │ │ │ ├── helper.py │ │ │ │ └── model.py │ │ │ ├── errors.py │ │ │ ├── gpu.py │ │ │ ├── management.py │ │ │ ├── token_bucket.py │ │ │ ├── util.py │ │ │ ├── validation.py │ │ │ └── webhook.py │ ├── baseten.py │ ├── common.py │ ├── huggingface.py │ ├── llamafile.py │ └── vllm.py └── utils │ ├── __init__.py │ ├── prompt_formatter.py │ ├── result_writer.py │ └── validators.py ├── img └── flow_judge_banner.png ├── pyproject.toml └── tests ├── README.md ├── e2e-cloud-gpu └── models │ └── adapters │ └── test_baseten_e2e.py ├── e2e-local ├── integrations │ └── test_llama_index_e2e.py └── models │ └── test_llamafile_e2e.py └── unit ├── models ├── adapters │ ├── baseten.py │ ├── gpu.py │ └── validation.py ├── test_baseten.py └── test_llamafile_unit.py ├── test_flow_judge.py ├── test_metrics.py ├── test_utils.py └── utils └── test_result_writer.py /.coverage: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/.coverage -------------------------------------------------------------------------------- /.github/workflows/build-tag-and-publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/.github/workflows/build-tag-and-publish.yml -------------------------------------------------------------------------------- /.github/workflows/e2e-cloud-gpu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/.github/workflows/e2e-cloud-gpu.yml -------------------------------------------------------------------------------- /.github/workflows/e2e-local.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/.github/workflows/e2e-local.yml -------------------------------------------------------------------------------- /.github/workflows/test-and-lint.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/.github/workflows/test-and-lint.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/README.md -------------------------------------------------------------------------------- /examples/1_quickstart.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/examples/1_quickstart.ipynb -------------------------------------------------------------------------------- /examples/2_custom_evaluation_criteria.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/examples/2_custom_evaluation_criteria.ipynb -------------------------------------------------------------------------------- /examples/3_evaluation_strategies.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/examples/3_evaluation_strategies.ipynb -------------------------------------------------------------------------------- /examples/4_llama_index_evaluators.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/examples/4_llama_index_evaluators.ipynb -------------------------------------------------------------------------------- /examples/5_evaluate_haystack_rag_pipeline.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/examples/5_evaluate_haystack_rag_pipeline.ipynb -------------------------------------------------------------------------------- /examples/6_langchain_evaluators.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/examples/6_langchain_evaluators.ipynb -------------------------------------------------------------------------------- /examples/7_baseten_async_quickstart.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/examples/7_baseten_async_quickstart.ipynb -------------------------------------------------------------------------------- /examples/sample_data/csr_assistant.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/examples/sample_data/csr_assistant.json -------------------------------------------------------------------------------- /flow_judge/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/__init__.py -------------------------------------------------------------------------------- /flow_judge/eval_data_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/eval_data_types.py -------------------------------------------------------------------------------- /flow_judge/flow_judge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/flow_judge.py -------------------------------------------------------------------------------- /flow_judge/integrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flow_judge/integrations/haystack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/integrations/haystack.py -------------------------------------------------------------------------------- /flow_judge/integrations/langchain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/integrations/langchain.py -------------------------------------------------------------------------------- /flow_judge/integrations/llama_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/integrations/llama_index.py -------------------------------------------------------------------------------- /flow_judge/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/metrics/__init__.py -------------------------------------------------------------------------------- /flow_judge/metrics/metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/metrics/metric.py -------------------------------------------------------------------------------- /flow_judge/metrics/presets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/metrics/presets.py -------------------------------------------------------------------------------- /flow_judge/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/__init__.py -------------------------------------------------------------------------------- /flow_judge/models/adapters/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/base.py -------------------------------------------------------------------------------- /flow_judge/models/adapters/baseten/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/baseten/README.md -------------------------------------------------------------------------------- /flow_judge/models/adapters/baseten/adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/baseten/adapter.py -------------------------------------------------------------------------------- /flow_judge/models/adapters/baseten/api_auth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/baseten/api_auth.py -------------------------------------------------------------------------------- /flow_judge/models/adapters/baseten/data_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/baseten/data_io.py -------------------------------------------------------------------------------- /flow_judge/models/adapters/baseten/deploy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/baseten/deploy.py -------------------------------------------------------------------------------- /flow_judge/models/adapters/baseten/deployment/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/baseten/deployment/config.yaml -------------------------------------------------------------------------------- /flow_judge/models/adapters/baseten/deployment/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/baseten/deployment/model/__init__.py -------------------------------------------------------------------------------- /flow_judge/models/adapters/baseten/deployment/model/helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/baseten/deployment/model/helper.py -------------------------------------------------------------------------------- /flow_judge/models/adapters/baseten/deployment/model/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/baseten/deployment/model/model.py -------------------------------------------------------------------------------- /flow_judge/models/adapters/baseten/errors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/baseten/errors.py -------------------------------------------------------------------------------- /flow_judge/models/adapters/baseten/gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/baseten/gpu.py -------------------------------------------------------------------------------- /flow_judge/models/adapters/baseten/management.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/baseten/management.py -------------------------------------------------------------------------------- /flow_judge/models/adapters/baseten/token_bucket.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/baseten/token_bucket.py -------------------------------------------------------------------------------- /flow_judge/models/adapters/baseten/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/baseten/util.py -------------------------------------------------------------------------------- /flow_judge/models/adapters/baseten/validation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/baseten/validation.py -------------------------------------------------------------------------------- /flow_judge/models/adapters/baseten/webhook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/adapters/baseten/webhook.py -------------------------------------------------------------------------------- /flow_judge/models/baseten.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/baseten.py -------------------------------------------------------------------------------- /flow_judge/models/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/common.py -------------------------------------------------------------------------------- /flow_judge/models/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/huggingface.py -------------------------------------------------------------------------------- /flow_judge/models/llamafile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/llamafile.py -------------------------------------------------------------------------------- /flow_judge/models/vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/models/vllm.py -------------------------------------------------------------------------------- /flow_judge/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flow_judge/utils/prompt_formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/utils/prompt_formatter.py -------------------------------------------------------------------------------- /flow_judge/utils/result_writer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/utils/result_writer.py -------------------------------------------------------------------------------- /flow_judge/utils/validators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/flow_judge/utils/validators.py -------------------------------------------------------------------------------- /img/flow_judge_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/img/flow_judge_banner.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/pyproject.toml -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/tests/README.md -------------------------------------------------------------------------------- /tests/e2e-cloud-gpu/models/adapters/test_baseten_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/tests/e2e-cloud-gpu/models/adapters/test_baseten_e2e.py -------------------------------------------------------------------------------- /tests/e2e-local/integrations/test_llama_index_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/tests/e2e-local/integrations/test_llama_index_e2e.py -------------------------------------------------------------------------------- /tests/e2e-local/models/test_llamafile_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/tests/e2e-local/models/test_llamafile_e2e.py -------------------------------------------------------------------------------- /tests/unit/models/adapters/baseten.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/tests/unit/models/adapters/baseten.py -------------------------------------------------------------------------------- /tests/unit/models/adapters/gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/tests/unit/models/adapters/gpu.py -------------------------------------------------------------------------------- /tests/unit/models/adapters/validation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/tests/unit/models/adapters/validation.py -------------------------------------------------------------------------------- /tests/unit/models/test_baseten.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/tests/unit/models/test_baseten.py -------------------------------------------------------------------------------- /tests/unit/models/test_llamafile_unit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/tests/unit/models/test_llamafile_unit.py -------------------------------------------------------------------------------- /tests/unit/test_flow_judge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/tests/unit/test_flow_judge.py -------------------------------------------------------------------------------- /tests/unit/test_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/tests/unit/test_metrics.py -------------------------------------------------------------------------------- /tests/unit/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/tests/unit/test_utils.py -------------------------------------------------------------------------------- /tests/unit/utils/test_result_writer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowaicom/flow-judge/HEAD/tests/unit/utils/test_result_writer.py --------------------------------------------------------------------------------