├── .env.example ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── ci.yml │ ├── e2e-smoke-test.yml │ ├── fireworks-tracing-tests.yml │ ├── release.yml │ └── rollout.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .vscode ├── extensions.json ├── launch.json └── settings.json ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── assets └── ui.png ├── conf ├── cli │ └── run_eval_config.yaml └── dataset │ ├── apps_full_prompts.yaml │ ├── apps_prompts.yaml │ ├── apps_source.yaml │ ├── base_dataset.yaml │ ├── base_derived_dataset.yaml │ ├── gsm8k.yaml │ ├── gsm8k_full_test_prompts.yaml │ ├── gsm8k_local_prompts.yaml │ ├── gsm8k_math_prompts.yaml │ ├── jsonl_direct.yaml │ ├── xlam_fc_eval_prompts.yaml │ └── xlam_fc_source.yaml ├── development ├── CODING_DATASET.jsonl ├── CONTRIBUTING.md ├── CORE_STRATEGY.md ├── __init__.py ├── cloud_run_vs_cloud_functions_notes.md ├── gsm8k_sample.jsonl ├── managed_simulation_server_plan.md ├── mcp_north_star.md ├── normalize_sandbox_fusion.py ├── notes │ ├── N_variant_generation_design.md │ ├── agent_rl_survey.md │ ├── apps_coding_example_plan.md │ ├── cli_evaluation_refactor_plan.md │ ├── frozen_lake.md │ ├── gigpo_breakdown.md │ ├── hydra_dataset_refactor_plan.md │ ├── math_lighteval_example_plan.md │ ├── mcp_gym_implementation_summary.md │ ├── multi_step_rl_enhancement_plan.md │ ├── multi_step_rl_enhancement_plan_overview.md │ ├── multi_step_rl_enhancement_plan_phase1.md │ ├── multi_step_rl_enhancement_plan_phase2.md │ ├── multi_step_rl_enhancement_plan_phase3.md │ ├── north_star_mcp_gym.md │ ├── plan_forkable_filesystem_rl_scenario.md │ ├── pytest_integration_proposal.md │ ├── unify_gymnasium_api.md │ └── verl_replication_playbook.md ├── readiness │ ├── coding_example.md │ ├── composite_math_coding_example.md │ ├── cross_cutting_refinements.md │ ├── function_calling_example.md │ ├── math_gsm8k.md │ ├── math_openr1.md │ └── remote_evaluation_setup_plan.md ├── record_and_playback_design.md ├── record_replay_testing_handoff.md ├── system_architecture │ └── OVERVIEW.md ├── unified_evaluation_metadata_proposal.md └── utils │ ├── __init__.py │ ├── generate_api_key.py │ └── subprocess_manager.py ├── docs ├── DOCUMENTATION_STATUS.mdx ├── api_reference │ ├── api_overview.mdx │ ├── data_models.mdx │ ├── reward_function_class.mdx │ └── reward_function_decorator.mdx ├── cli_reference │ └── cli_overview.mdx ├── cross_process_events.md ├── dataset_configuration_guide.md ├── developer_guide │ ├── agent_evaluation.mdx │ ├── core_data_types.mdx │ ├── evaluation_workflows.mdx │ ├── getting_started.mdx │ ├── hydra_configuration.mdx │ ├── images │ │ ├── create_evaluator.png │ │ ├── create_evaluator_data.png │ │ └── list_of_evaluators.png │ ├── implementation_notes.mdx │ └── reward_function_anatomy.mdx ├── documentation_home.mdx ├── examples │ ├── advanced_examples │ │ ├── code_execution_with_e2b.mdx │ │ └── math_evaluation.mdx │ ├── apps_coding_example.mdx │ ├── examples_overview.mdx │ ├── gcp_cloud_run_deployment_example.mdx │ ├── math_with_format_and_length_example.mdx │ ├── math_with_formatting_example.mdx │ └── tool_calling_example.mdx ├── integrations │ └── trl_integration_overview.mdx ├── intro.png ├── main_screen.png ├── n_variant_batch_evaluation_guide.md └── tutorials │ ├── best_practices.mdx │ ├── creating_your_first_reward_function.mdx │ └── evaluating_model_responses.mdx ├── eval_protocol ├── __init__.py ├── __main__.py ├── _version.py ├── adapters │ ├── CONTRIBUTING.md │ ├── __init__.py │ ├── base.py │ ├── bigquery.py │ ├── braintrust.py │ ├── fireworks_tracing.py │ ├── huggingface.py │ ├── langchain.py │ ├── langfuse.py │ ├── langsmith.py │ ├── openai_responses.py │ ├── trl.py │ ├── utils.py │ └── weave.py ├── agent │ ├── __init__.py │ ├── models.py │ ├── orchestrator.py │ ├── resource_abc.py │ ├── resource_pool.py │ ├── resources │ │ ├── __init__.py │ │ ├── bfcl_envs │ │ │ ├── __init__.py │ │ │ ├── gorilla_file_system.py │ │ │ ├── math_api.py │ │ │ └── posting_api.py │ │ ├── bfcl_sim_api_resource.py │ │ ├── docker_resource.py │ │ ├── filesystem_resource.py │ │ ├── python_state_resource.py │ │ └── sql_resource.py │ ├── task_manager.py │ └── tool_registry.py ├── auth.py ├── benchmarks │ ├── __init__.py │ ├── data │ │ ├── airline_dataset.jsonl │ │ └── retail_dataset.jsonl │ ├── test_aime25.py │ ├── test_frozen_lake.py │ ├── test_glm_streaming_compliance.py │ ├── test_gpqa.py │ ├── test_livebench_data_analysis.py │ ├── test_tau_bench_airline.py │ └── test_tau_bench_retail.py ├── cli.py ├── cli_commands │ ├── __init__.py │ ├── agent_eval_cmd.py │ ├── common.py │ ├── create_rft.py │ ├── deploy.py │ ├── deploy_mcp.py │ ├── local_test.py │ ├── logs.py │ ├── preview.py │ ├── run_eval_cmd.py │ ├── upload.py │ └── utils.py ├── common_utils.py ├── config.py ├── data_loader │ ├── __init__.py │ ├── dynamic_data_loader.py │ ├── factory_data_loader.py │ ├── inline_data_loader.py │ ├── jsonl_data_loader.py │ └── models.py ├── dataset_logger │ ├── __init__.py │ ├── dataset_logger.py │ ├── local_fs_dataset_logger_adapter.py │ ├── sqlite_dataset_logger_adapter.py │ └── sqlite_evaluation_row_store.py ├── datasets │ ├── __init__.py │ └── loader.py ├── directory_utils.py ├── evaluation.py ├── event_bus │ ├── __init__.py │ ├── event_bus.py │ ├── logger.py │ ├── sqlite_event_bus.py │ └── sqlite_event_bus_database.py ├── exceptions.py ├── execution │ ├── __init__.py │ └── pipeline.py ├── fireworks_rft.py ├── gcp_tools.py ├── generation │ ├── cache.py │ ├── clients.py │ └── clients │ │ └── base.py ├── generic_server.py ├── get_pep440_version.py ├── human_id │ ├── __init__.py │ └── dictionary.py ├── integrations │ ├── __init__.py │ ├── deepeval.py │ ├── openai_rft.py │ ├── openeval.py │ ├── tinker_cookbook.py │ ├── tinker_rollout_processor.py │ └── trl.py ├── log_utils │ ├── __init__.py │ ├── elasticsearch_client.py │ ├── elasticsearch_direct_http_handler.py │ ├── elasticsearch_index_manager.py │ ├── fireworks_tracing_http_handler.py │ ├── init.py │ ├── rollout_context.py │ ├── rollout_id_filter.py │ └── util.py ├── logging_utils.py ├── mcp │ ├── __init__.py │ ├── adapter.py │ ├── client │ │ ├── __init__.py │ │ └── connection.py │ ├── clients.py │ ├── execution │ │ ├── __init__.py │ │ ├── base_policy.py │ │ ├── manager.py │ │ ├── policy.py │ │ └── vllm_policy.py │ ├── grid_renderer.py │ ├── mcp_multi_client.py │ ├── mcpgym.py │ ├── process_manager.py │ ├── session │ │ ├── __init__.py │ │ └── manager.py │ ├── simple_process_manager.py │ └── simulation_server.py ├── mcp_agent │ ├── __init__.py │ ├── config.py │ ├── main.py │ └── orchestration │ │ ├── __init__.py │ │ ├── base_client.py │ │ ├── local_docker_client.py │ │ └── stdio_mcp_client_helper.py ├── mcp_env.py ├── mcp_servers │ ├── __init__.py │ ├── frozen_lake │ │ ├── frozen_lake_adapter.py │ │ ├── frozen_lake_mcp.py │ │ └── server.py │ └── tau2 │ │ ├── README.md │ │ ├── __init__.py │ │ ├── airplane_environment │ │ └── airline_environment.py │ │ ├── mock_environment │ │ └── mock_environment.py │ │ ├── retail_environment │ │ └── retail_environment.py │ │ ├── server.py │ │ ├── tau2_mcp.py │ │ └── tests │ │ ├── system_prompts │ │ ├── airline_agent_system_prompt.md │ │ ├── mock_agent_system_prompt.md │ │ └── retail_agent_system_prompt.md │ │ └── test_tau2_e2e.py ├── models.py ├── packaging.py ├── platform_api.py ├── playback_policy.py ├── proxy │ ├── .env.example │ ├── Dockerfile.gateway │ ├── README.md │ ├── __init__.py │ ├── config_no_cache.yaml │ ├── docker-compose.yml │ ├── proxy_core │ │ ├── __init__.py │ │ ├── app.py │ │ ├── auth.py │ │ ├── langfuse.py │ │ ├── litellm.py │ │ ├── main.py │ │ ├── models.py │ │ ├── redis_utils.py │ │ └── secrets.yaml.example │ └── requirements.txt ├── pytest │ ├── __init__.py │ ├── default_agent_rollout_processor.py │ ├── default_dataset_adapter.py │ ├── default_langchain_rollout_processor.py │ ├── default_mcp_gym_rollout_processor.py │ ├── default_no_op_rollout_processor.py │ ├── default_pydantic_ai_rollout_processor.py │ ├── default_single_turn_rollout_process.py │ ├── dual_mode_wrapper.py │ ├── elasticsearch_setup.py │ ├── evaluation_test.py │ ├── evaluation_test_postprocess.py │ ├── evaluation_test_utils.py │ ├── exception_config.py │ ├── execution.py │ ├── generate_parameter_combinations.py │ ├── github_action_rollout_processor.py │ ├── handle_persist_flow.py │ ├── integrations │ │ └── openenv_trl_vllm.py │ ├── openenv_rollout_processor.py │ ├── parameterize.py │ ├── plugin.py │ ├── remote_rollout_processor.py │ ├── rollout_processor.py │ ├── store_experiment_link.py │ ├── store_results_url.py │ ├── tracing_utils.py │ ├── types.py │ └── validate_signature.py ├── quickstart │ ├── __init__.py │ ├── aha_judge │ │ ├── __init__.py │ │ ├── llm_judge.py │ │ ├── llm_judge_braintrust.py │ │ ├── llm_judge_langfuse.py │ │ ├── llm_judge_langsmith.py │ │ ├── llm_judge_openai_responses.py │ │ └── utils.py │ ├── llm_judge.py │ ├── llm_judge_braintrust.py │ ├── svg_agent │ │ ├── evaluator │ │ │ ├── Dockerfile │ │ │ ├── requirements.txt │ │ │ ├── svgbench_dataset.jsonl │ │ │ ├── test_svgagent.py │ │ │ └── utils.py │ │ ├── vercel_svg_server │ │ │ ├── .env.example │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── api │ │ │ │ └── init.py │ │ │ ├── requirements.txt │ │ │ └── vercel.json │ │ └── vercel_svg_server_ts │ │ │ ├── .gitignore │ │ │ ├── api │ │ │ └── init.ts │ │ │ ├── src │ │ │ ├── config │ │ │ │ └── environment.ts │ │ │ └── models │ │ │ │ ├── exceptions.ts │ │ │ │ ├── status.ts │ │ │ │ └── types.ts │ │ │ ├── tsconfig.json │ │ │ └── vercel.json │ └── utils.py ├── resources.py ├── reward_function.py ├── rewards │ ├── __init__.py │ ├── accuracy.py │ ├── accuracy_length.py │ ├── apps_coding_reward.py │ ├── apps_execution_utils.py │ ├── apps_testing_util.py │ ├── bfcl_reward.py │ ├── code_execution.py │ ├── code_execution_utils.py │ ├── cpp_code.py │ ├── deepcoder_reward.py │ ├── format.py │ ├── function_calling.py │ ├── json_schema.py │ ├── language_consistency.py │ ├── lean_prover.py │ ├── length.py │ ├── list_comparison_math_reward.py │ ├── math.py │ ├── multiple_choice_math_reward.py │ ├── reasoning_steps.py │ ├── repetition.py │ └── tag_count.py ├── rl_processing.py ├── server.py ├── stats │ ├── __init__.py │ └── confidence_intervals.py ├── typed_interface.py ├── types │ ├── __init__.py │ ├── errors.py │ ├── remote_rollout_processor.py │ └── types.py └── utils │ ├── __init__.py │ ├── batch_evaluation.py │ ├── batch_transformation.py │ ├── browser_utils.py │ ├── check_server_status.py │ ├── dataset_helpers.py │ ├── evaluation_row_utils.py │ ├── logs_models.py │ ├── logs_server.py │ ├── module_loader.py │ ├── packaging_utils.py │ ├── show_results_url.py │ ├── static_policy.py │ ├── subprocess_utils.py │ └── vite_server.py ├── examples ├── CLEAN_FRAMEWORK_SUMMARY.md ├── README.md ├── UNIFIED_FRAMEWORK_SUMMARY.md ├── __init__.py ├── adapters │ ├── README.md │ ├── gsm8k_replacement_example.py │ ├── huggingface_example.py │ └── langfuse_example.py ├── aime2025_chat_completion │ ├── README.md │ ├── __init__.py │ └── main.py ├── apps_coding_example │ ├── README.md │ ├── conf │ │ ├── __init__.py │ │ ├── run_eval.yaml │ │ └── simple_apps_eval.yaml │ └── main.py ├── blackjack_mcp │ ├── __init__.py │ ├── blackjack_adapter.py │ ├── blackjack_mcp.py │ ├── server.py │ ├── shared_data │ │ └── rollouts.jsonl │ └── tests │ │ ├── recordings │ │ └── playback_only_test.jsonl │ │ └── test_record_and_replay_e2e.py ├── cliff_walking_mcp │ ├── __init__.py │ ├── cliff_walking_adapter.py │ ├── cliff_walking_mcp.py │ ├── server.py │ ├── shared_data │ │ └── rollouts.jsonl │ └── tests │ │ ├── recordings │ │ ├── fireworks_multi_env_trajectory.jsonl │ │ ├── multi_env_trajectory.jsonl │ │ ├── playback_only_test.jsonl │ │ └── production_trajectory.jsonl │ │ └── test_cliff_walking_e2e.py ├── e2b_auto_extract_example.py ├── e2b_fallback_example.py ├── e2b_javascript_example.py ├── e2b_reward_example.py ├── frozen_lake_mcp │ ├── README.md │ ├── README_TERMINATION_FIX.md │ ├── __init__.py │ ├── frozen_lake_adapter.py │ ├── frozen_lake_mcp.py │ ├── frozen_lake_mcp_simplified.py │ ├── requirements.txt │ ├── rollout_example.py │ ├── server.py │ ├── server_logs.txt │ ├── shared_data │ │ └── rollouts.jsonl │ ├── test_basic_functionality.py │ ├── test_multi_session.py │ ├── test_north_star.py │ ├── test_rollout_termination.py │ ├── test_seed_logging.py │ ├── test_termination_fix.py │ ├── test_validation_logic.py │ └── tests │ │ ├── recordings │ │ ├── README.md │ │ ├── fireworks_multi_env_trajectory.jsonl │ │ ├── multi_env_trajectory.jsonl │ │ ├── playback_only_test.jsonl │ │ └── production_trajectory_broken.jsonl.bak │ │ └── test_frozen_lake_e2e.py ├── gcp_cloud_run_deployment_example │ ├── README.md │ ├── dummy_rewards.py │ └── rewardkit.example.yaml ├── gpqa │ └── tests │ │ └── test_gpqa.py ├── healthbench │ └── tests │ │ └── test_evaluation.py ├── langgraph │ ├── __init__.py │ ├── data │ │ └── simple_prompts.jsonl │ ├── reasoning_gpt_oss_120b_graph.py │ ├── simple_graph.py │ ├── test_langgraph_rollout.py │ ├── test_reasoning_rollout.py │ ├── test_tools_langsmith_trace.py │ └── tools_graph.py ├── lunar_lander_mcp │ ├── README.md │ ├── generate_sample_images.py │ ├── lunar_lander_adapter.py │ ├── lunar_lander_mcp.py │ ├── requirements.txt │ ├── sample_trajectory │ │ ├── step_000_initial.png │ │ ├── step_001_nothing.png │ │ ├── step_002_nothing.png │ │ ├── step_003_fire_main.png │ │ ├── step_004_fire_left.png │ │ ├── step_005_fire_main.png │ │ ├── step_006_fire_right.png │ │ ├── step_007_fire_main.png │ │ ├── step_008_nothing.png │ │ ├── step_009_fire_main.png │ │ ├── step_010_nothing.png │ │ └── trajectory_summary.json │ ├── server.py │ ├── simple_trajectory_test.py │ ├── test_lunar_lander_conda.py │ └── tests │ │ ├── recordings │ │ ├── fireworks_multi_env_trajectory.jsonl │ │ ├── multi_env_trajectory.jsonl │ │ ├── playback_only_test.jsonl │ │ └── production_trajectory.jsonl │ │ └── test_lunar_lander_e2e.py ├── math_example │ ├── README.md │ ├── conf │ │ └── simple_math_eval.yaml │ └── main.py ├── math_with_format_and_length │ ├── README.md │ ├── conf │ │ └── simple_math_format_length_eval.yaml │ └── main.py ├── math_with_formatting │ ├── README.md │ ├── conf │ │ └── simple_math_formatting_eval.yaml │ └── main.py ├── mcp_agent_filesystem_rl │ ├── README.md │ ├── config.yaml │ ├── dataset.jsonl │ ├── main.py │ ├── templates │ │ └── workspace │ │ │ ├── backup_dir │ │ │ └── .gitkeep │ │ │ ├── scenario_copy_001 │ │ │ └── source_dir │ │ │ │ ├── file_to_move.txt │ │ │ │ └── sample.txt │ │ │ ├── scenario_create_001 │ │ │ └── source_dir │ │ │ │ ├── file_to_move.txt │ │ │ │ └── sample.txt │ │ │ ├── scenario_move_001 │ │ │ └── source_dir │ │ │ │ ├── file_to_move.txt │ │ │ │ └── sample.txt │ │ │ ├── source_dir │ │ │ ├── file_to_move.txt │ │ │ └── sample.txt │ │ │ └── target_dir │ │ │ └── .gitkeep │ ├── test_example.py │ └── user_simulator.py ├── metrics │ ├── custom_temp_metrics │ │ └── deepseek_prover │ │ │ └── main.py │ ├── llm_resource_example │ │ ├── README.md │ │ ├── conf │ │ │ └── simple_llm_judge_eval.yaml │ │ └── main.py │ └── word_count │ │ ├── conf │ │ └── run_word_count_test.yaml │ │ ├── dummy_sample.jsonl │ │ └── main.py ├── n_variant_example.yaml ├── n_variant_to_batch_demo.py ├── openai_rft │ ├── example_rapidfuzz.py │ └── test_openai_grader.py ├── rollout_control_plane_demo.py ├── row_wise │ └── dummy_example │ │ └── dummy_rewards.py ├── tau2_mcp │ ├── README.md │ ├── airplane_environment │ │ ├── airline_environment.py │ │ └── db.json │ ├── mock_environment │ │ ├── db.json │ │ └── mock_environment.py │ ├── retail_environment │ │ ├── db.json │ │ └── retail_environment.py │ ├── server.py │ ├── tau2_mcp.py │ └── tests │ │ ├── datasets │ │ ├── airline.json │ │ ├── airline_full.json │ │ ├── mock.json │ │ └── retail.json │ │ ├── recordings │ │ └── fireworks_multi_env_airline_trajectory.jsonl │ │ ├── system_prompts │ │ ├── airline_agent_system_prompt.md │ │ ├── mock_agent_system_prompt.md │ │ └── retail_agent_system_prompt.md │ │ └── test_tau2_e2e.py ├── taxi_mcp_complete │ ├── README.md │ ├── local_testing │ │ ├── clean_openai_format.jsonl │ │ └── test_north_star.py │ ├── mcp_server │ │ ├── requirements.txt │ │ ├── simulation_server.py │ │ ├── taxi_adapter.py │ │ └── taxi_mcp_server.py │ ├── shared_data │ │ └── taxi_rollouts.jsonl │ └── tests │ │ ├── conftest.py │ │ └── test_taxi_e2e.py ├── test_tasks │ ├── __init__.py │ ├── reward.py │ ├── task1.yaml │ ├── task2.yaml │ └── tools.py ├── test_unified_framework_final.py ├── test_unified_servers.py ├── tinker_math_rl │ ├── README.md │ ├── plot_metrics.py │ ├── reward_plot_integration_v5.png │ ├── reward_plot_integration_v9.png │ ├── reward_plot_refactored.png │ ├── test_gsm8k_eval.py │ └── train.py ├── tool_calling_example │ ├── README.md │ ├── conf │ │ └── simple_tool_calling_eval.yaml │ ├── dataset.jsonl │ └── main.py ├── tracing │ ├── __init__.py │ └── weave │ │ ├── __init__.py │ │ └── converter.py └── trl_integration │ ├── __init__.py │ ├── convert_dataset_to_jsonl.py │ ├── data │ ├── deepcoder_mvp_transformed_sample.jsonl │ ├── simulated_deepcoder_raw_sample.jsonl │ └── simulated_deepcoder_transformed_sample.jsonl │ ├── grpo_example.py │ ├── minimal_deepcoder_grpo_example.py │ ├── ppo_example.py │ ├── test_trl_integration.py │ ├── trl_adapter.py │ └── working_grpo_example.py ├── local_evals ├── datasets │ └── airline.json └── model_comparison_eval.ipynb ├── pyproject.toml ├── pytest.ini ├── requirements-dev.txt ├── scripts ├── convert_apps_to_prompts.py ├── convert_bfcl_dataset.py ├── create_sample_gsm8k_jsonl.py ├── migrate_datasets.py ├── validate_dev_tracing.py ├── validate_remote.py └── verify_logging_locally.py ├── setup.cfg ├── setup.py ├── test_event_bus_helper.py ├── tests ├── __init__.py ├── adapters │ ├── __snapshots__ │ │ └── test_openai_responses_adapter.ambr │ ├── test_braintrust_adapter.py │ ├── test_langfuse_adapter.py │ ├── test_langsmith_adapter.py │ ├── test_openai_responses_adapter.py │ └── test_weave_adapter.py ├── chinook │ ├── Chinook_PostgreSql.sql │ ├── braintrust │ │ ├── generate_traces.py │ │ └── test_braintrust_chinook.py │ ├── dataset.py │ ├── dataset │ │ ├── task_1 │ │ │ ├── ground_truth.md │ │ │ └── task.txt │ │ ├── task_10 │ │ │ ├── ground_truth.md │ │ │ └── task.txt │ │ ├── task_11 │ │ │ ├── ground_truth.md │ │ │ └── task.txt │ │ ├── task_12 │ │ │ ├── ground_truth.md │ │ │ └── task.txt │ │ ├── task_2 │ │ │ ├── ground_truth.md │ │ │ └── task.txt │ │ ├── task_3 │ │ │ ├── ground_truth.md │ │ │ └── task.txt │ │ ├── task_4 │ │ │ ├── ground_truth.md │ │ │ └── task.txt │ │ ├── task_5 │ │ │ ├── ground_truth.md │ │ │ └── task.txt │ │ ├── task_6 │ │ │ ├── ground_truth.md │ │ │ └── task.txt │ │ ├── task_7 │ │ │ ├── ground_truth.md │ │ │ └── task.txt │ │ ├── task_8 │ │ │ ├── ground_truth.md │ │ │ └── task.txt │ │ └── task_9 │ │ │ ├── ground_truth.md │ │ │ └── task.txt │ ├── db.py │ ├── langfuse │ │ ├── generate_traces.py │ │ └── test_langfuse_chinook.py │ ├── langgraph │ │ ├── graph.py │ │ ├── test_langgraph_chinook.py │ │ ├── test_langgraph_chinook_tools.py │ │ └── tools_graph.py │ ├── langsmith │ │ ├── generate_traces.py │ │ └── test_langsmith_chinook.py │ ├── pydantic │ │ ├── agent.py │ │ ├── test_pydantic_chinook.py │ │ ├── test_pydantic_complex_queries.py │ │ └── test_pydantic_complex_queries_responses.py │ └── tasks.jsonl ├── cli_commands │ ├── test_common.py │ ├── test_deploy_cmd.py │ └── test_preview_cmd.py ├── conftest.py ├── data_loader │ ├── test_data_loader_stable_row_id.py │ ├── test_dynamic_data_loader.py │ └── test_inline_data_loader.py ├── dataset_logger │ ├── .gitignore │ └── test_sqlite_dataset_logger_adapter.py ├── dummy_module_for_server_test.py ├── eval_protocol_tests │ ├── README.md │ ├── demo_dual_imports.py │ ├── test_eval_protocol_simple.py │ ├── test_import_equivalence.py │ └── test_minimal_structure.py ├── execution │ └── test_pipeline.py ├── generation │ ├── test_cache.py │ └── test_clients.py ├── github_actions │ ├── __init__.py │ ├── quickstart.py │ ├── rollout_worker.py │ └── test_github_actions_rollout.py ├── logging │ ├── test_elasticsearch_direct_http_handler.py │ └── test_rollout_context_logging.py ├── mcp_agent │ ├── __init__.py │ ├── mock_mcp_server_image │ │ ├── Dockerfile │ │ └── mock_server.py │ ├── orchestration │ │ ├── __init__.py │ │ └── test_local_docker_client.py │ └── test_rl_filesystem_scenario.py ├── mocks │ └── verifiers │ │ ├── __init__.py │ │ └── envs │ │ ├── __init__.py │ │ └── bfcl_envs │ │ ├── __init__.py │ │ ├── gorilla_file_system.py │ │ └── posting_api.py ├── pytest │ ├── data │ │ ├── airline_dataset.jsonl │ │ ├── apps_sample_dataset.jsonl │ │ ├── basic_coding_dataset.jsonl │ │ ├── frozen_lake_dataset.jsonl │ │ ├── function_calling.jsonl │ │ ├── halueval_sample_dataset.jsonl │ │ ├── import_data.jsonl │ │ ├── json_schema.jsonl │ │ ├── lunar_lander_dataset.jsonl │ │ ├── markdown_dataset.jsonl │ │ ├── mcp_config.jsonl │ │ ├── retail_dataset.jsonl │ │ ├── svgbench_dataset.jsonl │ │ └── svgbench_sample_dataset.jsonl │ ├── datasets │ │ └── gmail_inbox.jsonl │ ├── gsm8k │ │ ├── requirements.txt │ │ └── test_pytest_math_example.py │ ├── helper │ │ ├── gsm8k_to_evaluation_row.py │ │ └── word_count_to_evaluation_row.py │ ├── mcp │ │ └── mock_discord.py │ ├── mcp_configurations │ │ ├── docs_mcp_config.json │ │ ├── docs_mcp_config_broken.json │ │ ├── klavis_strata_mcp.json │ │ └── mock_discord_mcp_config.json │ ├── test_apps_coding.py │ ├── test_basic_coding.py │ ├── test_execution_metadata.py │ ├── test_frozen_lake.py │ ├── test_get_metadata.py │ ├── test_hallucination.py │ ├── test_langgraph_processor.py │ ├── test_livesvgbench.py │ ├── test_lunar_lander.py │ ├── test_markdown_highlighting.py │ ├── test_mcp_session_autocreate.py │ ├── test_openenv_browsergym_basic.py │ ├── test_openenv_browsergym_eval.py │ ├── test_openenv_echo_hub.py │ ├── test_openenv_textarena_docker.py │ ├── test_parameterize_validation.py │ ├── test_parameterized_ids.py │ ├── test_pydantic_agent.py │ ├── test_pydantic_ai_metadata_handling.py │ ├── test_pydantic_multi_agent.py │ ├── test_pytest_assertion_error_no_new_rollouts.py │ ├── test_pytest_async.py │ ├── test_pytest_default_agent_rollout_processor.py │ ├── test_pytest_ensure_logging.py │ ├── test_pytest_env_overwrite.py │ ├── test_pytest_evaluator_error_handling.py │ ├── test_pytest_flaky_sometimes.py │ ├── test_pytest_function_calling.py │ ├── test_pytest_groupwise.py │ ├── test_pytest_ids.py │ ├── test_pytest_import_external_logs.py │ ├── test_pytest_input_messages.py │ ├── test_pytest_input_rows.py │ ├── test_pytest_input_rows_parametrized_completion_params.py │ ├── test_pytest_json_schema.py │ ├── test_pytest_klavis_mcp.py │ ├── test_pytest_math_format_length.py │ ├── test_pytest_mcp_config.py │ ├── test_pytest_mcp_url.py │ ├── test_pytest_missing_evaluation_result.py │ ├── test_pytest_propagate_error.py │ ├── test_pytest_stable_row_id.py │ ├── test_pytest_word_count_example.py │ ├── test_single_turn_rollout_processor.py │ ├── test_svgbench.py │ ├── test_tau_bench_airline.py │ ├── test_tool_response_single_string.py │ └── test_utils.py ├── remote_server │ ├── quickstart.py │ ├── remote_server.py │ ├── remote_server_multi_turn.py │ ├── test_remote_fireworks.py │ ├── test_remote_fireworks_propagate_status.py │ ├── test_remote_langfuse.py │ └── typescript-server │ │ ├── .gitignore │ │ ├── README.md │ │ ├── bun.lock │ │ ├── env.ts │ │ ├── instrumentation.ts │ │ ├── package.json │ │ ├── pnpm-lock.yaml │ │ ├── server.ts │ │ └── tsconfig.json ├── rewards │ └── test_apps_coding_reward.py ├── test_accuracy.py ├── test_accuracy_length.py ├── test_adapters_e2e.py ├── test_agent_orchestrator.py ├── test_agent_resources.py ├── test_auth.py ├── test_batch_evaluation.py ├── test_cli.py ├── test_cli_agent.py ├── test_cli_args.py ├── test_cli_create_rft.py ├── test_cli_local_test.py ├── test_code_execution.py ├── test_config.py ├── test_control_plane_separation.py ├── test_cpp_code.py ├── test_data_driven_task_manager.py ├── test_deepcoder_reward.py ├── test_deepeval_integration.py ├── test_deploy_integration.py ├── test_directory_utils.py ├── test_e2b_integration.py ├── test_e2b_js_integration.py ├── test_edge_cases.py ├── test_ep_upload_e2e.py ├── test_eval_protocol_import.py ├── test_evaluation.py ├── test_evaluation_integration.py ├── test_evaluation_postprocess.py ├── test_evaluation_preview_integration.py ├── test_event_bus.py ├── test_event_bus_helper.py ├── test_examples_end_to_end.py ├── test_exceptions.py ├── test_fireworks_api.py ├── test_format.py ├── test_fractional_code.py ├── test_function_calling.py ├── test_gcp_tools.py ├── test_generic_server.py ├── test_human_id.py ├── test_integration.py ├── test_json_schema.py ├── test_kwargs_validation.py ├── test_language_consistency.py ├── test_lean_prover.py ├── test_lean_prover_runner.py ├── test_length.py ├── test_list_comparison_math_reward.py ├── test_logs_server.py ├── test_logs_server_simple.py ├── test_math.py ├── test_message_field_filtering.py ├── test_minimal.py ├── test_models.py ├── test_models_rl.py ├── test_multiple_choice_math_reward.py ├── test_n_variant_batch_integration.py ├── test_n_variant_integration.py ├── test_openai_compatibility.py ├── test_openai_rft_integration.py ├── test_openeval_integration.py ├── test_packaging.py ├── test_parallel_rollouts.py ├── test_platform_api.py ├── test_quickstart_utils.py ├── test_readiness.py ├── test_reasoning_steps.py ├── test_repetition.py ├── test_repetition_debug.py ├── test_retry_mechanism.py ├── test_reward_function.py ├── test_reward_protocol_import.py ├── test_rl_processing.py ├── test_rollout_control_plane_integration.py ├── test_server.py ├── test_show_results_url.py ├── test_status_migration_changes.py ├── test_status_migration_integration.py ├── test_status_model.py ├── test_tag_count.py ├── test_tau_bench_airline_smoke.py ├── test_typed_interface.py ├── test_typed_interface_rl.py ├── test_upload_entrypoint.py ├── test_url_handling.py └── test_vite_server.py ├── typescript ├── .cursor │ └── rules │ │ └── use-bun-instead-of-node-vite-npm-pnpm.mdc ├── .gitignore ├── README.md ├── bun.lock ├── index.ts ├── logging │ ├── fireworks-transport.ts │ ├── fireworks-vercel.ts │ └── logger.ts ├── models │ ├── exceptions.ts │ ├── status.ts │ └── types.ts ├── package.json ├── pnpm-lock.yaml └── tsconfig.json ├── uv.lock ├── vendor └── tau2 │ ├── __init__.py │ ├── agent │ ├── README.md │ ├── __init__.py │ ├── base.py │ └── llm_agent.py │ ├── api_service │ ├── __init__.py │ ├── api_config.py │ ├── data_model.py │ └── simulation_service.py │ ├── cli.py │ ├── config.py │ ├── data │ ├── domains │ │ ├── airline │ │ │ ├── db.json │ │ │ ├── policy.md │ │ │ └── tasks.json │ │ ├── mock │ │ │ ├── db.json │ │ │ ├── policy.md │ │ │ ├── policy_solo.md │ │ │ └── tasks.json │ │ ├── retail │ │ │ ├── db.json │ │ │ ├── policy.md │ │ │ └── tasks.json │ │ └── telecom │ │ │ ├── db.toml │ │ │ ├── main_policy.md │ │ │ ├── main_policy_solo.md │ │ │ ├── tasks.json │ │ │ ├── tasks_full.json │ │ │ ├── tasks_small.json │ │ │ ├── tech_support_manual.md │ │ │ ├── tech_support_workflow.md │ │ │ ├── tech_support_workflow_solo.md │ │ │ └── user_db.toml │ └── user_simulator │ │ ├── simulation_guidelines.md │ │ └── simulation_guidelines_tools.md │ ├── data_model │ ├── __init__.py │ ├── message.py │ ├── simulation.py │ └── tasks.py │ ├── domains │ ├── README.md │ ├── __init__.py │ ├── airline │ │ ├── __init__.py │ │ ├── data_model.py │ │ ├── environment.py │ │ ├── tasks.json │ │ ├── tools.py │ │ └── utils.py │ ├── mock │ │ ├── __init__.py │ │ ├── data_model.py │ │ ├── environment.py │ │ ├── tools.py │ │ └── utils.py │ ├── retail │ │ ├── __init__.py │ │ ├── data_model.py │ │ ├── environment.py │ │ ├── tools.py │ │ └── utils.py │ └── telecom │ │ ├── __init__.py │ │ ├── data_model.py │ │ ├── environment.py │ │ ├── tasks │ │ ├── __init__.py │ │ ├── const.py │ │ ├── create_tasks.py │ │ ├── manager.py │ │ ├── mms_issues.py │ │ ├── mobile_data_issues.py │ │ ├── service_issues.py │ │ └── utils.py │ │ ├── tools.py │ │ ├── user_data_model.py │ │ ├── user_tools.py │ │ └── utils.py │ ├── environment │ ├── __init__.py │ ├── db.py │ ├── environment.py │ ├── server.py │ ├── tool.py │ ├── toolkit.py │ └── utils │ │ └── interface_agent.py │ ├── evaluator │ ├── __init__.py │ ├── evaluator.py │ ├── evaluator_action.py │ ├── evaluator_base.py │ ├── evaluator_communicate.py │ ├── evaluator_env.py │ └── evaluator_nl_assertions.py │ ├── metrics │ ├── __init__.py │ ├── agent_metrics.py │ └── break_down_metrics.py │ ├── orchestrator │ ├── __init__.py │ ├── environment_manager.py │ ├── orchestrator.py │ └── utils.py │ ├── registry.py │ ├── run.py │ ├── scripts │ ├── __init__.py │ ├── check_data.py │ ├── show_domain_doc.py │ ├── start_servers.py │ └── view_simulations.py │ ├── user │ ├── __init__.py │ ├── base.py │ └── user_simulator.py │ └── utils │ ├── __init__.py │ ├── display.py │ ├── io_utils.py │ ├── llm_utils.py │ ├── pydantic_utils.py │ └── utils.py ├── versioneer.py └── vite-app ├── .gitignore ├── CHART_EXPORT_README.md ├── data ├── logs.json └── logs.jsonl ├── dist ├── assets │ ├── favicon-BkAAWQga.png │ ├── index-BIhepl19.css │ ├── index-DaovgarD.js │ ├── index-DaovgarD.js.map │ └── logo-light-BprIBJQW.png └── index.html ├── docs └── runtime-configuration.md ├── eslint.config.js ├── index.html ├── package.json ├── pnpm-lock.yaml ├── src ├── App.tsx ├── GlobalState.tsx ├── assets │ └── logo-light.png ├── components │ ├── Button.tsx │ ├── ChartExport.tsx │ ├── ChatInterface.tsx │ ├── Dashboard.tsx │ ├── EvaluationRow.tsx │ ├── EvaluationTable.tsx │ ├── FilterInput.tsx │ ├── FilterSelector.tsx │ ├── JSONTooltip.tsx │ ├── LogsSection.tsx │ ├── MessageBubble.tsx │ ├── MetadataSection.tsx │ ├── PivotTab.tsx │ ├── PivotTable.tsx │ ├── SearchableSelect.tsx │ ├── Select.tsx │ ├── StatusIndicator.tsx │ ├── TabButton.tsx │ ├── TableContainer.tsx │ └── Tooltip.tsx ├── config.ts ├── favicon.png ├── hooks │ └── usePivotData.ts ├── index.css ├── main.tsx ├── styles │ └── common.ts ├── types │ ├── README.md │ ├── configs.ts │ ├── eval-protocol.ts │ ├── global.d.ts │ └── websocket.ts ├── typings.d.ts └── util │ ├── __snapshots__ │ └── flatten-json.test.ts.snap │ ├── field-processors.ts │ ├── filter-utils.ts │ ├── flatten-json.test.ts │ ├── flatten-json.ts │ ├── pivot.test.ts │ ├── pivot.ts │ ├── query-params.test.ts │ └── query-params.ts ├── tsconfig.app.json ├── tsconfig.json ├── tsconfig.node.json └── vite.config.ts /.env.example: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/.env.example -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | eval_protocol/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/.github/ISSUE_TEMPLATE/bug_report.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/.github/ISSUE_TEMPLATE/feature_request.md -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/.github/workflows/ci.yml -------------------------------------------------------------------------------- /.github/workflows/e2e-smoke-test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/.github/workflows/e2e-smoke-test.yml -------------------------------------------------------------------------------- /.github/workflows/fireworks-tracing-tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/.github/workflows/fireworks-tracing-tests.yml -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/.github/workflows/release.yml -------------------------------------------------------------------------------- /.github/workflows/rollout.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/.github/workflows/rollout.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/.vscode/extensions.json -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/.vscode/launch.json -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/README.md -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/SECURITY.md -------------------------------------------------------------------------------- /assets/ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/assets/ui.png -------------------------------------------------------------------------------- /conf/cli/run_eval_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/conf/cli/run_eval_config.yaml -------------------------------------------------------------------------------- /conf/dataset/apps_full_prompts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/conf/dataset/apps_full_prompts.yaml -------------------------------------------------------------------------------- /conf/dataset/apps_prompts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/conf/dataset/apps_prompts.yaml -------------------------------------------------------------------------------- /conf/dataset/apps_source.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/conf/dataset/apps_source.yaml -------------------------------------------------------------------------------- /conf/dataset/base_dataset.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/conf/dataset/base_dataset.yaml -------------------------------------------------------------------------------- /conf/dataset/base_derived_dataset.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/conf/dataset/base_derived_dataset.yaml -------------------------------------------------------------------------------- /conf/dataset/gsm8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/conf/dataset/gsm8k.yaml -------------------------------------------------------------------------------- /conf/dataset/gsm8k_full_test_prompts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/conf/dataset/gsm8k_full_test_prompts.yaml -------------------------------------------------------------------------------- /conf/dataset/gsm8k_local_prompts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/conf/dataset/gsm8k_local_prompts.yaml -------------------------------------------------------------------------------- /conf/dataset/gsm8k_math_prompts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/conf/dataset/gsm8k_math_prompts.yaml -------------------------------------------------------------------------------- /conf/dataset/jsonl_direct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/conf/dataset/jsonl_direct.yaml -------------------------------------------------------------------------------- /conf/dataset/xlam_fc_eval_prompts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/conf/dataset/xlam_fc_eval_prompts.yaml -------------------------------------------------------------------------------- /conf/dataset/xlam_fc_source.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/conf/dataset/xlam_fc_source.yaml -------------------------------------------------------------------------------- /development/CODING_DATASET.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/CODING_DATASET.jsonl -------------------------------------------------------------------------------- /development/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/CONTRIBUTING.md -------------------------------------------------------------------------------- /development/CORE_STRATEGY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/CORE_STRATEGY.md -------------------------------------------------------------------------------- /development/__init__.py: -------------------------------------------------------------------------------- 1 | # This file makes the 'development' directory a Python package. 2 | -------------------------------------------------------------------------------- /development/cloud_run_vs_cloud_functions_notes.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/cloud_run_vs_cloud_functions_notes.md -------------------------------------------------------------------------------- /development/gsm8k_sample.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/gsm8k_sample.jsonl -------------------------------------------------------------------------------- /development/managed_simulation_server_plan.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/managed_simulation_server_plan.md -------------------------------------------------------------------------------- /development/mcp_north_star.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/mcp_north_star.md -------------------------------------------------------------------------------- /development/normalize_sandbox_fusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/normalize_sandbox_fusion.py -------------------------------------------------------------------------------- /development/notes/N_variant_generation_design.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/notes/N_variant_generation_design.md -------------------------------------------------------------------------------- /development/notes/agent_rl_survey.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/notes/agent_rl_survey.md -------------------------------------------------------------------------------- /development/notes/apps_coding_example_plan.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/notes/apps_coding_example_plan.md -------------------------------------------------------------------------------- /development/notes/cli_evaluation_refactor_plan.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/notes/cli_evaluation_refactor_plan.md -------------------------------------------------------------------------------- /development/notes/frozen_lake.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/notes/frozen_lake.md -------------------------------------------------------------------------------- /development/notes/gigpo_breakdown.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/notes/gigpo_breakdown.md -------------------------------------------------------------------------------- /development/notes/hydra_dataset_refactor_plan.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/notes/hydra_dataset_refactor_plan.md -------------------------------------------------------------------------------- /development/notes/math_lighteval_example_plan.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/notes/math_lighteval_example_plan.md -------------------------------------------------------------------------------- /development/notes/north_star_mcp_gym.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/notes/north_star_mcp_gym.md -------------------------------------------------------------------------------- /development/notes/pytest_integration_proposal.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/notes/pytest_integration_proposal.md -------------------------------------------------------------------------------- /development/notes/unify_gymnasium_api.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/notes/unify_gymnasium_api.md -------------------------------------------------------------------------------- /development/notes/verl_replication_playbook.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/notes/verl_replication_playbook.md -------------------------------------------------------------------------------- /development/readiness/coding_example.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/readiness/coding_example.md -------------------------------------------------------------------------------- /development/readiness/function_calling_example.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/readiness/function_calling_example.md -------------------------------------------------------------------------------- /development/readiness/math_gsm8k.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/readiness/math_gsm8k.md -------------------------------------------------------------------------------- /development/readiness/math_openr1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/readiness/math_openr1.md -------------------------------------------------------------------------------- /development/record_and_playback_design.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/record_and_playback_design.md -------------------------------------------------------------------------------- /development/record_replay_testing_handoff.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/record_replay_testing_handoff.md -------------------------------------------------------------------------------- /development/system_architecture/OVERVIEW.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/system_architecture/OVERVIEW.md -------------------------------------------------------------------------------- /development/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/utils/__init__.py -------------------------------------------------------------------------------- /development/utils/generate_api_key.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/utils/generate_api_key.py -------------------------------------------------------------------------------- /development/utils/subprocess_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/development/utils/subprocess_manager.py -------------------------------------------------------------------------------- /docs/DOCUMENTATION_STATUS.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/DOCUMENTATION_STATUS.mdx -------------------------------------------------------------------------------- /docs/api_reference/api_overview.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/api_reference/api_overview.mdx -------------------------------------------------------------------------------- /docs/api_reference/data_models.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/api_reference/data_models.mdx -------------------------------------------------------------------------------- /docs/api_reference/reward_function_class.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/api_reference/reward_function_class.mdx -------------------------------------------------------------------------------- /docs/api_reference/reward_function_decorator.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/api_reference/reward_function_decorator.mdx -------------------------------------------------------------------------------- /docs/cli_reference/cli_overview.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/cli_reference/cli_overview.mdx -------------------------------------------------------------------------------- /docs/cross_process_events.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/cross_process_events.md -------------------------------------------------------------------------------- /docs/dataset_configuration_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/dataset_configuration_guide.md -------------------------------------------------------------------------------- /docs/developer_guide/agent_evaluation.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/developer_guide/agent_evaluation.mdx -------------------------------------------------------------------------------- /docs/developer_guide/core_data_types.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/developer_guide/core_data_types.mdx -------------------------------------------------------------------------------- /docs/developer_guide/evaluation_workflows.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/developer_guide/evaluation_workflows.mdx -------------------------------------------------------------------------------- /docs/developer_guide/getting_started.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/developer_guide/getting_started.mdx -------------------------------------------------------------------------------- /docs/developer_guide/hydra_configuration.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/developer_guide/hydra_configuration.mdx -------------------------------------------------------------------------------- /docs/developer_guide/images/create_evaluator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/developer_guide/images/create_evaluator.png -------------------------------------------------------------------------------- /docs/developer_guide/implementation_notes.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/developer_guide/implementation_notes.mdx -------------------------------------------------------------------------------- /docs/developer_guide/reward_function_anatomy.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/developer_guide/reward_function_anatomy.mdx -------------------------------------------------------------------------------- /docs/documentation_home.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/documentation_home.mdx -------------------------------------------------------------------------------- /docs/examples/apps_coding_example.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/examples/apps_coding_example.mdx -------------------------------------------------------------------------------- /docs/examples/examples_overview.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/examples/examples_overview.mdx -------------------------------------------------------------------------------- /docs/examples/math_with_formatting_example.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/examples/math_with_formatting_example.mdx -------------------------------------------------------------------------------- /docs/examples/tool_calling_example.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/examples/tool_calling_example.mdx -------------------------------------------------------------------------------- /docs/integrations/trl_integration_overview.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/integrations/trl_integration_overview.mdx -------------------------------------------------------------------------------- /docs/intro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/intro.png -------------------------------------------------------------------------------- /docs/main_screen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/main_screen.png -------------------------------------------------------------------------------- /docs/n_variant_batch_evaluation_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/n_variant_batch_evaluation_guide.md -------------------------------------------------------------------------------- /docs/tutorials/best_practices.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/tutorials/best_practices.mdx -------------------------------------------------------------------------------- /docs/tutorials/evaluating_model_responses.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/docs/tutorials/evaluating_model_responses.mdx -------------------------------------------------------------------------------- /eval_protocol/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/__init__.py -------------------------------------------------------------------------------- /eval_protocol/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/__main__.py -------------------------------------------------------------------------------- /eval_protocol/_version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/_version.py -------------------------------------------------------------------------------- /eval_protocol/adapters/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/adapters/CONTRIBUTING.md -------------------------------------------------------------------------------- /eval_protocol/adapters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/adapters/__init__.py -------------------------------------------------------------------------------- /eval_protocol/adapters/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/adapters/base.py -------------------------------------------------------------------------------- /eval_protocol/adapters/bigquery.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/adapters/bigquery.py -------------------------------------------------------------------------------- /eval_protocol/adapters/braintrust.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/adapters/braintrust.py -------------------------------------------------------------------------------- /eval_protocol/adapters/fireworks_tracing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/adapters/fireworks_tracing.py -------------------------------------------------------------------------------- /eval_protocol/adapters/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/adapters/huggingface.py -------------------------------------------------------------------------------- /eval_protocol/adapters/langchain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/adapters/langchain.py -------------------------------------------------------------------------------- /eval_protocol/adapters/langfuse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/adapters/langfuse.py -------------------------------------------------------------------------------- /eval_protocol/adapters/langsmith.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/adapters/langsmith.py -------------------------------------------------------------------------------- /eval_protocol/adapters/openai_responses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/adapters/openai_responses.py -------------------------------------------------------------------------------- /eval_protocol/adapters/trl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/adapters/trl.py -------------------------------------------------------------------------------- /eval_protocol/adapters/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/adapters/utils.py -------------------------------------------------------------------------------- /eval_protocol/adapters/weave.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/adapters/weave.py -------------------------------------------------------------------------------- /eval_protocol/agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/agent/__init__.py -------------------------------------------------------------------------------- /eval_protocol/agent/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/agent/models.py -------------------------------------------------------------------------------- /eval_protocol/agent/orchestrator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/agent/orchestrator.py -------------------------------------------------------------------------------- /eval_protocol/agent/resource_abc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/agent/resource_abc.py -------------------------------------------------------------------------------- /eval_protocol/agent/resource_pool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/agent/resource_pool.py -------------------------------------------------------------------------------- /eval_protocol/agent/resources/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/agent/resources/__init__.py -------------------------------------------------------------------------------- /eval_protocol/agent/resources/docker_resource.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/agent/resources/docker_resource.py -------------------------------------------------------------------------------- /eval_protocol/agent/resources/sql_resource.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/agent/resources/sql_resource.py -------------------------------------------------------------------------------- /eval_protocol/agent/task_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/agent/task_manager.py -------------------------------------------------------------------------------- /eval_protocol/agent/tool_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/agent/tool_registry.py -------------------------------------------------------------------------------- /eval_protocol/auth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/auth.py -------------------------------------------------------------------------------- /eval_protocol/benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_protocol/benchmarks/test_aime25.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/benchmarks/test_aime25.py -------------------------------------------------------------------------------- /eval_protocol/benchmarks/test_frozen_lake.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/benchmarks/test_frozen_lake.py -------------------------------------------------------------------------------- /eval_protocol/benchmarks/test_gpqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/benchmarks/test_gpqa.py -------------------------------------------------------------------------------- /eval_protocol/benchmarks/test_tau_bench_retail.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/benchmarks/test_tau_bench_retail.py -------------------------------------------------------------------------------- /eval_protocol/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/cli.py -------------------------------------------------------------------------------- /eval_protocol/cli_commands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/cli_commands/__init__.py -------------------------------------------------------------------------------- /eval_protocol/cli_commands/agent_eval_cmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/cli_commands/agent_eval_cmd.py -------------------------------------------------------------------------------- /eval_protocol/cli_commands/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/cli_commands/common.py -------------------------------------------------------------------------------- /eval_protocol/cli_commands/create_rft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/cli_commands/create_rft.py -------------------------------------------------------------------------------- /eval_protocol/cli_commands/deploy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/cli_commands/deploy.py -------------------------------------------------------------------------------- /eval_protocol/cli_commands/deploy_mcp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/cli_commands/deploy_mcp.py -------------------------------------------------------------------------------- /eval_protocol/cli_commands/local_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/cli_commands/local_test.py -------------------------------------------------------------------------------- /eval_protocol/cli_commands/logs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/cli_commands/logs.py -------------------------------------------------------------------------------- /eval_protocol/cli_commands/preview.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/cli_commands/preview.py -------------------------------------------------------------------------------- /eval_protocol/cli_commands/run_eval_cmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/cli_commands/run_eval_cmd.py -------------------------------------------------------------------------------- /eval_protocol/cli_commands/upload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/cli_commands/upload.py -------------------------------------------------------------------------------- /eval_protocol/cli_commands/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/cli_commands/utils.py -------------------------------------------------------------------------------- /eval_protocol/common_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/common_utils.py -------------------------------------------------------------------------------- /eval_protocol/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/config.py -------------------------------------------------------------------------------- /eval_protocol/data_loader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/data_loader/__init__.py -------------------------------------------------------------------------------- /eval_protocol/data_loader/dynamic_data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/data_loader/dynamic_data_loader.py -------------------------------------------------------------------------------- /eval_protocol/data_loader/factory_data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/data_loader/factory_data_loader.py -------------------------------------------------------------------------------- /eval_protocol/data_loader/inline_data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/data_loader/inline_data_loader.py -------------------------------------------------------------------------------- /eval_protocol/data_loader/jsonl_data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/data_loader/jsonl_data_loader.py -------------------------------------------------------------------------------- /eval_protocol/data_loader/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/data_loader/models.py -------------------------------------------------------------------------------- /eval_protocol/dataset_logger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/dataset_logger/__init__.py -------------------------------------------------------------------------------- /eval_protocol/dataset_logger/dataset_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/dataset_logger/dataset_logger.py -------------------------------------------------------------------------------- /eval_protocol/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/datasets/__init__.py -------------------------------------------------------------------------------- /eval_protocol/datasets/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/datasets/loader.py -------------------------------------------------------------------------------- /eval_protocol/directory_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/directory_utils.py -------------------------------------------------------------------------------- /eval_protocol/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/evaluation.py -------------------------------------------------------------------------------- /eval_protocol/event_bus/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/event_bus/__init__.py -------------------------------------------------------------------------------- /eval_protocol/event_bus/event_bus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/event_bus/event_bus.py -------------------------------------------------------------------------------- /eval_protocol/event_bus/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/event_bus/logger.py -------------------------------------------------------------------------------- /eval_protocol/event_bus/sqlite_event_bus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/event_bus/sqlite_event_bus.py -------------------------------------------------------------------------------- /eval_protocol/exceptions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/exceptions.py -------------------------------------------------------------------------------- /eval_protocol/execution/__init__.py: -------------------------------------------------------------------------------- 1 | """Core components for executing evaluation pipelines.""" 2 | -------------------------------------------------------------------------------- /eval_protocol/execution/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/execution/pipeline.py -------------------------------------------------------------------------------- /eval_protocol/fireworks_rft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/fireworks_rft.py -------------------------------------------------------------------------------- /eval_protocol/gcp_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/gcp_tools.py -------------------------------------------------------------------------------- /eval_protocol/generation/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/generation/cache.py -------------------------------------------------------------------------------- /eval_protocol/generation/clients.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/generation/clients.py -------------------------------------------------------------------------------- /eval_protocol/generation/clients/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/generation/clients/base.py -------------------------------------------------------------------------------- /eval_protocol/generic_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/generic_server.py -------------------------------------------------------------------------------- /eval_protocol/get_pep440_version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/get_pep440_version.py -------------------------------------------------------------------------------- /eval_protocol/human_id/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/human_id/__init__.py -------------------------------------------------------------------------------- /eval_protocol/human_id/dictionary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/human_id/dictionary.py -------------------------------------------------------------------------------- /eval_protocol/integrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/integrations/__init__.py -------------------------------------------------------------------------------- /eval_protocol/integrations/deepeval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/integrations/deepeval.py -------------------------------------------------------------------------------- /eval_protocol/integrations/openai_rft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/integrations/openai_rft.py -------------------------------------------------------------------------------- /eval_protocol/integrations/openeval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/integrations/openeval.py -------------------------------------------------------------------------------- /eval_protocol/integrations/tinker_cookbook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/integrations/tinker_cookbook.py -------------------------------------------------------------------------------- /eval_protocol/integrations/trl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/integrations/trl.py -------------------------------------------------------------------------------- /eval_protocol/log_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_protocol/log_utils/elasticsearch_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/log_utils/elasticsearch_client.py -------------------------------------------------------------------------------- /eval_protocol/log_utils/init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/log_utils/init.py -------------------------------------------------------------------------------- /eval_protocol/log_utils/rollout_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/log_utils/rollout_context.py -------------------------------------------------------------------------------- /eval_protocol/log_utils/rollout_id_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/log_utils/rollout_id_filter.py -------------------------------------------------------------------------------- /eval_protocol/log_utils/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/log_utils/util.py -------------------------------------------------------------------------------- /eval_protocol/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/logging_utils.py -------------------------------------------------------------------------------- /eval_protocol/mcp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/__init__.py -------------------------------------------------------------------------------- /eval_protocol/mcp/adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/adapter.py -------------------------------------------------------------------------------- /eval_protocol/mcp/client/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/client/__init__.py -------------------------------------------------------------------------------- /eval_protocol/mcp/client/connection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/client/connection.py -------------------------------------------------------------------------------- /eval_protocol/mcp/clients.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/clients.py -------------------------------------------------------------------------------- /eval_protocol/mcp/execution/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/execution/__init__.py -------------------------------------------------------------------------------- /eval_protocol/mcp/execution/base_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/execution/base_policy.py -------------------------------------------------------------------------------- /eval_protocol/mcp/execution/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/execution/manager.py -------------------------------------------------------------------------------- /eval_protocol/mcp/execution/policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/execution/policy.py -------------------------------------------------------------------------------- /eval_protocol/mcp/execution/vllm_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/execution/vllm_policy.py -------------------------------------------------------------------------------- /eval_protocol/mcp/grid_renderer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/grid_renderer.py -------------------------------------------------------------------------------- /eval_protocol/mcp/mcp_multi_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/mcp_multi_client.py -------------------------------------------------------------------------------- /eval_protocol/mcp/mcpgym.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/mcpgym.py -------------------------------------------------------------------------------- /eval_protocol/mcp/process_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/process_manager.py -------------------------------------------------------------------------------- /eval_protocol/mcp/session/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/session/__init__.py -------------------------------------------------------------------------------- /eval_protocol/mcp/session/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/session/manager.py -------------------------------------------------------------------------------- /eval_protocol/mcp/simple_process_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/simple_process_manager.py -------------------------------------------------------------------------------- /eval_protocol/mcp/simulation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp/simulation_server.py -------------------------------------------------------------------------------- /eval_protocol/mcp_agent/__init__.py: -------------------------------------------------------------------------------- 1 | # Eval Protocol MCP Agent Package 2 | -------------------------------------------------------------------------------- /eval_protocol/mcp_agent/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp_agent/config.py -------------------------------------------------------------------------------- /eval_protocol/mcp_agent/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp_agent/main.py -------------------------------------------------------------------------------- /eval_protocol/mcp_agent/orchestration/__init__.py: -------------------------------------------------------------------------------- 1 | # MCP Agent Orchestration Package 2 | -------------------------------------------------------------------------------- /eval_protocol/mcp_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp_env.py -------------------------------------------------------------------------------- /eval_protocol/mcp_servers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_protocol/mcp_servers/frozen_lake/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp_servers/frozen_lake/server.py -------------------------------------------------------------------------------- /eval_protocol/mcp_servers/tau2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp_servers/tau2/README.md -------------------------------------------------------------------------------- /eval_protocol/mcp_servers/tau2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp_servers/tau2/__init__.py -------------------------------------------------------------------------------- /eval_protocol/mcp_servers/tau2/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp_servers/tau2/server.py -------------------------------------------------------------------------------- /eval_protocol/mcp_servers/tau2/tau2_mcp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/mcp_servers/tau2/tau2_mcp.py -------------------------------------------------------------------------------- /eval_protocol/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/models.py -------------------------------------------------------------------------------- /eval_protocol/packaging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/packaging.py -------------------------------------------------------------------------------- /eval_protocol/platform_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/platform_api.py -------------------------------------------------------------------------------- /eval_protocol/playback_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/playback_policy.py -------------------------------------------------------------------------------- /eval_protocol/proxy/.env.example: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/proxy/.env.example -------------------------------------------------------------------------------- /eval_protocol/proxy/Dockerfile.gateway: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/proxy/Dockerfile.gateway -------------------------------------------------------------------------------- /eval_protocol/proxy/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/proxy/README.md -------------------------------------------------------------------------------- /eval_protocol/proxy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/proxy/__init__.py -------------------------------------------------------------------------------- /eval_protocol/proxy/config_no_cache.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/proxy/config_no_cache.yaml -------------------------------------------------------------------------------- /eval_protocol/proxy/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/proxy/docker-compose.yml -------------------------------------------------------------------------------- /eval_protocol/proxy/proxy_core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/proxy/proxy_core/__init__.py -------------------------------------------------------------------------------- /eval_protocol/proxy/proxy_core/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/proxy/proxy_core/app.py -------------------------------------------------------------------------------- /eval_protocol/proxy/proxy_core/auth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/proxy/proxy_core/auth.py -------------------------------------------------------------------------------- /eval_protocol/proxy/proxy_core/langfuse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/proxy/proxy_core/langfuse.py -------------------------------------------------------------------------------- /eval_protocol/proxy/proxy_core/litellm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/proxy/proxy_core/litellm.py -------------------------------------------------------------------------------- /eval_protocol/proxy/proxy_core/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/proxy/proxy_core/main.py -------------------------------------------------------------------------------- /eval_protocol/proxy/proxy_core/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/proxy/proxy_core/models.py -------------------------------------------------------------------------------- /eval_protocol/proxy/proxy_core/redis_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/proxy/proxy_core/redis_utils.py -------------------------------------------------------------------------------- /eval_protocol/proxy/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/proxy/requirements.txt -------------------------------------------------------------------------------- /eval_protocol/pytest/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/__init__.py -------------------------------------------------------------------------------- /eval_protocol/pytest/default_dataset_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/default_dataset_adapter.py -------------------------------------------------------------------------------- /eval_protocol/pytest/dual_mode_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/dual_mode_wrapper.py -------------------------------------------------------------------------------- /eval_protocol/pytest/elasticsearch_setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/elasticsearch_setup.py -------------------------------------------------------------------------------- /eval_protocol/pytest/evaluation_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/evaluation_test.py -------------------------------------------------------------------------------- /eval_protocol/pytest/evaluation_test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/evaluation_test_utils.py -------------------------------------------------------------------------------- /eval_protocol/pytest/exception_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/exception_config.py -------------------------------------------------------------------------------- /eval_protocol/pytest/execution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/execution.py -------------------------------------------------------------------------------- /eval_protocol/pytest/handle_persist_flow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/handle_persist_flow.py -------------------------------------------------------------------------------- /eval_protocol/pytest/openenv_rollout_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/openenv_rollout_processor.py -------------------------------------------------------------------------------- /eval_protocol/pytest/parameterize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/parameterize.py -------------------------------------------------------------------------------- /eval_protocol/pytest/plugin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/plugin.py -------------------------------------------------------------------------------- /eval_protocol/pytest/remote_rollout_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/remote_rollout_processor.py -------------------------------------------------------------------------------- /eval_protocol/pytest/rollout_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/rollout_processor.py -------------------------------------------------------------------------------- /eval_protocol/pytest/store_experiment_link.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/store_experiment_link.py -------------------------------------------------------------------------------- /eval_protocol/pytest/store_results_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/store_results_url.py -------------------------------------------------------------------------------- /eval_protocol/pytest/tracing_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/tracing_utils.py -------------------------------------------------------------------------------- /eval_protocol/pytest/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/types.py -------------------------------------------------------------------------------- /eval_protocol/pytest/validate_signature.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/pytest/validate_signature.py -------------------------------------------------------------------------------- /eval_protocol/quickstart/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/quickstart/__init__.py -------------------------------------------------------------------------------- /eval_protocol/quickstart/aha_judge/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/quickstart/aha_judge/__init__.py -------------------------------------------------------------------------------- /eval_protocol/quickstart/aha_judge/llm_judge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/quickstart/aha_judge/llm_judge.py -------------------------------------------------------------------------------- /eval_protocol/quickstart/aha_judge/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/quickstart/aha_judge/utils.py -------------------------------------------------------------------------------- /eval_protocol/quickstart/llm_judge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/quickstart/llm_judge.py -------------------------------------------------------------------------------- /eval_protocol/quickstart/llm_judge_braintrust.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/quickstart/llm_judge_braintrust.py -------------------------------------------------------------------------------- /eval_protocol/quickstart/svg_agent/evaluator/requirements.txt: -------------------------------------------------------------------------------- 1 | eval-protocol[svgbench]>=0.2.72 2 | -------------------------------------------------------------------------------- /eval_protocol/quickstart/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/quickstart/utils.py -------------------------------------------------------------------------------- /eval_protocol/resources.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/resources.py -------------------------------------------------------------------------------- /eval_protocol/reward_function.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/reward_function.py -------------------------------------------------------------------------------- /eval_protocol/rewards/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/__init__.py -------------------------------------------------------------------------------- /eval_protocol/rewards/accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/accuracy.py -------------------------------------------------------------------------------- /eval_protocol/rewards/accuracy_length.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/accuracy_length.py -------------------------------------------------------------------------------- /eval_protocol/rewards/apps_coding_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/apps_coding_reward.py -------------------------------------------------------------------------------- /eval_protocol/rewards/apps_execution_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/apps_execution_utils.py -------------------------------------------------------------------------------- /eval_protocol/rewards/apps_testing_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/apps_testing_util.py -------------------------------------------------------------------------------- /eval_protocol/rewards/bfcl_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/bfcl_reward.py -------------------------------------------------------------------------------- /eval_protocol/rewards/code_execution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/code_execution.py -------------------------------------------------------------------------------- /eval_protocol/rewards/code_execution_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/code_execution_utils.py -------------------------------------------------------------------------------- /eval_protocol/rewards/cpp_code.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/cpp_code.py -------------------------------------------------------------------------------- /eval_protocol/rewards/deepcoder_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/deepcoder_reward.py -------------------------------------------------------------------------------- /eval_protocol/rewards/format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/format.py -------------------------------------------------------------------------------- /eval_protocol/rewards/function_calling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/function_calling.py -------------------------------------------------------------------------------- /eval_protocol/rewards/json_schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/json_schema.py -------------------------------------------------------------------------------- /eval_protocol/rewards/language_consistency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/language_consistency.py -------------------------------------------------------------------------------- /eval_protocol/rewards/lean_prover.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/lean_prover.py -------------------------------------------------------------------------------- /eval_protocol/rewards/length.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/length.py -------------------------------------------------------------------------------- /eval_protocol/rewards/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/math.py -------------------------------------------------------------------------------- /eval_protocol/rewards/reasoning_steps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/reasoning_steps.py -------------------------------------------------------------------------------- /eval_protocol/rewards/repetition.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/repetition.py -------------------------------------------------------------------------------- /eval_protocol/rewards/tag_count.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rewards/tag_count.py -------------------------------------------------------------------------------- /eval_protocol/rl_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/rl_processing.py -------------------------------------------------------------------------------- /eval_protocol/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/server.py -------------------------------------------------------------------------------- /eval_protocol/stats/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/stats/__init__.py -------------------------------------------------------------------------------- /eval_protocol/stats/confidence_intervals.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/stats/confidence_intervals.py -------------------------------------------------------------------------------- /eval_protocol/typed_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/typed_interface.py -------------------------------------------------------------------------------- /eval_protocol/types/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/types/__init__.py -------------------------------------------------------------------------------- /eval_protocol/types/errors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/types/errors.py -------------------------------------------------------------------------------- /eval_protocol/types/remote_rollout_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/types/remote_rollout_processor.py -------------------------------------------------------------------------------- /eval_protocol/types/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/types/types.py -------------------------------------------------------------------------------- /eval_protocol/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/utils/__init__.py -------------------------------------------------------------------------------- /eval_protocol/utils/batch_evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/utils/batch_evaluation.py -------------------------------------------------------------------------------- /eval_protocol/utils/batch_transformation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/utils/batch_transformation.py -------------------------------------------------------------------------------- /eval_protocol/utils/browser_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/utils/browser_utils.py -------------------------------------------------------------------------------- /eval_protocol/utils/check_server_status.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/utils/check_server_status.py -------------------------------------------------------------------------------- /eval_protocol/utils/dataset_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/utils/dataset_helpers.py -------------------------------------------------------------------------------- /eval_protocol/utils/evaluation_row_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/utils/evaluation_row_utils.py -------------------------------------------------------------------------------- /eval_protocol/utils/logs_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/utils/logs_models.py -------------------------------------------------------------------------------- /eval_protocol/utils/logs_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/utils/logs_server.py -------------------------------------------------------------------------------- /eval_protocol/utils/module_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/utils/module_loader.py -------------------------------------------------------------------------------- /eval_protocol/utils/packaging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/utils/packaging_utils.py -------------------------------------------------------------------------------- /eval_protocol/utils/show_results_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/utils/show_results_url.py -------------------------------------------------------------------------------- /eval_protocol/utils/static_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/utils/static_policy.py -------------------------------------------------------------------------------- /eval_protocol/utils/subprocess_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/utils/subprocess_utils.py -------------------------------------------------------------------------------- /eval_protocol/utils/vite_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/eval_protocol/utils/vite_server.py -------------------------------------------------------------------------------- /examples/CLEAN_FRAMEWORK_SUMMARY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/CLEAN_FRAMEWORK_SUMMARY.md -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/README.md -------------------------------------------------------------------------------- /examples/UNIFIED_FRAMEWORK_SUMMARY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/UNIFIED_FRAMEWORK_SUMMARY.md -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | """Examples package for Eval Protocol.""" 2 | -------------------------------------------------------------------------------- /examples/adapters/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/adapters/README.md -------------------------------------------------------------------------------- /examples/adapters/gsm8k_replacement_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/adapters/gsm8k_replacement_example.py -------------------------------------------------------------------------------- /examples/adapters/huggingface_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/adapters/huggingface_example.py -------------------------------------------------------------------------------- /examples/adapters/langfuse_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/adapters/langfuse_example.py -------------------------------------------------------------------------------- /examples/aime2025_chat_completion/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/aime2025_chat_completion/README.md -------------------------------------------------------------------------------- /examples/aime2025_chat_completion/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["main"] 2 | -------------------------------------------------------------------------------- /examples/aime2025_chat_completion/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/aime2025_chat_completion/main.py -------------------------------------------------------------------------------- /examples/apps_coding_example/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/apps_coding_example/README.md -------------------------------------------------------------------------------- /examples/apps_coding_example/conf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/apps_coding_example/conf/__init__.py -------------------------------------------------------------------------------- /examples/apps_coding_example/conf/run_eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/apps_coding_example/conf/run_eval.yaml -------------------------------------------------------------------------------- /examples/apps_coding_example/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/apps_coding_example/main.py -------------------------------------------------------------------------------- /examples/blackjack_mcp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/blackjack_mcp/__init__.py -------------------------------------------------------------------------------- /examples/blackjack_mcp/blackjack_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/blackjack_mcp/blackjack_adapter.py -------------------------------------------------------------------------------- /examples/blackjack_mcp/blackjack_mcp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/blackjack_mcp/blackjack_mcp.py -------------------------------------------------------------------------------- /examples/blackjack_mcp/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/blackjack_mcp/server.py -------------------------------------------------------------------------------- /examples/blackjack_mcp/shared_data/rollouts.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/blackjack_mcp/shared_data/rollouts.jsonl -------------------------------------------------------------------------------- /examples/cliff_walking_mcp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/cliff_walking_mcp/__init__.py -------------------------------------------------------------------------------- /examples/cliff_walking_mcp/cliff_walking_mcp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/cliff_walking_mcp/cliff_walking_mcp.py -------------------------------------------------------------------------------- /examples/cliff_walking_mcp/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/cliff_walking_mcp/server.py -------------------------------------------------------------------------------- /examples/e2b_auto_extract_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/e2b_auto_extract_example.py -------------------------------------------------------------------------------- /examples/e2b_fallback_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/e2b_fallback_example.py -------------------------------------------------------------------------------- /examples/e2b_javascript_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/e2b_javascript_example.py -------------------------------------------------------------------------------- /examples/e2b_reward_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/e2b_reward_example.py -------------------------------------------------------------------------------- /examples/frozen_lake_mcp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/frozen_lake_mcp/README.md -------------------------------------------------------------------------------- /examples/frozen_lake_mcp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/frozen_lake_mcp/__init__.py -------------------------------------------------------------------------------- /examples/frozen_lake_mcp/frozen_lake_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/frozen_lake_mcp/frozen_lake_adapter.py -------------------------------------------------------------------------------- /examples/frozen_lake_mcp/frozen_lake_mcp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/frozen_lake_mcp/frozen_lake_mcp.py -------------------------------------------------------------------------------- /examples/frozen_lake_mcp/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/frozen_lake_mcp/requirements.txt -------------------------------------------------------------------------------- /examples/frozen_lake_mcp/rollout_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/frozen_lake_mcp/rollout_example.py -------------------------------------------------------------------------------- /examples/frozen_lake_mcp/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/frozen_lake_mcp/server.py -------------------------------------------------------------------------------- /examples/frozen_lake_mcp/server_logs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/frozen_lake_mcp/server_logs.txt -------------------------------------------------------------------------------- /examples/frozen_lake_mcp/test_multi_session.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/frozen_lake_mcp/test_multi_session.py -------------------------------------------------------------------------------- /examples/frozen_lake_mcp/test_north_star.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/frozen_lake_mcp/test_north_star.py -------------------------------------------------------------------------------- /examples/frozen_lake_mcp/test_seed_logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/frozen_lake_mcp/test_seed_logging.py -------------------------------------------------------------------------------- /examples/frozen_lake_mcp/test_termination_fix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/frozen_lake_mcp/test_termination_fix.py -------------------------------------------------------------------------------- /examples/frozen_lake_mcp/test_validation_logic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/frozen_lake_mcp/test_validation_logic.py -------------------------------------------------------------------------------- /examples/gpqa/tests/test_gpqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/gpqa/tests/test_gpqa.py -------------------------------------------------------------------------------- /examples/healthbench/tests/test_evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/healthbench/tests/test_evaluation.py -------------------------------------------------------------------------------- /examples/langgraph/__init__.py: -------------------------------------------------------------------------------- 1 | # Package marker for examples.langgraph 2 | -------------------------------------------------------------------------------- /examples/langgraph/data/simple_prompts.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/langgraph/data/simple_prompts.jsonl -------------------------------------------------------------------------------- /examples/langgraph/simple_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/langgraph/simple_graph.py -------------------------------------------------------------------------------- /examples/langgraph/test_langgraph_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/langgraph/test_langgraph_rollout.py -------------------------------------------------------------------------------- /examples/langgraph/test_reasoning_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/langgraph/test_reasoning_rollout.py -------------------------------------------------------------------------------- /examples/langgraph/test_tools_langsmith_trace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/langgraph/test_tools_langsmith_trace.py -------------------------------------------------------------------------------- /examples/langgraph/tools_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/langgraph/tools_graph.py -------------------------------------------------------------------------------- /examples/lunar_lander_mcp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/lunar_lander_mcp/README.md -------------------------------------------------------------------------------- /examples/lunar_lander_mcp/lunar_lander_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/lunar_lander_mcp/lunar_lander_adapter.py -------------------------------------------------------------------------------- /examples/lunar_lander_mcp/lunar_lander_mcp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/lunar_lander_mcp/lunar_lander_mcp.py -------------------------------------------------------------------------------- /examples/lunar_lander_mcp/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/lunar_lander_mcp/requirements.txt -------------------------------------------------------------------------------- /examples/lunar_lander_mcp/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/lunar_lander_mcp/server.py -------------------------------------------------------------------------------- /examples/math_example/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/math_example/README.md -------------------------------------------------------------------------------- /examples/math_example/conf/simple_math_eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/math_example/conf/simple_math_eval.yaml -------------------------------------------------------------------------------- /examples/math_example/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/math_example/main.py -------------------------------------------------------------------------------- /examples/math_with_format_and_length/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/math_with_format_and_length/README.md -------------------------------------------------------------------------------- /examples/math_with_format_and_length/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/math_with_format_and_length/main.py -------------------------------------------------------------------------------- /examples/math_with_formatting/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/math_with_formatting/README.md -------------------------------------------------------------------------------- /examples/math_with_formatting/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/math_with_formatting/main.py -------------------------------------------------------------------------------- /examples/mcp_agent_filesystem_rl/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/mcp_agent_filesystem_rl/README.md -------------------------------------------------------------------------------- /examples/mcp_agent_filesystem_rl/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/mcp_agent_filesystem_rl/config.yaml -------------------------------------------------------------------------------- /examples/mcp_agent_filesystem_rl/dataset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/mcp_agent_filesystem_rl/dataset.jsonl -------------------------------------------------------------------------------- /examples/mcp_agent_filesystem_rl/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/mcp_agent_filesystem_rl/main.py -------------------------------------------------------------------------------- /examples/mcp_agent_filesystem_rl/templates/workspace/backup_dir/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/mcp_agent_filesystem_rl/templates/workspace/scenario_copy_001/source_dir/file_to_move.txt: -------------------------------------------------------------------------------- 1 | This file should be moved. 2 | -------------------------------------------------------------------------------- /examples/mcp_agent_filesystem_rl/templates/workspace/scenario_copy_001/source_dir/sample.txt: -------------------------------------------------------------------------------- 1 | This is a sample file. 2 | -------------------------------------------------------------------------------- /examples/mcp_agent_filesystem_rl/templates/workspace/scenario_create_001/source_dir/file_to_move.txt: -------------------------------------------------------------------------------- 1 | This file should be moved. 2 | -------------------------------------------------------------------------------- /examples/mcp_agent_filesystem_rl/templates/workspace/scenario_create_001/source_dir/sample.txt: -------------------------------------------------------------------------------- 1 | This is a sample file. 2 | -------------------------------------------------------------------------------- /examples/mcp_agent_filesystem_rl/templates/workspace/scenario_move_001/source_dir/file_to_move.txt: -------------------------------------------------------------------------------- 1 | This file should be moved. 2 | -------------------------------------------------------------------------------- /examples/mcp_agent_filesystem_rl/templates/workspace/scenario_move_001/source_dir/sample.txt: -------------------------------------------------------------------------------- 1 | This is a sample file. 2 | -------------------------------------------------------------------------------- /examples/mcp_agent_filesystem_rl/templates/workspace/target_dir/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/mcp_agent_filesystem_rl/test_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/mcp_agent_filesystem_rl/test_example.py -------------------------------------------------------------------------------- /examples/metrics/llm_resource_example/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/metrics/llm_resource_example/README.md -------------------------------------------------------------------------------- /examples/metrics/llm_resource_example/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/metrics/llm_resource_example/main.py -------------------------------------------------------------------------------- /examples/metrics/word_count/dummy_sample.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/metrics/word_count/dummy_sample.jsonl -------------------------------------------------------------------------------- /examples/metrics/word_count/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/metrics/word_count/main.py -------------------------------------------------------------------------------- /examples/n_variant_example.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/n_variant_example.yaml -------------------------------------------------------------------------------- /examples/n_variant_to_batch_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/n_variant_to_batch_demo.py -------------------------------------------------------------------------------- /examples/openai_rft/example_rapidfuzz.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/openai_rft/example_rapidfuzz.py -------------------------------------------------------------------------------- /examples/openai_rft/test_openai_grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/openai_rft/test_openai_grader.py -------------------------------------------------------------------------------- /examples/rollout_control_plane_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/rollout_control_plane_demo.py -------------------------------------------------------------------------------- /examples/row_wise/dummy_example/dummy_rewards.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/row_wise/dummy_example/dummy_rewards.py -------------------------------------------------------------------------------- /examples/tau2_mcp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tau2_mcp/README.md -------------------------------------------------------------------------------- /examples/tau2_mcp/airplane_environment/db.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tau2_mcp/airplane_environment/db.json -------------------------------------------------------------------------------- /examples/tau2_mcp/mock_environment/db.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tau2_mcp/mock_environment/db.json -------------------------------------------------------------------------------- /examples/tau2_mcp/retail_environment/db.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tau2_mcp/retail_environment/db.json -------------------------------------------------------------------------------- /examples/tau2_mcp/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tau2_mcp/server.py -------------------------------------------------------------------------------- /examples/tau2_mcp/tau2_mcp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tau2_mcp/tau2_mcp.py -------------------------------------------------------------------------------- /examples/tau2_mcp/tests/datasets/airline.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tau2_mcp/tests/datasets/airline.json -------------------------------------------------------------------------------- /examples/tau2_mcp/tests/datasets/mock.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tau2_mcp/tests/datasets/mock.json -------------------------------------------------------------------------------- /examples/tau2_mcp/tests/datasets/retail.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tau2_mcp/tests/datasets/retail.json -------------------------------------------------------------------------------- /examples/tau2_mcp/tests/test_tau2_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tau2_mcp/tests/test_tau2_e2e.py -------------------------------------------------------------------------------- /examples/taxi_mcp_complete/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/taxi_mcp_complete/README.md -------------------------------------------------------------------------------- /examples/taxi_mcp_complete/local_testing/clean_openai_format.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/taxi_mcp_complete/tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/taxi_mcp_complete/tests/conftest.py -------------------------------------------------------------------------------- /examples/taxi_mcp_complete/tests/test_taxi_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/taxi_mcp_complete/tests/test_taxi_e2e.py -------------------------------------------------------------------------------- /examples/test_tasks/__init__.py: -------------------------------------------------------------------------------- 1 | # Test tasks for agent evaluation framework 2 | -------------------------------------------------------------------------------- /examples/test_tasks/reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/test_tasks/reward.py -------------------------------------------------------------------------------- /examples/test_tasks/task1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/test_tasks/task1.yaml -------------------------------------------------------------------------------- /examples/test_tasks/task2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/test_tasks/task2.yaml -------------------------------------------------------------------------------- /examples/test_tasks/tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/test_tasks/tools.py -------------------------------------------------------------------------------- /examples/test_unified_framework_final.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/test_unified_framework_final.py -------------------------------------------------------------------------------- /examples/test_unified_servers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/test_unified_servers.py -------------------------------------------------------------------------------- /examples/tinker_math_rl/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tinker_math_rl/README.md -------------------------------------------------------------------------------- /examples/tinker_math_rl/plot_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tinker_math_rl/plot_metrics.py -------------------------------------------------------------------------------- /examples/tinker_math_rl/test_gsm8k_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tinker_math_rl/test_gsm8k_eval.py -------------------------------------------------------------------------------- /examples/tinker_math_rl/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tinker_math_rl/train.py -------------------------------------------------------------------------------- /examples/tool_calling_example/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tool_calling_example/README.md -------------------------------------------------------------------------------- /examples/tool_calling_example/dataset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tool_calling_example/dataset.jsonl -------------------------------------------------------------------------------- /examples/tool_calling_example/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tool_calling_example/main.py -------------------------------------------------------------------------------- /examples/tracing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/tracing/weave/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tracing/weave/__init__.py -------------------------------------------------------------------------------- /examples/tracing/weave/converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/tracing/weave/converter.py -------------------------------------------------------------------------------- /examples/trl_integration/__init__.py: -------------------------------------------------------------------------------- 1 | # This file makes the trl_integration directory a Python package. 2 | -------------------------------------------------------------------------------- /examples/trl_integration/grpo_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/trl_integration/grpo_example.py -------------------------------------------------------------------------------- /examples/trl_integration/ppo_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/trl_integration/ppo_example.py -------------------------------------------------------------------------------- /examples/trl_integration/test_trl_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/trl_integration/test_trl_integration.py -------------------------------------------------------------------------------- /examples/trl_integration/trl_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/trl_integration/trl_adapter.py -------------------------------------------------------------------------------- /examples/trl_integration/working_grpo_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/examples/trl_integration/working_grpo_example.py -------------------------------------------------------------------------------- /local_evals/datasets/airline.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/local_evals/datasets/airline.json -------------------------------------------------------------------------------- /local_evals/model_comparison_eval.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/local_evals/model_comparison_eval.ipynb -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/pyproject.toml -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/pytest.ini -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/requirements-dev.txt -------------------------------------------------------------------------------- /scripts/convert_apps_to_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/scripts/convert_apps_to_prompts.py -------------------------------------------------------------------------------- /scripts/convert_bfcl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/scripts/convert_bfcl_dataset.py -------------------------------------------------------------------------------- /scripts/create_sample_gsm8k_jsonl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/scripts/create_sample_gsm8k_jsonl.py -------------------------------------------------------------------------------- /scripts/migrate_datasets.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/validate_dev_tracing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/scripts/validate_dev_tracing.py -------------------------------------------------------------------------------- /scripts/validate_remote.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/scripts/validate_remote.py -------------------------------------------------------------------------------- /scripts/verify_logging_locally.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/scripts/verify_logging_locally.py -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/setup.py -------------------------------------------------------------------------------- /test_event_bus_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/test_event_bus_helper.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/adapters/test_braintrust_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/adapters/test_braintrust_adapter.py -------------------------------------------------------------------------------- /tests/adapters/test_langfuse_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/adapters/test_langfuse_adapter.py -------------------------------------------------------------------------------- /tests/adapters/test_langsmith_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/adapters/test_langsmith_adapter.py -------------------------------------------------------------------------------- /tests/adapters/test_openai_responses_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/adapters/test_openai_responses_adapter.py -------------------------------------------------------------------------------- /tests/adapters/test_weave_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/adapters/test_weave_adapter.py -------------------------------------------------------------------------------- /tests/chinook/Chinook_PostgreSql.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/Chinook_PostgreSql.sql -------------------------------------------------------------------------------- /tests/chinook/braintrust/generate_traces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/braintrust/generate_traces.py -------------------------------------------------------------------------------- /tests/chinook/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset.py -------------------------------------------------------------------------------- /tests/chinook/dataset/task_1/ground_truth.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_1/ground_truth.md -------------------------------------------------------------------------------- /tests/chinook/dataset/task_1/task.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_1/task.txt -------------------------------------------------------------------------------- /tests/chinook/dataset/task_10/ground_truth.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_10/ground_truth.md -------------------------------------------------------------------------------- /tests/chinook/dataset/task_10/task.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_10/task.txt -------------------------------------------------------------------------------- /tests/chinook/dataset/task_11/ground_truth.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_11/ground_truth.md -------------------------------------------------------------------------------- /tests/chinook/dataset/task_11/task.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_11/task.txt -------------------------------------------------------------------------------- /tests/chinook/dataset/task_12/ground_truth.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_12/ground_truth.md -------------------------------------------------------------------------------- /tests/chinook/dataset/task_12/task.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_12/task.txt -------------------------------------------------------------------------------- /tests/chinook/dataset/task_2/ground_truth.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_2/ground_truth.md -------------------------------------------------------------------------------- /tests/chinook/dataset/task_2/task.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_2/task.txt -------------------------------------------------------------------------------- /tests/chinook/dataset/task_3/ground_truth.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_3/ground_truth.md -------------------------------------------------------------------------------- /tests/chinook/dataset/task_3/task.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_3/task.txt -------------------------------------------------------------------------------- /tests/chinook/dataset/task_4/ground_truth.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_4/ground_truth.md -------------------------------------------------------------------------------- /tests/chinook/dataset/task_4/task.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_4/task.txt -------------------------------------------------------------------------------- /tests/chinook/dataset/task_5/ground_truth.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_5/ground_truth.md -------------------------------------------------------------------------------- /tests/chinook/dataset/task_5/task.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_5/task.txt -------------------------------------------------------------------------------- /tests/chinook/dataset/task_6/ground_truth.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_6/ground_truth.md -------------------------------------------------------------------------------- /tests/chinook/dataset/task_6/task.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_6/task.txt -------------------------------------------------------------------------------- /tests/chinook/dataset/task_7/ground_truth.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_7/ground_truth.md -------------------------------------------------------------------------------- /tests/chinook/dataset/task_7/task.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_7/task.txt -------------------------------------------------------------------------------- /tests/chinook/dataset/task_8/ground_truth.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_8/ground_truth.md -------------------------------------------------------------------------------- /tests/chinook/dataset/task_8/task.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_8/task.txt -------------------------------------------------------------------------------- /tests/chinook/dataset/task_9/ground_truth.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_9/ground_truth.md -------------------------------------------------------------------------------- /tests/chinook/dataset/task_9/task.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/dataset/task_9/task.txt -------------------------------------------------------------------------------- /tests/chinook/db.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/db.py -------------------------------------------------------------------------------- /tests/chinook/langfuse/generate_traces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/langfuse/generate_traces.py -------------------------------------------------------------------------------- /tests/chinook/langfuse/test_langfuse_chinook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/langfuse/test_langfuse_chinook.py -------------------------------------------------------------------------------- /tests/chinook/langgraph/graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/langgraph/graph.py -------------------------------------------------------------------------------- /tests/chinook/langgraph/test_langgraph_chinook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/langgraph/test_langgraph_chinook.py -------------------------------------------------------------------------------- /tests/chinook/langgraph/tools_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/langgraph/tools_graph.py -------------------------------------------------------------------------------- /tests/chinook/langsmith/generate_traces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/langsmith/generate_traces.py -------------------------------------------------------------------------------- /tests/chinook/langsmith/test_langsmith_chinook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/langsmith/test_langsmith_chinook.py -------------------------------------------------------------------------------- /tests/chinook/pydantic/agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/pydantic/agent.py -------------------------------------------------------------------------------- /tests/chinook/pydantic/test_pydantic_chinook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/pydantic/test_pydantic_chinook.py -------------------------------------------------------------------------------- /tests/chinook/tasks.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/chinook/tasks.jsonl -------------------------------------------------------------------------------- /tests/cli_commands/test_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/cli_commands/test_common.py -------------------------------------------------------------------------------- /tests/cli_commands/test_deploy_cmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/cli_commands/test_deploy_cmd.py -------------------------------------------------------------------------------- /tests/cli_commands/test_preview_cmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/cli_commands/test_preview_cmd.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/data_loader/test_dynamic_data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/data_loader/test_dynamic_data_loader.py -------------------------------------------------------------------------------- /tests/data_loader/test_inline_data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/data_loader/test_inline_data_loader.py -------------------------------------------------------------------------------- /tests/dataset_logger/.gitignore: -------------------------------------------------------------------------------- 1 | *.db 2 | -------------------------------------------------------------------------------- /tests/dummy_module_for_server_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/dummy_module_for_server_test.py -------------------------------------------------------------------------------- /tests/eval_protocol_tests/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/eval_protocol_tests/README.md -------------------------------------------------------------------------------- /tests/eval_protocol_tests/demo_dual_imports.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/eval_protocol_tests/demo_dual_imports.py -------------------------------------------------------------------------------- /tests/execution/test_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/execution/test_pipeline.py -------------------------------------------------------------------------------- /tests/generation/test_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/generation/test_cache.py -------------------------------------------------------------------------------- /tests/generation/test_clients.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/generation/test_clients.py -------------------------------------------------------------------------------- /tests/github_actions/__init__.py: -------------------------------------------------------------------------------- 1 | # GitHub Actions rollout processor tests and scripts 2 | -------------------------------------------------------------------------------- /tests/github_actions/quickstart.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/github_actions/quickstart.py -------------------------------------------------------------------------------- /tests/github_actions/rollout_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/github_actions/rollout_worker.py -------------------------------------------------------------------------------- /tests/logging/test_rollout_context_logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/logging/test_rollout_context_logging.py -------------------------------------------------------------------------------- /tests/mcp_agent/__init__.py: -------------------------------------------------------------------------------- 1 | # Tests for MCP Agent 2 | -------------------------------------------------------------------------------- /tests/mcp_agent/mock_mcp_server_image/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/mcp_agent/mock_mcp_server_image/Dockerfile -------------------------------------------------------------------------------- /tests/mcp_agent/orchestration/__init__.py: -------------------------------------------------------------------------------- 1 | # Tests for MCP Agent Orchestration 2 | -------------------------------------------------------------------------------- /tests/mcp_agent/test_rl_filesystem_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/mcp_agent/test_rl_filesystem_scenario.py -------------------------------------------------------------------------------- /tests/mocks/verifiers/__init__.py: -------------------------------------------------------------------------------- 1 | """Mock verifiers package.""" 2 | -------------------------------------------------------------------------------- /tests/mocks/verifiers/envs/__init__.py: -------------------------------------------------------------------------------- 1 | """Mock verifiers environments.""" 2 | -------------------------------------------------------------------------------- /tests/mocks/verifiers/envs/bfcl_envs/__init__.py: -------------------------------------------------------------------------------- 1 | """Mock BFCL environments.""" 2 | -------------------------------------------------------------------------------- /tests/pytest/data/airline_dataset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/data/airline_dataset.jsonl -------------------------------------------------------------------------------- /tests/pytest/data/apps_sample_dataset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/data/apps_sample_dataset.jsonl -------------------------------------------------------------------------------- /tests/pytest/data/basic_coding_dataset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/data/basic_coding_dataset.jsonl -------------------------------------------------------------------------------- /tests/pytest/data/frozen_lake_dataset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/data/frozen_lake_dataset.jsonl -------------------------------------------------------------------------------- /tests/pytest/data/function_calling.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/data/function_calling.jsonl -------------------------------------------------------------------------------- /tests/pytest/data/halueval_sample_dataset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/data/halueval_sample_dataset.jsonl -------------------------------------------------------------------------------- /tests/pytest/data/import_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/data/import_data.jsonl -------------------------------------------------------------------------------- /tests/pytest/data/json_schema.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/data/json_schema.jsonl -------------------------------------------------------------------------------- /tests/pytest/data/lunar_lander_dataset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/data/lunar_lander_dataset.jsonl -------------------------------------------------------------------------------- /tests/pytest/data/markdown_dataset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/data/markdown_dataset.jsonl -------------------------------------------------------------------------------- /tests/pytest/data/mcp_config.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pytest/data/retail_dataset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/data/retail_dataset.jsonl -------------------------------------------------------------------------------- /tests/pytest/data/svgbench_dataset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/data/svgbench_dataset.jsonl -------------------------------------------------------------------------------- /tests/pytest/data/svgbench_sample_dataset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/data/svgbench_sample_dataset.jsonl -------------------------------------------------------------------------------- /tests/pytest/datasets/gmail_inbox.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/datasets/gmail_inbox.jsonl -------------------------------------------------------------------------------- /tests/pytest/gsm8k/requirements.txt: -------------------------------------------------------------------------------- 1 | eval-protocol 2 | -------------------------------------------------------------------------------- /tests/pytest/gsm8k/test_pytest_math_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/gsm8k/test_pytest_math_example.py -------------------------------------------------------------------------------- /tests/pytest/helper/gsm8k_to_evaluation_row.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/helper/gsm8k_to_evaluation_row.py -------------------------------------------------------------------------------- /tests/pytest/mcp/mock_discord.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/mcp/mock_discord.py -------------------------------------------------------------------------------- /tests/pytest/test_apps_coding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_apps_coding.py -------------------------------------------------------------------------------- /tests/pytest/test_basic_coding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_basic_coding.py -------------------------------------------------------------------------------- /tests/pytest/test_execution_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_execution_metadata.py -------------------------------------------------------------------------------- /tests/pytest/test_frozen_lake.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_frozen_lake.py -------------------------------------------------------------------------------- /tests/pytest/test_get_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_get_metadata.py -------------------------------------------------------------------------------- /tests/pytest/test_hallucination.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_hallucination.py -------------------------------------------------------------------------------- /tests/pytest/test_langgraph_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_langgraph_processor.py -------------------------------------------------------------------------------- /tests/pytest/test_livesvgbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_livesvgbench.py -------------------------------------------------------------------------------- /tests/pytest/test_lunar_lander.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_lunar_lander.py -------------------------------------------------------------------------------- /tests/pytest/test_markdown_highlighting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_markdown_highlighting.py -------------------------------------------------------------------------------- /tests/pytest/test_mcp_session_autocreate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_mcp_session_autocreate.py -------------------------------------------------------------------------------- /tests/pytest/test_openenv_browsergym_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_openenv_browsergym_basic.py -------------------------------------------------------------------------------- /tests/pytest/test_openenv_browsergym_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_openenv_browsergym_eval.py -------------------------------------------------------------------------------- /tests/pytest/test_openenv_echo_hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_openenv_echo_hub.py -------------------------------------------------------------------------------- /tests/pytest/test_openenv_textarena_docker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_openenv_textarena_docker.py -------------------------------------------------------------------------------- /tests/pytest/test_parameterize_validation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_parameterize_validation.py -------------------------------------------------------------------------------- /tests/pytest/test_parameterized_ids.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_parameterized_ids.py -------------------------------------------------------------------------------- /tests/pytest/test_pydantic_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pydantic_agent.py -------------------------------------------------------------------------------- /tests/pytest/test_pydantic_multi_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pydantic_multi_agent.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_async.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_async.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_ensure_logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_ensure_logging.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_env_overwrite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_env_overwrite.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_flaky_sometimes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_flaky_sometimes.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_function_calling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_function_calling.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_groupwise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_groupwise.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_ids.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_ids.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_import_external_logs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_import_external_logs.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_input_messages.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_input_messages.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_input_rows.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_input_rows.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_json_schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_json_schema.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_klavis_mcp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_klavis_mcp.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_math_format_length.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_math_format_length.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_mcp_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_mcp_config.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_mcp_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_mcp_url.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_propagate_error.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_propagate_error.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_stable_row_id.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_stable_row_id.py -------------------------------------------------------------------------------- /tests/pytest/test_pytest_word_count_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_pytest_word_count_example.py -------------------------------------------------------------------------------- /tests/pytest/test_svgbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_svgbench.py -------------------------------------------------------------------------------- /tests/pytest/test_tau_bench_airline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_tau_bench_airline.py -------------------------------------------------------------------------------- /tests/pytest/test_tool_response_single_string.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_tool_response_single_string.py -------------------------------------------------------------------------------- /tests/pytest/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/pytest/test_utils.py -------------------------------------------------------------------------------- /tests/remote_server/quickstart.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/remote_server/quickstart.py -------------------------------------------------------------------------------- /tests/remote_server/remote_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/remote_server/remote_server.py -------------------------------------------------------------------------------- /tests/remote_server/remote_server_multi_turn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/remote_server/remote_server_multi_turn.py -------------------------------------------------------------------------------- /tests/remote_server/test_remote_fireworks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/remote_server/test_remote_fireworks.py -------------------------------------------------------------------------------- /tests/remote_server/test_remote_langfuse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/remote_server/test_remote_langfuse.py -------------------------------------------------------------------------------- /tests/remote_server/typescript-server/.gitignore: -------------------------------------------------------------------------------- 1 | !package.json 2 | -------------------------------------------------------------------------------- /tests/remote_server/typescript-server/bun.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/remote_server/typescript-server/bun.lock -------------------------------------------------------------------------------- /tests/remote_server/typescript-server/env.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/remote_server/typescript-server/env.ts -------------------------------------------------------------------------------- /tests/rewards/test_apps_coding_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/rewards/test_apps_coding_reward.py -------------------------------------------------------------------------------- /tests/test_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_accuracy.py -------------------------------------------------------------------------------- /tests/test_accuracy_length.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_accuracy_length.py -------------------------------------------------------------------------------- /tests/test_adapters_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_adapters_e2e.py -------------------------------------------------------------------------------- /tests/test_agent_orchestrator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_agent_orchestrator.py -------------------------------------------------------------------------------- /tests/test_agent_resources.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_agent_resources.py -------------------------------------------------------------------------------- /tests/test_auth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_auth.py -------------------------------------------------------------------------------- /tests/test_batch_evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_batch_evaluation.py -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_cli.py -------------------------------------------------------------------------------- /tests/test_cli_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_cli_agent.py -------------------------------------------------------------------------------- /tests/test_cli_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_cli_args.py -------------------------------------------------------------------------------- /tests/test_cli_create_rft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_cli_create_rft.py -------------------------------------------------------------------------------- /tests/test_cli_local_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_cli_local_test.py -------------------------------------------------------------------------------- /tests/test_code_execution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_code_execution.py -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_config.py -------------------------------------------------------------------------------- /tests/test_control_plane_separation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_control_plane_separation.py -------------------------------------------------------------------------------- /tests/test_cpp_code.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_cpp_code.py -------------------------------------------------------------------------------- /tests/test_data_driven_task_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_data_driven_task_manager.py -------------------------------------------------------------------------------- /tests/test_deepcoder_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_deepcoder_reward.py -------------------------------------------------------------------------------- /tests/test_deepeval_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_deepeval_integration.py -------------------------------------------------------------------------------- /tests/test_deploy_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_deploy_integration.py -------------------------------------------------------------------------------- /tests/test_directory_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_directory_utils.py -------------------------------------------------------------------------------- /tests/test_e2b_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_e2b_integration.py -------------------------------------------------------------------------------- /tests/test_e2b_js_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_e2b_js_integration.py -------------------------------------------------------------------------------- /tests/test_edge_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_edge_cases.py -------------------------------------------------------------------------------- /tests/test_ep_upload_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_ep_upload_e2e.py -------------------------------------------------------------------------------- /tests/test_eval_protocol_import.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_eval_protocol_import.py -------------------------------------------------------------------------------- /tests/test_evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_evaluation.py -------------------------------------------------------------------------------- /tests/test_evaluation_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_evaluation_integration.py -------------------------------------------------------------------------------- /tests/test_evaluation_postprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_evaluation_postprocess.py -------------------------------------------------------------------------------- /tests/test_evaluation_preview_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_evaluation_preview_integration.py -------------------------------------------------------------------------------- /tests/test_event_bus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_event_bus.py -------------------------------------------------------------------------------- /tests/test_event_bus_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_event_bus_helper.py -------------------------------------------------------------------------------- /tests/test_examples_end_to_end.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_examples_end_to_end.py -------------------------------------------------------------------------------- /tests/test_exceptions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_exceptions.py -------------------------------------------------------------------------------- /tests/test_fireworks_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_fireworks_api.py -------------------------------------------------------------------------------- /tests/test_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_format.py -------------------------------------------------------------------------------- /tests/test_fractional_code.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_fractional_code.py -------------------------------------------------------------------------------- /tests/test_function_calling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_function_calling.py -------------------------------------------------------------------------------- /tests/test_gcp_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_gcp_tools.py -------------------------------------------------------------------------------- /tests/test_generic_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_generic_server.py -------------------------------------------------------------------------------- /tests/test_human_id.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_human_id.py -------------------------------------------------------------------------------- /tests/test_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_integration.py -------------------------------------------------------------------------------- /tests/test_json_schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_json_schema.py -------------------------------------------------------------------------------- /tests/test_kwargs_validation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_kwargs_validation.py -------------------------------------------------------------------------------- /tests/test_language_consistency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_language_consistency.py -------------------------------------------------------------------------------- /tests/test_lean_prover.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_lean_prover.py -------------------------------------------------------------------------------- /tests/test_lean_prover_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_lean_prover_runner.py -------------------------------------------------------------------------------- /tests/test_length.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_length.py -------------------------------------------------------------------------------- /tests/test_list_comparison_math_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_list_comparison_math_reward.py -------------------------------------------------------------------------------- /tests/test_logs_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_logs_server.py -------------------------------------------------------------------------------- /tests/test_logs_server_simple.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_logs_server_simple.py -------------------------------------------------------------------------------- /tests/test_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_math.py -------------------------------------------------------------------------------- /tests/test_message_field_filtering.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_message_field_filtering.py -------------------------------------------------------------------------------- /tests/test_minimal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_minimal.py -------------------------------------------------------------------------------- /tests/test_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_models.py -------------------------------------------------------------------------------- /tests/test_models_rl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_models_rl.py -------------------------------------------------------------------------------- /tests/test_multiple_choice_math_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_multiple_choice_math_reward.py -------------------------------------------------------------------------------- /tests/test_n_variant_batch_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_n_variant_batch_integration.py -------------------------------------------------------------------------------- /tests/test_n_variant_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_n_variant_integration.py -------------------------------------------------------------------------------- /tests/test_openai_compatibility.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_openai_compatibility.py -------------------------------------------------------------------------------- /tests/test_openai_rft_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_openai_rft_integration.py -------------------------------------------------------------------------------- /tests/test_openeval_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_openeval_integration.py -------------------------------------------------------------------------------- /tests/test_packaging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_packaging.py -------------------------------------------------------------------------------- /tests/test_parallel_rollouts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_parallel_rollouts.py -------------------------------------------------------------------------------- /tests/test_platform_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_platform_api.py -------------------------------------------------------------------------------- /tests/test_quickstart_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_quickstart_utils.py -------------------------------------------------------------------------------- /tests/test_readiness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_readiness.py -------------------------------------------------------------------------------- /tests/test_reasoning_steps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_reasoning_steps.py -------------------------------------------------------------------------------- /tests/test_repetition.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_repetition.py -------------------------------------------------------------------------------- /tests/test_repetition_debug.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_repetition_debug.py -------------------------------------------------------------------------------- /tests/test_retry_mechanism.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_retry_mechanism.py -------------------------------------------------------------------------------- /tests/test_reward_function.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_reward_function.py -------------------------------------------------------------------------------- /tests/test_reward_protocol_import.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_reward_protocol_import.py -------------------------------------------------------------------------------- /tests/test_rl_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_rl_processing.py -------------------------------------------------------------------------------- /tests/test_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_server.py -------------------------------------------------------------------------------- /tests/test_show_results_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_show_results_url.py -------------------------------------------------------------------------------- /tests/test_status_migration_changes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_status_migration_changes.py -------------------------------------------------------------------------------- /tests/test_status_migration_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_status_migration_integration.py -------------------------------------------------------------------------------- /tests/test_status_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_status_model.py -------------------------------------------------------------------------------- /tests/test_tag_count.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_tag_count.py -------------------------------------------------------------------------------- /tests/test_tau_bench_airline_smoke.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_tau_bench_airline_smoke.py -------------------------------------------------------------------------------- /tests/test_typed_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_typed_interface.py -------------------------------------------------------------------------------- /tests/test_typed_interface_rl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_typed_interface_rl.py -------------------------------------------------------------------------------- /tests/test_upload_entrypoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_upload_entrypoint.py -------------------------------------------------------------------------------- /tests/test_url_handling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_url_handling.py -------------------------------------------------------------------------------- /tests/test_vite_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/tests/test_vite_server.py -------------------------------------------------------------------------------- /typescript/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/typescript/.gitignore -------------------------------------------------------------------------------- /typescript/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/typescript/README.md -------------------------------------------------------------------------------- /typescript/bun.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/typescript/bun.lock -------------------------------------------------------------------------------- /typescript/index.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/typescript/index.ts -------------------------------------------------------------------------------- /typescript/logging/fireworks-transport.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/typescript/logging/fireworks-transport.ts -------------------------------------------------------------------------------- /typescript/logging/fireworks-vercel.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/typescript/logging/fireworks-vercel.ts -------------------------------------------------------------------------------- /typescript/logging/logger.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/typescript/logging/logger.ts -------------------------------------------------------------------------------- /typescript/models/exceptions.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/typescript/models/exceptions.ts -------------------------------------------------------------------------------- /typescript/models/status.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/typescript/models/status.ts -------------------------------------------------------------------------------- /typescript/models/types.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/typescript/models/types.ts -------------------------------------------------------------------------------- /typescript/package.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/typescript/package.json -------------------------------------------------------------------------------- /typescript/pnpm-lock.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/typescript/pnpm-lock.yaml -------------------------------------------------------------------------------- /typescript/tsconfig.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/typescript/tsconfig.json -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/uv.lock -------------------------------------------------------------------------------- /vendor/tau2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/__init__.py -------------------------------------------------------------------------------- /vendor/tau2/agent/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/agent/README.md -------------------------------------------------------------------------------- /vendor/tau2/agent/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vendor/tau2/agent/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/agent/base.py -------------------------------------------------------------------------------- /vendor/tau2/agent/llm_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/agent/llm_agent.py -------------------------------------------------------------------------------- /vendor/tau2/api_service/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Sierra 2 | -------------------------------------------------------------------------------- /vendor/tau2/api_service/api_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/api_service/api_config.py -------------------------------------------------------------------------------- /vendor/tau2/api_service/data_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/api_service/data_model.py -------------------------------------------------------------------------------- /vendor/tau2/api_service/simulation_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/api_service/simulation_service.py -------------------------------------------------------------------------------- /vendor/tau2/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/cli.py -------------------------------------------------------------------------------- /vendor/tau2/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/config.py -------------------------------------------------------------------------------- /vendor/tau2/data/domains/airline/db.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/data/domains/airline/db.json -------------------------------------------------------------------------------- /vendor/tau2/data/domains/airline/policy.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/data/domains/airline/policy.md -------------------------------------------------------------------------------- /vendor/tau2/data/domains/airline/tasks.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/data/domains/airline/tasks.json -------------------------------------------------------------------------------- /vendor/tau2/data/domains/mock/db.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/data/domains/mock/db.json -------------------------------------------------------------------------------- /vendor/tau2/data/domains/mock/policy.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/data/domains/mock/policy.md -------------------------------------------------------------------------------- /vendor/tau2/data/domains/mock/policy_solo.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/data/domains/mock/policy_solo.md -------------------------------------------------------------------------------- /vendor/tau2/data/domains/mock/tasks.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/data/domains/mock/tasks.json -------------------------------------------------------------------------------- /vendor/tau2/data/domains/retail/db.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/data/domains/retail/db.json -------------------------------------------------------------------------------- /vendor/tau2/data/domains/retail/policy.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/data/domains/retail/policy.md -------------------------------------------------------------------------------- /vendor/tau2/data/domains/retail/tasks.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/data/domains/retail/tasks.json -------------------------------------------------------------------------------- /vendor/tau2/data/domains/telecom/db.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/data/domains/telecom/db.toml -------------------------------------------------------------------------------- /vendor/tau2/data/domains/telecom/tasks.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/data/domains/telecom/tasks.json -------------------------------------------------------------------------------- /vendor/tau2/data/domains/telecom/user_db.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/data/domains/telecom/user_db.toml -------------------------------------------------------------------------------- /vendor/tau2/data_model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vendor/tau2/data_model/message.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/data_model/message.py -------------------------------------------------------------------------------- /vendor/tau2/data_model/simulation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/data_model/simulation.py -------------------------------------------------------------------------------- /vendor/tau2/data_model/tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/data_model/tasks.py -------------------------------------------------------------------------------- /vendor/tau2/domains/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/README.md -------------------------------------------------------------------------------- /vendor/tau2/domains/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vendor/tau2/domains/airline/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Sierra 2 | -------------------------------------------------------------------------------- /vendor/tau2/domains/airline/data_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/airline/data_model.py -------------------------------------------------------------------------------- /vendor/tau2/domains/airline/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/airline/environment.py -------------------------------------------------------------------------------- /vendor/tau2/domains/airline/tasks.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/airline/tasks.json -------------------------------------------------------------------------------- /vendor/tau2/domains/airline/tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/airline/tools.py -------------------------------------------------------------------------------- /vendor/tau2/domains/airline/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/airline/utils.py -------------------------------------------------------------------------------- /vendor/tau2/domains/mock/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Sierra 2 | -------------------------------------------------------------------------------- /vendor/tau2/domains/mock/data_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/mock/data_model.py -------------------------------------------------------------------------------- /vendor/tau2/domains/mock/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/mock/environment.py -------------------------------------------------------------------------------- /vendor/tau2/domains/mock/tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/mock/tools.py -------------------------------------------------------------------------------- /vendor/tau2/domains/mock/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/mock/utils.py -------------------------------------------------------------------------------- /vendor/tau2/domains/retail/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Sierra 2 | -------------------------------------------------------------------------------- /vendor/tau2/domains/retail/data_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/retail/data_model.py -------------------------------------------------------------------------------- /vendor/tau2/domains/retail/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/retail/environment.py -------------------------------------------------------------------------------- /vendor/tau2/domains/retail/tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/retail/tools.py -------------------------------------------------------------------------------- /vendor/tau2/domains/retail/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/retail/utils.py -------------------------------------------------------------------------------- /vendor/tau2/domains/telecom/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Sierra 2 | -------------------------------------------------------------------------------- /vendor/tau2/domains/telecom/data_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/telecom/data_model.py -------------------------------------------------------------------------------- /vendor/tau2/domains/telecom/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/telecom/environment.py -------------------------------------------------------------------------------- /vendor/tau2/domains/telecom/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vendor/tau2/domains/telecom/tasks/const.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/telecom/tasks/const.py -------------------------------------------------------------------------------- /vendor/tau2/domains/telecom/tasks/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/telecom/tasks/manager.py -------------------------------------------------------------------------------- /vendor/tau2/domains/telecom/tasks/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/telecom/tasks/utils.py -------------------------------------------------------------------------------- /vendor/tau2/domains/telecom/tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/telecom/tools.py -------------------------------------------------------------------------------- /vendor/tau2/domains/telecom/user_data_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/telecom/user_data_model.py -------------------------------------------------------------------------------- /vendor/tau2/domains/telecom/user_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/telecom/user_tools.py -------------------------------------------------------------------------------- /vendor/tau2/domains/telecom/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/domains/telecom/utils.py -------------------------------------------------------------------------------- /vendor/tau2/environment/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vendor/tau2/environment/db.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/environment/db.py -------------------------------------------------------------------------------- /vendor/tau2/environment/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/environment/environment.py -------------------------------------------------------------------------------- /vendor/tau2/environment/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/environment/server.py -------------------------------------------------------------------------------- /vendor/tau2/environment/tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/environment/tool.py -------------------------------------------------------------------------------- /vendor/tau2/environment/toolkit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/environment/toolkit.py -------------------------------------------------------------------------------- /vendor/tau2/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vendor/tau2/evaluator/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/evaluator/evaluator.py -------------------------------------------------------------------------------- /vendor/tau2/evaluator/evaluator_action.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/evaluator/evaluator_action.py -------------------------------------------------------------------------------- /vendor/tau2/evaluator/evaluator_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/evaluator/evaluator_base.py -------------------------------------------------------------------------------- /vendor/tau2/evaluator/evaluator_communicate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/evaluator/evaluator_communicate.py -------------------------------------------------------------------------------- /vendor/tau2/evaluator/evaluator_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/evaluator/evaluator_env.py -------------------------------------------------------------------------------- /vendor/tau2/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vendor/tau2/metrics/agent_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/metrics/agent_metrics.py -------------------------------------------------------------------------------- /vendor/tau2/metrics/break_down_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/metrics/break_down_metrics.py -------------------------------------------------------------------------------- /vendor/tau2/orchestrator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vendor/tau2/orchestrator/orchestrator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/orchestrator/orchestrator.py -------------------------------------------------------------------------------- /vendor/tau2/orchestrator/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/orchestrator/utils.py -------------------------------------------------------------------------------- /vendor/tau2/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/registry.py -------------------------------------------------------------------------------- /vendor/tau2/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/run.py -------------------------------------------------------------------------------- /vendor/tau2/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vendor/tau2/scripts/check_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/scripts/check_data.py -------------------------------------------------------------------------------- /vendor/tau2/scripts/show_domain_doc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/scripts/show_domain_doc.py -------------------------------------------------------------------------------- /vendor/tau2/scripts/start_servers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/scripts/start_servers.py -------------------------------------------------------------------------------- /vendor/tau2/scripts/view_simulations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/scripts/view_simulations.py -------------------------------------------------------------------------------- /vendor/tau2/user/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vendor/tau2/user/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/user/base.py -------------------------------------------------------------------------------- /vendor/tau2/user/user_simulator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/user/user_simulator.py -------------------------------------------------------------------------------- /vendor/tau2/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/utils/__init__.py -------------------------------------------------------------------------------- /vendor/tau2/utils/display.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/utils/display.py -------------------------------------------------------------------------------- /vendor/tau2/utils/io_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/utils/io_utils.py -------------------------------------------------------------------------------- /vendor/tau2/utils/llm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/utils/llm_utils.py -------------------------------------------------------------------------------- /vendor/tau2/utils/pydantic_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/utils/pydantic_utils.py -------------------------------------------------------------------------------- /vendor/tau2/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vendor/tau2/utils/utils.py -------------------------------------------------------------------------------- /versioneer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/versioneer.py -------------------------------------------------------------------------------- /vite-app/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/.gitignore -------------------------------------------------------------------------------- /vite-app/CHART_EXPORT_README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/CHART_EXPORT_README.md -------------------------------------------------------------------------------- /vite-app/data/logs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/data/logs.json -------------------------------------------------------------------------------- /vite-app/data/logs.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/data/logs.jsonl -------------------------------------------------------------------------------- /vite-app/dist/assets/favicon-BkAAWQga.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/dist/assets/favicon-BkAAWQga.png -------------------------------------------------------------------------------- /vite-app/dist/assets/index-BIhepl19.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/dist/assets/index-BIhepl19.css -------------------------------------------------------------------------------- /vite-app/dist/assets/index-DaovgarD.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/dist/assets/index-DaovgarD.js -------------------------------------------------------------------------------- /vite-app/dist/assets/index-DaovgarD.js.map: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/dist/assets/index-DaovgarD.js.map -------------------------------------------------------------------------------- /vite-app/dist/assets/logo-light-BprIBJQW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/dist/assets/logo-light-BprIBJQW.png -------------------------------------------------------------------------------- /vite-app/dist/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/dist/index.html -------------------------------------------------------------------------------- /vite-app/docs/runtime-configuration.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/docs/runtime-configuration.md -------------------------------------------------------------------------------- /vite-app/eslint.config.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/eslint.config.js -------------------------------------------------------------------------------- /vite-app/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/index.html -------------------------------------------------------------------------------- /vite-app/package.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/package.json -------------------------------------------------------------------------------- /vite-app/pnpm-lock.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/pnpm-lock.yaml -------------------------------------------------------------------------------- /vite-app/src/App.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/App.tsx -------------------------------------------------------------------------------- /vite-app/src/GlobalState.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/GlobalState.tsx -------------------------------------------------------------------------------- /vite-app/src/assets/logo-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/assets/logo-light.png -------------------------------------------------------------------------------- /vite-app/src/components/Button.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/Button.tsx -------------------------------------------------------------------------------- /vite-app/src/components/ChartExport.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/ChartExport.tsx -------------------------------------------------------------------------------- /vite-app/src/components/ChatInterface.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/ChatInterface.tsx -------------------------------------------------------------------------------- /vite-app/src/components/Dashboard.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/Dashboard.tsx -------------------------------------------------------------------------------- /vite-app/src/components/EvaluationRow.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/EvaluationRow.tsx -------------------------------------------------------------------------------- /vite-app/src/components/EvaluationTable.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/EvaluationTable.tsx -------------------------------------------------------------------------------- /vite-app/src/components/FilterInput.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/FilterInput.tsx -------------------------------------------------------------------------------- /vite-app/src/components/FilterSelector.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/FilterSelector.tsx -------------------------------------------------------------------------------- /vite-app/src/components/JSONTooltip.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/JSONTooltip.tsx -------------------------------------------------------------------------------- /vite-app/src/components/LogsSection.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/LogsSection.tsx -------------------------------------------------------------------------------- /vite-app/src/components/MessageBubble.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/MessageBubble.tsx -------------------------------------------------------------------------------- /vite-app/src/components/MetadataSection.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/MetadataSection.tsx -------------------------------------------------------------------------------- /vite-app/src/components/PivotTab.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/PivotTab.tsx -------------------------------------------------------------------------------- /vite-app/src/components/PivotTable.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/PivotTable.tsx -------------------------------------------------------------------------------- /vite-app/src/components/SearchableSelect.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/SearchableSelect.tsx -------------------------------------------------------------------------------- /vite-app/src/components/Select.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/Select.tsx -------------------------------------------------------------------------------- /vite-app/src/components/StatusIndicator.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/StatusIndicator.tsx -------------------------------------------------------------------------------- /vite-app/src/components/TabButton.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/TabButton.tsx -------------------------------------------------------------------------------- /vite-app/src/components/TableContainer.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/TableContainer.tsx -------------------------------------------------------------------------------- /vite-app/src/components/Tooltip.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/components/Tooltip.tsx -------------------------------------------------------------------------------- /vite-app/src/config.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/config.ts -------------------------------------------------------------------------------- /vite-app/src/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/favicon.png -------------------------------------------------------------------------------- /vite-app/src/hooks/usePivotData.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/hooks/usePivotData.ts -------------------------------------------------------------------------------- /vite-app/src/index.css: -------------------------------------------------------------------------------- 1 | 2 | @import "tailwindcss"; -------------------------------------------------------------------------------- /vite-app/src/main.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/main.tsx -------------------------------------------------------------------------------- /vite-app/src/styles/common.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/styles/common.ts -------------------------------------------------------------------------------- /vite-app/src/types/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/types/README.md -------------------------------------------------------------------------------- /vite-app/src/types/configs.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/types/configs.ts -------------------------------------------------------------------------------- /vite-app/src/types/eval-protocol.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/types/eval-protocol.ts -------------------------------------------------------------------------------- /vite-app/src/types/global.d.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/types/global.d.ts -------------------------------------------------------------------------------- /vite-app/src/types/websocket.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/types/websocket.ts -------------------------------------------------------------------------------- /vite-app/src/typings.d.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/typings.d.ts -------------------------------------------------------------------------------- /vite-app/src/util/field-processors.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/util/field-processors.ts -------------------------------------------------------------------------------- /vite-app/src/util/filter-utils.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/util/filter-utils.ts -------------------------------------------------------------------------------- /vite-app/src/util/flatten-json.test.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/util/flatten-json.test.ts -------------------------------------------------------------------------------- /vite-app/src/util/flatten-json.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/util/flatten-json.ts -------------------------------------------------------------------------------- /vite-app/src/util/pivot.test.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/util/pivot.test.ts -------------------------------------------------------------------------------- /vite-app/src/util/pivot.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/util/pivot.ts -------------------------------------------------------------------------------- /vite-app/src/util/query-params.test.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/util/query-params.test.ts -------------------------------------------------------------------------------- /vite-app/src/util/query-params.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/src/util/query-params.ts -------------------------------------------------------------------------------- /vite-app/tsconfig.app.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/tsconfig.app.json -------------------------------------------------------------------------------- /vite-app/tsconfig.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/tsconfig.json -------------------------------------------------------------------------------- /vite-app/tsconfig.node.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/tsconfig.node.json -------------------------------------------------------------------------------- /vite-app/vite.config.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eval-protocol/python-sdk/HEAD/vite-app/vite.config.ts --------------------------------------------------------------------------------