├── .gitignore ├── README.md ├── assets └── teaser.png ├── environment.yml ├── scripts ├── config │ └── main │ │ ├── default.yaml │ │ ├── webarena_eval.yaml │ │ ├── webarena_rl.yaml │ │ ├── webvoyager_eval.yaml │ │ └── webvoyager_rl.yaml ├── create_webarena_containers.sh ├── prompts │ ├── create_prompt_json.py │ ├── evaluator_prompt.txt │ ├── webarena.json │ └── webvoyager.json ├── run.py ├── webarena_eval.sh ├── webarena_train.sh ├── webvoyager_eval.sh └── webvoyager_train.sh ├── setup.py ├── tasks ├── convert_webarena_tasks_to_jsonl.py ├── webarena_test_data.jsonl ├── webarena_train_data.jsonl ├── webvoyager_subset.jsonl ├── webvoyager_test_data.jsonl └── webvoyager_train_data.jsonl └── tti ├── algorithms ├── __init__.py ├── base │ ├── __init__.py │ └── base_trainer.py ├── filteredbc │ ├── __init__.py │ └── trainer.py ├── onpolicy_train_loop.py ├── utils.py └── worker_collect_loop.py ├── data ├── __init__.py └── utils.py ├── environment ├── __init__.py ├── env_utils.py └── webgym │ ├── __init__.py │ ├── helper_functions.py │ ├── utils.py │ ├── utils_eval.py │ ├── utils_webarena.py │ └── webgym.py ├── misc.py └── models ├── __init__.py ├── gemma_vllm_agent.py └── prompt_processor.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/README.md -------------------------------------------------------------------------------- /assets/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/assets/teaser.png -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/environment.yml -------------------------------------------------------------------------------- /scripts/config/main/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/scripts/config/main/default.yaml -------------------------------------------------------------------------------- /scripts/config/main/webarena_eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/scripts/config/main/webarena_eval.yaml -------------------------------------------------------------------------------- /scripts/config/main/webarena_rl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/scripts/config/main/webarena_rl.yaml -------------------------------------------------------------------------------- /scripts/config/main/webvoyager_eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/scripts/config/main/webvoyager_eval.yaml -------------------------------------------------------------------------------- /scripts/config/main/webvoyager_rl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/scripts/config/main/webvoyager_rl.yaml -------------------------------------------------------------------------------- /scripts/create_webarena_containers.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/scripts/create_webarena_containers.sh -------------------------------------------------------------------------------- /scripts/prompts/create_prompt_json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/scripts/prompts/create_prompt_json.py -------------------------------------------------------------------------------- /scripts/prompts/evaluator_prompt.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/scripts/prompts/evaluator_prompt.txt -------------------------------------------------------------------------------- /scripts/prompts/webarena.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/scripts/prompts/webarena.json -------------------------------------------------------------------------------- /scripts/prompts/webvoyager.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/scripts/prompts/webvoyager.json -------------------------------------------------------------------------------- /scripts/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/scripts/run.py -------------------------------------------------------------------------------- /scripts/webarena_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/scripts/webarena_eval.sh -------------------------------------------------------------------------------- /scripts/webarena_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/scripts/webarena_train.sh -------------------------------------------------------------------------------- /scripts/webvoyager_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/scripts/webvoyager_eval.sh -------------------------------------------------------------------------------- /scripts/webvoyager_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/scripts/webvoyager_train.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/setup.py -------------------------------------------------------------------------------- /tasks/convert_webarena_tasks_to_jsonl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tasks/convert_webarena_tasks_to_jsonl.py -------------------------------------------------------------------------------- /tasks/webarena_test_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tasks/webarena_test_data.jsonl -------------------------------------------------------------------------------- /tasks/webarena_train_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tasks/webarena_train_data.jsonl -------------------------------------------------------------------------------- /tasks/webvoyager_subset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tasks/webvoyager_subset.jsonl -------------------------------------------------------------------------------- /tasks/webvoyager_test_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tasks/webvoyager_test_data.jsonl -------------------------------------------------------------------------------- /tasks/webvoyager_train_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tasks/webvoyager_train_data.jsonl -------------------------------------------------------------------------------- /tti/algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/algorithms/__init__.py -------------------------------------------------------------------------------- /tti/algorithms/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/algorithms/base/__init__.py -------------------------------------------------------------------------------- /tti/algorithms/base/base_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/algorithms/base/base_trainer.py -------------------------------------------------------------------------------- /tti/algorithms/filteredbc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/algorithms/filteredbc/__init__.py -------------------------------------------------------------------------------- /tti/algorithms/filteredbc/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/algorithms/filteredbc/trainer.py -------------------------------------------------------------------------------- /tti/algorithms/onpolicy_train_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/algorithms/onpolicy_train_loop.py -------------------------------------------------------------------------------- /tti/algorithms/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/algorithms/utils.py -------------------------------------------------------------------------------- /tti/algorithms/worker_collect_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/algorithms/worker_collect_loop.py -------------------------------------------------------------------------------- /tti/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/data/__init__.py -------------------------------------------------------------------------------- /tti/data/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/data/utils.py -------------------------------------------------------------------------------- /tti/environment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/environment/__init__.py -------------------------------------------------------------------------------- /tti/environment/env_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/environment/env_utils.py -------------------------------------------------------------------------------- /tti/environment/webgym/__init__.py: -------------------------------------------------------------------------------- 1 | from .webgym import WebBroswerGym, BatchedWebEnv -------------------------------------------------------------------------------- /tti/environment/webgym/helper_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/environment/webgym/helper_functions.py -------------------------------------------------------------------------------- /tti/environment/webgym/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/environment/webgym/utils.py -------------------------------------------------------------------------------- /tti/environment/webgym/utils_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/environment/webgym/utils_eval.py -------------------------------------------------------------------------------- /tti/environment/webgym/utils_webarena.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/environment/webgym/utils_webarena.py -------------------------------------------------------------------------------- /tti/environment/webgym/webgym.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/environment/webgym/webgym.py -------------------------------------------------------------------------------- /tti/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/misc.py -------------------------------------------------------------------------------- /tti/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/models/__init__.py -------------------------------------------------------------------------------- /tti/models/gemma_vllm_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/models/gemma_vllm_agent.py -------------------------------------------------------------------------------- /tti/models/prompt_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/test-time-interaction/TTI/HEAD/tti/models/prompt_processor.py --------------------------------------------------------------------------------