├── .gitignore ├── LICENSE ├── README.md ├── configs ├── miniwob │ └── eval_openai_agent.yml └── webarena │ └── eval_openai_agent.yml ├── notebooks ├── others │ ├── eval_airlinecrm_metrics.ipynb │ ├── eval_compression.ipynb │ ├── eval_miniwob_llama_metrics.ipynb │ └── eval_miniwob_metrics.ipynb └── webarena │ ├── context_len_hist_webarena_step_vs_flat.ipynb │ ├── final_webarena_step_vs_all.ipynb │ └── plots_webarena_step_vs_all.ipynb ├── requirements.txt ├── scripts ├── evaluate │ ├── eval_miniwob.py │ └── eval_webarena.py └── setup │ └── auto_login_webarena.py ├── setup.py └── src └── webagents_step ├── agents ├── agent.py ├── finetuned_agent.py ├── keyboard_agent.py ├── playback_agent.py ├── prompt_agent.py └── step_agent.py ├── environment ├── env.py ├── liveweb.py ├── miniwob.py └── webarena.py ├── parser ├── miniwob_parser.py └── playwright_parser_webarena.py ├── prompts ├── miniwob │ ├── flat_fewshot_template.py │ └── step_fewshot_template.py └── webarena │ ├── flat_fewshot_template.py │ └── step_fewshot_template.py └── utils ├── data_prep.py ├── llm.py └── stack.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/README.md -------------------------------------------------------------------------------- /configs/miniwob/eval_openai_agent.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/configs/miniwob/eval_openai_agent.yml -------------------------------------------------------------------------------- /configs/webarena/eval_openai_agent.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/configs/webarena/eval_openai_agent.yml -------------------------------------------------------------------------------- /notebooks/others/eval_airlinecrm_metrics.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/notebooks/others/eval_airlinecrm_metrics.ipynb -------------------------------------------------------------------------------- /notebooks/others/eval_compression.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/notebooks/others/eval_compression.ipynb -------------------------------------------------------------------------------- /notebooks/others/eval_miniwob_llama_metrics.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/notebooks/others/eval_miniwob_llama_metrics.ipynb -------------------------------------------------------------------------------- /notebooks/others/eval_miniwob_metrics.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/notebooks/others/eval_miniwob_metrics.ipynb -------------------------------------------------------------------------------- /notebooks/webarena/context_len_hist_webarena_step_vs_flat.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/notebooks/webarena/context_len_hist_webarena_step_vs_flat.ipynb -------------------------------------------------------------------------------- /notebooks/webarena/final_webarena_step_vs_all.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/notebooks/webarena/final_webarena_step_vs_all.ipynb -------------------------------------------------------------------------------- /notebooks/webarena/plots_webarena_step_vs_all.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/notebooks/webarena/plots_webarena_step_vs_all.ipynb -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/evaluate/eval_miniwob.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/scripts/evaluate/eval_miniwob.py -------------------------------------------------------------------------------- /scripts/evaluate/eval_webarena.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/scripts/evaluate/eval_webarena.py -------------------------------------------------------------------------------- /scripts/setup/auto_login_webarena.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/scripts/setup/auto_login_webarena.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/setup.py -------------------------------------------------------------------------------- /src/webagents_step/agents/agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/agents/agent.py -------------------------------------------------------------------------------- /src/webagents_step/agents/finetuned_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/agents/finetuned_agent.py -------------------------------------------------------------------------------- /src/webagents_step/agents/keyboard_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/agents/keyboard_agent.py -------------------------------------------------------------------------------- /src/webagents_step/agents/playback_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/agents/playback_agent.py -------------------------------------------------------------------------------- /src/webagents_step/agents/prompt_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/agents/prompt_agent.py -------------------------------------------------------------------------------- /src/webagents_step/agents/step_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/agents/step_agent.py -------------------------------------------------------------------------------- /src/webagents_step/environment/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/environment/env.py -------------------------------------------------------------------------------- /src/webagents_step/environment/liveweb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/environment/liveweb.py -------------------------------------------------------------------------------- /src/webagents_step/environment/miniwob.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/environment/miniwob.py -------------------------------------------------------------------------------- /src/webagents_step/environment/webarena.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/environment/webarena.py -------------------------------------------------------------------------------- /src/webagents_step/parser/miniwob_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/parser/miniwob_parser.py -------------------------------------------------------------------------------- /src/webagents_step/parser/playwright_parser_webarena.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/parser/playwright_parser_webarena.py -------------------------------------------------------------------------------- /src/webagents_step/prompts/miniwob/flat_fewshot_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/prompts/miniwob/flat_fewshot_template.py -------------------------------------------------------------------------------- /src/webagents_step/prompts/miniwob/step_fewshot_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/prompts/miniwob/step_fewshot_template.py -------------------------------------------------------------------------------- /src/webagents_step/prompts/webarena/flat_fewshot_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/prompts/webarena/flat_fewshot_template.py -------------------------------------------------------------------------------- /src/webagents_step/prompts/webarena/step_fewshot_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/prompts/webarena/step_fewshot_template.py -------------------------------------------------------------------------------- /src/webagents_step/utils/data_prep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/utils/data_prep.py -------------------------------------------------------------------------------- /src/webagents_step/utils/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/utils/llm.py -------------------------------------------------------------------------------- /src/webagents_step/utils/stack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asappresearch/webagents-step/HEAD/src/webagents_step/utils/stack.py --------------------------------------------------------------------------------