├── .gitattributes
├── .gitignore
├── AgentBench.old
    ├── configs
    │   ├── agents
    │   │   ├── do_nothing.yaml
    │   │   └── tgi_clients
    │   │   │   ├── AgentLM-13b.yaml
    │   │   │   ├── AgentLM-70b.yaml
    │   │   │   └── AgentLM-7b.yaml
    │   └── tasks
    │   │   ├── alfworld
    │   │       ├── dev.yaml
    │   │       └── std.yaml
    │   │   ├── card_game
    │   │       ├── dev.yaml
    │   │       ├── ext.yaml
    │   │       └── std.yaml
    │   │   ├── dbbench
    │   │       ├── dev.yaml
    │   │       └── std.yaml
    │   │   ├── knowledgegraph
    │   │       ├── dev.yaml
    │   │       └── std.yaml
    │   │   ├── mind2web
    │   │       ├── dev.yaml
    │   │       └── std.yaml
    │   │   ├── os_interaction
    │   │       ├── dev.yaml
    │   │       └── std.yaml
    │   │   └── webshop
    │   │       ├── dev.yaml
    │   │       └── std.yaml
    ├── data
    │   ├── alfworld
    │   │   ├── dev.json
    │   │   └── std.json
    │   ├── dbbench
    │   │   ├── dev.jsonl
    │   │   └── standard.jsonl
    │   ├── knowledgegraph
    │   │   ├── dev.json
    │   │   └── std.json
    │   ├── mind2web
    │   │   └── prompt
    │   │   │   ├── llm_prompt.json
    │   │   │   └── llm_prompt_cot.json
    │   └── os_interaction
    │   │   ├── data
    │   │       ├── 1
    │   │       │   └── stock.json
    │   │       ├── 2
    │   │       │   └── environment.json
    │   │       ├── 3
    │   │       │   └── ac.json
    │   │       ├── 4
    │   │       │   ├── N11.json
    │   │       │   ├── N225.json
    │   │       │   ├── N37.json
    │   │       │   ├── N4.json
    │   │       │   ├── N41.json
    │   │       │   ├── Q09.json
    │   │       │   ├── Q19.json
    │   │       │   ├── Q30.json
    │   │       │   ├── Q47.json
    │   │       │   └── Q49.json
    │   │       ├── 5
    │   │       │   └── new.json
    │   │       ├── 6
    │   │       │   └── new.json
    │   │       ├── 7
    │   │       │   └── 2023-06-07-00-13-14.json
    │   │       ├── 6-backup.json
    │   │       └── dev.json
    │   │   ├── res
    │   │       └── dockerfiles
    │   │       │   ├── default
    │   │       │   ├── packages
    │   │       │   └── ubuntu
    │   │   └── scripts
    │   │       ├── 1
    │   │           ├── check
    │   │           │   ├── 1.sh
    │   │           │   ├── containing.py
    │   │           │   ├── in.py
    │   │           │   ├── integer-match.py
    │   │           │   ├── size-match.py
    │   │           │   └── string-match.py
    │   │           ├── example
    │   │           │   └── 1.sh
    │   │           └── init
    │   │           │   ├── gen_words.sh
    │   │           │   ├── install_nettools.sh
    │   │           │   ├── nested_folders.sh
    │   │           │   └── stock-log.sh
    │   │       ├── 2
    │   │           ├── check
    │   │           │   ├── 1.sh
    │   │           │   ├── containing.py
    │   │           │   ├── in.py
    │   │           │   ├── integer-match.py
    │   │           │   ├── size-match.py
    │   │           │   └── string-match.py
    │   │           ├── example
    │   │           │   └── 1.sh
    │   │           └── init
    │   │           │   ├── gen_words.sh
    │   │           │   ├── install_nettools.sh
    │   │           │   ├── nested_folders.sh
    │   │           │   └── stock-log.sh
    │   │       ├── 3
    │   │           ├── check
    │   │           │   ├── 1.sh
    │   │           │   ├── containing.py
    │   │           │   ├── in.py
    │   │           │   ├── integer-match.py
    │   │           │   ├── size-match.py
    │   │           │   └── string-match.py
    │   │           ├── example
    │   │           │   └── 1.sh
    │   │           └── init
    │   │           │   ├── gen_words.sh
    │   │           │   ├── install_nettools.sh
    │   │           │   ├── nested_folders.sh
    │   │           │   └── stock-log.sh
    │   │       ├── 4
    │   │           ├── check
    │   │           │   ├── 1.sh
    │   │           │   ├── containing.py
    │   │           │   ├── in.py
    │   │           │   ├── integer-match.py
    │   │           │   ├── size-match.py
    │   │           │   └── string-match.py
    │   │           ├── example
    │   │           │   └── 1.sh
    │   │           └── init
    │   │           │   ├── gen_words.sh
    │   │           │   ├── install_nettools.sh
    │   │           │   ├── nested_folders.sh
    │   │           │   └── stock-log.sh
    │   │       ├── 5
    │   │           ├── check
    │   │           │   ├── containing.py
    │   │           │   ├── in.py
    │   │           │   ├── integer-match.py
    │   │           │   ├── size-match.py
    │   │           │   └── string-match.py
    │   │           ├── checking
    │   │           │   ├── 0.sh
    │   │           │   ├── 1.sh
    │   │           │   └── 2.sh
    │   │           ├── example
    │   │           │   ├── 0.sh
    │   │           │   ├── 1.sh
    │   │           │   └── 2.sh
    │   │           ├── file.json
    │   │           ├── init
    │   │           │   └── 1.sh
    │   │           ├── new.json
    │   │           └── prompt.md
    │   │       ├── 6
    │   │           ├── collected.json
    │   │           ├── collecting.py
    │   │           ├── new.json
    │   │           ├── permission.json
    │   │           └── stackoverflow
    │   │           │   └── spider.py
    │   │       ├── 7
    │   │           ├── check
    │   │           │   ├── 1.sh
    │   │           │   ├── containing.py
    │   │           │   ├── in.py
    │   │           │   ├── integer-match.py
    │   │           │   ├── size-match.py
    │   │           │   └── string-match.py
    │   │           ├── example
    │   │           │   └── 1.sh
    │   │           └── init
    │   │           │   ├── gen_words.sh
    │   │           │   ├── install_nettools.sh
    │   │           │   ├── nested_folders.sh
    │   │           │   └── stock-log.sh
    │   │       └── dev
    │   │           ├── check
    │   │               ├── 0.sh
    │   │               ├── containing.py
    │   │               ├── in.py
    │   │               ├── integer-match.py
    │   │               ├── size-match.py
    │   │               └── string-match.py
    │   │           ├── example
    │   │               └── 0.sh
    │   │           └── init
    │   │               └── stock-log.sh
    ├── eval.sh
    ├── eval
    │   ├── AgentLM-13b-eval-all.sh
    │   ├── AgentLM-70b-eval-all.sh
    │   ├── AgentLM-7b-eval-all.sh
    │   └── single-task
    │   │   ├── alfworld.sh
    │   │   ├── card.sh
    │   │   ├── db.sh
    │   │   ├── eval_single_setup.sh
    │   │   ├── kg.sh
    │   │   ├── mind2web.sh
    │   │   ├── os.sh
    │   │   └── webshop.sh
    ├── evaluate.py
    ├── readme-zh.md
    ├── readme.md
    ├── requirements.txt
    └── src
    │   ├── __init__.py
    │   ├── agent.py
    │   ├── agents
    │       ├── __init__.py
    │       ├── do_nothing_agent.py
    │       └── tgi_client.py
    │   ├── configs.py
    │   ├── task.py
    │   ├── tasks
    │       ├── __init__.py
    │       ├── alfworld
    │       │   ├── __init__.py
    │       │   ├── configs
    │       │   │   └── base_config.yaml
    │       │   ├── environment.py
    │       │   ├── prompts
    │       │   │   ├── alfworld_multiturn.json
    │       │   │   ├── alfworld_multiturn_cot.json
    │       │   │   ├── alfworld_multiturn_new.json
    │       │   │   ├── alfworld_multiturn_plan_first.json
    │       │   │   ├── alfworld_multiturn_pure.json
    │       │   │   ├── alfworld_multiturn_react.json
    │       │   │   └── alfworld_singleturn.json
    │       │   ├── task.py
    │       │   └── utils.py
    │       ├── card_game
    │       │   ├── AI_SDK
    │       │   │   ├── C++
    │       │   │   │   ├── Action.cpp
    │       │   │   │   ├── Action.hpp
    │       │   │   │   ├── Action_sample.cpp
    │       │   │   │   ├── Makefile
    │       │   │   │   ├── main.cpp
    │       │   │   │   └── sdk
    │       │   │   │   │   ├── ai_client.hpp
    │       │   │   │   │   └── jsoncpp
    │       │   │   │   │       ├── json
    │       │   │   │   │           ├── json-forwards.h
    │       │   │   │   │           └── json.h
    │       │   │   │   │       └── jsoncpp.cpp
    │       │   │   └── Python
    │       │   │   │   ├── AI_Cn.py
    │       │   │   │   ├── AI_Cn_en.py
    │       │   │   │   ├── AI_En.py
    │       │   │   │   ├── Action.py
    │       │   │   │   ├── action1.py
    │       │   │   │   ├── action2.py
    │       │   │   │   ├── action3.py
    │       │   │   │   ├── basline1.py
    │       │   │   │   ├── basline2.py
    │       │   │   │   ├── basline3.py
    │       │   │   │   ├── client.py
    │       │   │   │   ├── main.py
    │       │   │   │   ├── prompt
    │       │   │   │       ├── cn.py
    │       │   │   │       ├── cn_en.py
    │       │   │   │       └── en.py
    │       │   │   │   ├── sdk
    │       │   │   │       ├── CMakeLists.txt
    │       │   │   │       ├── ai_client.cpython-310-x86_64-linux-gnu.so
    │       │   │   │       ├── ai_client.cpython-38-x86_64-linux-gnu.so
    │       │   │   │       ├── ai_client.hpp
    │       │   │   │       ├── ai_client.pyi
    │       │   │   │       ├── build
    │       │   │   │       │   ├── CMakeCache.txt
    │       │   │   │       │   ├── CMakeFiles
    │       │   │   │       │   │   ├── 3.25.0
    │       │   │   │       │   │   │   ├── CMakeCXXCompiler.cmake
    │       │   │   │       │   │   │   ├── CMakeDetermineCompilerABI_CXX.bin
    │       │   │   │       │   │   │   ├── CMakeSystem.cmake
    │       │   │   │       │   │   │   └── CompilerIdCXX
    │       │   │   │       │   │   │   │   ├── CMakeCXXCompilerId.cpp
    │       │   │   │       │   │   │   │   └── a.out
    │       │   │   │       │   │   ├── CMakeDirectoryInformation.cmake
    │       │   │   │       │   │   ├── CMakeOutput.log
    │       │   │   │       │   │   ├── Makefile.cmake
    │       │   │   │       │   │   ├── Makefile2
    │       │   │   │       │   │   ├── TargetDirectories.txt
    │       │   │   │       │   │   ├── ai_client.dir
    │       │   │   │       │   │   │   ├── DependInfo.cmake
    │       │   │   │       │   │   │   ├── build.make
    │       │   │   │       │   │   │   ├── cmake_clean.cmake
    │       │   │   │       │   │   │   ├── compiler_depend.make
    │       │   │   │       │   │   │   ├── compiler_depend.ts
    │       │   │   │       │   │   │   ├── depend.make
    │       │   │   │       │   │   │   ├── flags.make
    │       │   │   │       │   │   │   ├── jsoncpp
    │       │   │   │       │   │   │   │   ├── jsoncpp.cpp.o
    │       │   │   │       │   │   │   │   └── jsoncpp.cpp.o.d
    │       │   │   │       │   │   │   ├── link.txt
    │       │   │   │       │   │   │   ├── progress.make
    │       │   │   │       │   │   │   ├── py_ai_sdk.cpp.o
    │       │   │   │       │   │   │   └── py_ai_sdk.cpp.o.d
    │       │   │   │       │   │   ├── cmake.check_cache
    │       │   │   │       │   │   └── progress.marks
    │       │   │   │       │   ├── Makefile
    │       │   │   │       │   ├── bin
    │       │   │   │       │   │   └── ai_client.cpython-310-x86_64-linux-gnu.so
    │       │   │   │       │   └── cmake_install.cmake
    │       │   │   │       ├── jsoncpp
    │       │   │   │       │   ├── json
    │       │   │   │       │   │   ├── json-forwards.h
    │       │   │   │       │   │   └── json.h
    │       │   │   │       │   └── jsoncpp.cpp
    │       │   │   │       ├── py_ai_sdk.cpp
    │       │   │   │       ├── py_json_cast.hpp
    │       │   │   │       └── pyd
    │       │   │   │       │   ├── ai_client.cp38-win_amd64.pyd
    │       │   │   │       │   ├── ai_client.cpython-310-x86_64-linux-gnu.so
    │       │   │   │       │   ├── ai_client.cpython-36m-x86_64-linux-gnu.so
    │       │   │   │       │   ├── ai_client.cpython-38-darwin.so
    │       │   │   │       │   └── ai_client.cpython-38-x86_64-linux-gnu.so
    │       │   │   │   └── test.py
    │       │   ├── Tools
    │       │   │   └── player.py
    │       │   ├── __init__.py
    │       │   ├── judger
    │       │   │   ├── cal_metric.py
    │       │   │   ├── judger.py
    │       │   │   ├── rserver.py
    │       │   │   └── run_all.py
    │       │   ├── logic
    │       │   │   ├── .gitignore
    │       │   │   ├── Makefile
    │       │   │   ├── bin
    │       │   │   │   └── main
    │       │   │   └── src
    │       │   │   │   ├── aqua_sdk.cpp
    │       │   │   │   ├── fish.cpp
    │       │   │   │   ├── fish.h
    │       │   │   │   ├── fishset.cpp
    │       │   │   │   ├── fishset.h
    │       │   │   │   ├── game.cpp
    │       │   │   │   ├── game.h
    │       │   │   │   ├── jsoncpp
    │       │   │   │       ├── json
    │       │   │   │       │   ├── json-forwards.h
    │       │   │   │       │   └── json.h
    │       │   │   │       └── jsoncpp.cpp
    │       │   │   │   ├── main
    │       │   │   │   ├── main.cpp
    │       │   │   │   ├── player.cpp
    │       │   │   │   ├── player.h
    │       │   │   │   └── timer.h
    │       │   ├── server.py
    │       │   ├── task.py
    │       │   └── utils.py
    │       ├── composite_task.py
    │       ├── dbbench
    │       │   ├── Interaction.py
    │       │   ├── __init__.py
    │       │   └── requirements.txt
    │       ├── example_task.py
    │       ├── knowledgegraph
    │       │   ├── __init__.py
    │       │   ├── api.py
    │       │   ├── ontology
    │       │   │   ├── fb_roles
    │       │   │   ├── fb_types
    │       │   │   ├── reverse_properties
    │       │   │   └── vocab.json
    │       │   ├── requirements.txt
    │       │   ├── task.py
    │       │   └── utils
    │       │   │   ├── logic_form_util.py
    │       │   │   ├── semparse_util.py
    │       │   │   └── sparql_executer.py
    │       ├── mind2web
    │       │   ├── __init__.py
    │       │   ├── data_utils
    │       │   │   ├── __init__.py
    │       │   │   └── dom_utils.py
    │       │   ├── dataloader.py
    │       │   └── task.py
    │       ├── os_interaction
    │       │   ├── __init__.py
    │       │   ├── images.py
    │       │   ├── requirements.txt
    │       │   └── task.py
    │       └── webshop
    │       │   ├── .github
    │       │       ├── ISSUE_TEMPLATE.md
    │       │       ├── PULL_REQUEST_TEMPLATE.md
    │       │       └── workflows
    │       │       │   └── pytest.yml
    │       │   ├── .gitignore
    │       │   ├── LICENSE.md
    │       │   ├── README.md
    │       │   ├── __init__.py
    │       │   ├── assets
    │       │       ├── diagram.gif
    │       │       ├── model_ckpts.png
    │       │       └── transfer-logic.png
    │       │   ├── baseline_models
    │       │       ├── .gitignore
    │       │       ├── README.md
    │       │       ├── agent.py
    │       │       ├── env.py
    │       │       ├── generate_search.py
    │       │       ├── logger.py
    │       │       ├── models
    │       │       │   ├── bert.py
    │       │       │   ├── modules.py
    │       │       │   └── rnn.py
    │       │       ├── requirements.txt
    │       │       ├── test.py
    │       │       ├── train_choice_il.py
    │       │       ├── train_rl.py
    │       │       └── train_search_il.py
    │       │   ├── conftest.py
    │       │   ├── requirements.txt
    │       │   ├── run_dev.sh
    │       │   ├── run_envs
    │       │       ├── run_web_agent_site_env.py
    │       │       └── run_web_agent_text_env.py
    │       │   ├── run_prod.sh
    │       │   ├── run_web_agent_site_env.sh
    │       │   ├── run_web_agent_text_env.sh
    │       │   ├── search_engine
    │       │       ├── convert_product_file_format.py
    │       │       ├── lucene_searcher.py
    │       │       └── run_indexing.sh
    │       │   ├── setup.sh
    │       │   ├── tests
    │       │       ├── transfer
    │       │       │   ├── mocks
    │       │       │   │   ├── mock_parse_item_page_amz
    │       │       │   │   ├── mock_parse_item_page_ebay
    │       │       │   │   ├── mock_parse_item_page_ws
    │       │       │   │   ├── mock_parse_item_page_ws_desc
    │       │       │   │   ├── mock_parse_item_page_ws_feat
    │       │       │   │   ├── mock_parse_results_amz
    │       │       │   │   ├── mock_parse_results_ebay
    │       │       │   │   └── mock_parse_results_ws
    │       │       │   └── test_predict_help.py
    │       │       └── web-agent-site
    │       │       │   ├── engine
    │       │       │       ├── test_goal.py
    │       │       │       └── test_normalize.py
    │       │       │   └── test_utils.py
    │       │   ├── transfer
    │       │       ├── README.md
    │       │       ├── __init__.py
    │       │       ├── app.py
    │       │       ├── predict_help.py
    │       │       └── webshop_lite.py
    │       │   └── web_agent_site
    │       │       ├── __init__.py
    │       │       ├── app.py
    │       │       ├── attributes
    │       │           ├── annotate.py
    │       │           └── generate_attrs.py
    │       │       ├── engine
    │       │           ├── __init__.py
    │       │           ├── engine.py
    │       │           ├── goal.py
    │       │           └── normalize.py
    │       │       ├── envs
    │       │           ├── __init__.py
    │       │           ├── chromedriver
    │       │           ├── web_agent_site_env.py
    │       │           └── web_agent_text_env.py
    │       │       ├── models
    │       │           ├── __init__.py
    │       │           └── models.py
    │       │       ├── static
    │       │           ├── images
    │       │           │   └── no-image-available.png
    │       │           └── style.css
    │       │       ├── templates
    │       │           ├── attributes_page.html
    │       │           ├── description_page.html
    │       │           ├── done_page.html
    │       │           ├── features_page.html
    │       │           ├── item_page.html
    │       │           ├── results_page.html
    │       │           ├── review_page.html
    │       │           └── search_page.html
    │       │       └── utils.py
    │   └── utils.py
├── README-zh.md
├── README.md
├── assets
    ├── head-figure.png
    ├── head-figure.svg
    ├── main-figure.png
    └── main-figure.svg
├── docker
    ├── agentlm-13b.yml
    ├── agentlm-70b.yml
    └── agentlm-7b.yml
├── docs
    ├── README.md
    ├── index.html
    └── static
    │   ├── css
    │       ├── bulma-carousel.min.css
    │       ├── bulma-slider.min.css
    │       ├── bulma.css.map.txt
    │       ├── bulma.min.css
    │       ├── fontawesome.all.min.css
    │       └── index.css
    │   ├── images
    │       ├── case-study.svg
    │       ├── error-analysis.svg
    │       ├── favicon.ico
    │       ├── head-figure.png
    │       ├── head-figure.svg
    │       ├── main-figure.png
    │       └── main-figure.svg
    │   └── js
    │       ├── bulma-carousel.js
    │       ├── bulma-carousel.min.js
    │       ├── bulma-slider.js
    │       ├── bulma-slider.min.js
    │       ├── fontawesome.all.min.js
    │       └── index.js
├── eval_general
    ├── eval_gsm8k_tgi.py
    ├── eval_mmlu_hf.py
    ├── eval_mt_bench_tgi.py
    └── requirements.txt
└── eval_heldout
    ├── hotpotQA
        ├── eval_hotpot.py
        ├── requirements.txt
        └── src
        │   ├── agent_arch.py
        │   ├── config.py
        │   ├── data
        │       ├── easy.joblib
        │       ├── hard.joblib
        │       └── medium.joblib
        │   ├── evaluate.py
        │   ├── fewshots.py
        │   ├── hotpotqa_env.py
        │   ├── llms.py
        │   ├── pre_prompt.py
        │   ├── prompt.txt
        │   ├── utils.py
        │   ├── wikienv.py
        │   └── wrappers.py
    ├── miniwob++
        ├── .gitignore
        ├── LICENSE
        ├── README.md
        ├── available_tasks.txt
        ├── computergym
        │   ├── .gitignore
        │   ├── README.md
        │   ├── computergym
        │   │   ├── __init__.py
        │   │   └── miniwob
        │   │   │   ├── base_env.py
        │   │   │   └── miniwob_interface
        │   │   │       ├── __init__.py
        │   │   │       ├── action.py
        │   │   │       ├── environment.py
        │   │   │       ├── fields.py
        │   │   │       ├── html
        │   │   │           ├── .gitignore
        │   │   │           ├── common
        │   │   │           │   ├── shapes.js
        │   │   │           │   ├── special
        │   │   │           │   │   ├── book-flight
        │   │   │           │   │   │   └── domestic.js
        │   │   │           │   │   ├── checkbox-numbers
        │   │   │           │   │   │   ├── ch_0.png
        │   │   │           │   │   │   ├── ch_1.png
        │   │   │           │   │   │   ├── ch_2.png
        │   │   │           │   │   │   ├── ch_3.png
        │   │   │           │   │   │   ├── ch_4.png
        │   │   │           │   │   │   ├── ch_5.png
        │   │   │           │   │   │   ├── ch_6.png
        │   │   │           │   │   │   ├── ch_7.png
        │   │   │           │   │   │   ├── ch_8.png
        │   │   │           │   │   │   └── ch_9.png
        │   │   │           │   │   ├── click-pie
        │   │   │           │   │   │   ├── raphael.icons.min.js
        │   │   │           │   │   │   ├── raphael.min.js
        │   │   │           │   │   │   └── wheelnav.min.js
        │   │   │           │   │   ├── drag-cube
        │   │   │           │   │   │   ├── blank.png
        │   │   │           │   │   │   ├── cube.css
        │   │   │           │   │   │   └── cube.js
        │   │   │           │   │   ├── email-inbox-nl
        │   │   │           │   │   │   └── templates.js
        │   │   │           │   │   ├── email-inbox
        │   │   │           │   │   │   ├── delete.png
        │   │   │           │   │   │   ├── email-inbox.css
        │   │   │           │   │   │   ├── forward.png
        │   │   │           │   │   │   ├── left-arrow-white.png
        │   │   │           │   │   │   ├── left-arrow.png
        │   │   │           │   │   │   ├── reply.png
        │   │   │           │   │   │   ├── search.png
        │   │   │           │   │   │   ├── send.png
        │   │   │           │   │   │   ├── star-clicked.png
        │   │   │           │   │   │   └── star.png
        │   │   │           │   │   ├── navigate-tree
        │   │   │           │   │   │   ├── images
        │   │   │           │   │   │   │   ├── ajax-loader.gif
        │   │   │           │   │   │   │   ├── file.gif
        │   │   │           │   │   │   │   ├── folder-closed.gif
        │   │   │           │   │   │   │   ├── folder.gif
        │   │   │           │   │   │   │   ├── minus.gif
        │   │   │           │   │   │   │   ├── plus.gif
        │   │   │           │   │   │   │   ├── treeview-black-line.gif
        │   │   │           │   │   │   │   ├── treeview-black.gif
        │   │   │           │   │   │   │   ├── treeview-default-line.gif
        │   │   │           │   │   │   │   ├── treeview-default.gif
        │   │   │           │   │   │   │   ├── treeview-famfamfam-line.gif
        │   │   │           │   │   │   │   ├── treeview-famfamfam.gif
        │   │   │           │   │   │   │   ├── treeview-gray-line.gif
        │   │   │           │   │   │   │   ├── treeview-gray.gif
        │   │   │           │   │   │   │   ├── treeview-red-line.gif
        │   │   │           │   │   │   │   └── treeview-red.gif
        │   │   │           │   │   │   ├── jquery.treeview.css
        │   │   │           │   │   │   └── jquery.treeview.min.js
        │   │   │           │   │   ├── search-engine
        │   │   │           │   │   │   └── jquery.twbsPagination.min.js
        │   │   │           │   │   ├── social-media
        │   │   │           │   │   │   ├── like-hover.png
        │   │   │           │   │   │   ├── like.png
        │   │   │           │   │   │   ├── more-hover.png
        │   │   │           │   │   │   ├── more.png
        │   │   │           │   │   │   ├── reply-hover.png
        │   │   │           │   │   │   ├── reply.png
        │   │   │           │   │   │   ├── retweet-hover.png
        │   │   │           │   │   │   ├── retweet.png
        │   │   │           │   │   │   ├── share-hover.png
        │   │   │           │   │   │   └── share.png
        │   │   │           │   │   ├── text-editor
        │   │   │           │   │   │   ├── quill.min.js
        │   │   │           │   │   │   └── quill.snow.css
        │   │   │           │   │   └── tic-tac-toe
        │   │   │           │   │   │   ├── o.png
        │   │   │           │   │   │   └── x.png
        │   │   │           │   └── ui_utils.js
        │   │   │           ├── core
        │   │   │           │   ├── core.css
        │   │   │           │   ├── core.js
        │   │   │           │   ├── d3.v3.min.js
        │   │   │           │   ├── jquery-ui
        │   │   │           │   │   ├── external
        │   │   │           │   │   │   └── jquery
        │   │   │           │   │   │   │   └── jquery.js
        │   │   │           │   │   ├── images
        │   │   │           │   │   │   ├── ui-icons_444444_256x240.png
        │   │   │           │   │   │   ├── ui-icons_555555_256x240.png
        │   │   │           │   │   │   ├── ui-icons_777620_256x240.png
        │   │   │           │   │   │   ├── ui-icons_777777_256x240.png
        │   │   │           │   │   │   ├── ui-icons_cc0000_256x240.png
        │   │   │           │   │   │   └── ui-icons_ffffff_256x240.png
        │   │   │           │   │   ├── jquery-ui.min.css
        │   │   │           │   │   ├── jquery-ui.min.js
        │   │   │           │   │   ├── jquery-ui.structure.min.css
        │   │   │           │   │   └── jquery-ui.theme.min.css
        │   │   │           │   ├── jscolor.min.js
        │   │   │           │   └── record.js
        │   │   │           ├── flight
        │   │   │           │   ├── AA
        │   │   │           │   │   ├── apps
        │   │   │           │   │   │   └── common
        │   │   │           │   │   │   │   └── js
        │   │   │           │   │   │   │       ├── aacom.js
        │   │   │           │   │   │   │       ├── aacomDevice.js
        │   │   │           │   │   │   │       ├── airportcode.js
        │   │   │           │   │   │   │       ├── cookieconsent.js
        │   │   │           │   │   │   │       ├── jquery
        │   │   │           │   │   │   │           └── aacom
        │   │   │           │   │   │   │           │   ├── plugins
        │   │   │           │   │   │   │           │       ├── aaAirportAutoComplete.js
        │   │   │           │   │   │   │           │       ├── aaCache.js
        │   │   │           │   │   │   │           │       ├── aaCookie.js
        │   │   │           │   │   │   │           │       ├── aaCountryLanSelect.js
        │   │   │           │   │   │   │           │       ├── aaDropdownPanel.js
        │   │   │           │   │   │   │           │       ├── aaFooterAds.js
        │   │   │           │   │   │   │           │       └── aaTextBoxMessage.js
        │   │   │           │   │   │   │           │   └── utilities
        │   │   │           │   │   │   │           │       ├── aaUtilities-2.1.js
        │   │   │           │   │   │   │           │       └── aaUtils.js
        │   │   │           │   │   │   │       └── wa.js
        │   │   │           │   │   ├── content
        │   │   │           │   │   │   ├── common
        │   │   │           │   │   │   │   └── css
        │   │   │           │   │   │   │   │   └── v3
        │   │   │           │   │   │   │   │       ├── core.css
        │   │   │           │   │   │   │   │       ├── jquery-ui-1.10-aa.css
        │   │   │           │   │   │   │   │       ├── reservation
        │   │   │           │   │   │   │   │           └── findFlights
        │   │   │           │   │   │   │   │           │   └── mobile
        │   │   │           │   │   │   │   │           │       └── findFlights.css
        │   │   │           │   │   │   │   │       └── responsive.css
        │   │   │           │   │   │   ├── fonts
        │   │   │           │   │   │   │   ├── american-v2
        │   │   │           │   │   │   │   │   ├── americansans-bold.woff
        │   │   │           │   │   │   │   │   ├── americansans-light.woff
        │   │   │           │   │   │   │   │   ├── americansans-medium.woff
        │   │   │           │   │   │   │   │   └── americansans-regular.woff
        │   │   │           │   │   │   │   └── icons
        │   │   │           │   │   │   │   │   └── american-icons-v4-4.woff
        │   │   │           │   │   │   └── images
        │   │   │           │   │   │   │   ├── chrome
        │   │   │           │   │   │   │       ├── icons
        │   │   │           │   │   │   │       │   └── loading.gif
        │   │   │           │   │   │   │       └── rebrand
        │   │   │           │   │   │   │       │   ├── aa-flight-icon.png
        │   │   │           │   │   │   │       │   ├── aa-icons-flags-sprite.png
        │   │   │           │   │   │   │       │   ├── aa-logo.png
        │   │   │           │   │   │   │       │   ├── down-arrow.png
        │   │   │           │   │   │   │       │   ├── favicon.png
        │   │   │           │   │   │   │       │   ├── oneworld.png
        │   │   │           │   │   │   │       │   ├── shadow-down.png
        │   │   │           │   │   │   │       │   ├── shadow-vertical-150.png
        │   │   │           │   │   │   │       │   ├── shadow12-down.png
        │   │   │           │   │   │   │       │   ├── shadow12-up.png
        │   │   │           │   │   │   │       │   └── shadow3-down.png
        │   │   │           │   │   │   │   └── graphics
        │   │   │           │   │   │   │       └── icons
        │   │   │           │   │   │   │           └── aa-jqueryUIicons-sprite.png
        │   │   │           │   │   ├── dataset-AA.js
        │   │   │           │   │   ├── index.html
        │   │   │           │   │   ├── js
        │   │   │           │   │   │   ├── aa
        │   │   │           │   │   │   │   ├── common
        │   │   │           │   │   │   │   │   ├── aa-utility-menu.js
        │   │   │           │   │   │   │   │   ├── aacom-ui-1.0.0.js
        │   │   │           │   │   │   │   │   └── core-2.0.0.js
        │   │   │           │   │   │   │   ├── modules
        │   │   │           │   │   │   │   │   ├── airportLookup.js
        │   │   │           │   │   │   │   │   ├── ajax.js
        │   │   │           │   │   │   │   │   ├── browserdetect.js
        │   │   │           │   │   │   │   │   ├── commonsetup.js
        │   │   │           │   │   │   │   │   ├── mobileDatePicker.js
        │   │   │           │   │   │   │   │   ├── utilities.js
        │   │   │           │   │   │   │   │   └── widgets.js
        │   │   │           │   │   │   │   ├── plugins
        │   │   │           │   │   │   │   │   └── noBounce.js
        │   │   │           │   │   │   │   └── shopping
        │   │   │           │   │   │   │   │   └── mobileSearchFlights.js
        │   │   │           │   │   │   └── libs
        │   │   │           │   │   │   │   ├── jquery
        │   │   │           │   │   │   │       ├── jquery-1.11.1.min.js
        │   │   │           │   │   │   │       ├── jquery-migrate-1.2.1.min.js
        │   │   │           │   │   │   │       └── ui
        │   │   │           │   │   │   │       │   └── 1.10
        │   │   │           │   │   │   │       │       ├── i18n
        │   │   │           │   │   │   │       │           └── jquery.ui.datepicker-en-aa.js
        │   │   │           │   │   │   │       │       └── jquery-ui.min.js
        │   │   │           │   │   │   │   └── modernizr-2.8.1.js
        │   │   │           │   │   ├── original.html
        │   │   │           │   │   ├── surrogate
        │   │   │           │   │   │   ├── airportLookup.js
        │   │   │           │   │   │   └── airports.json
        │   │   │           │   │   └── wrapper.html
        │   │   │           │   ├── Alaska-auto-medium
        │   │   │           │   │   ├── images
        │   │   │           │   │   │   ├── aura.png
        │   │   │           │   │   │   ├── cal3.png
        │   │   │           │   │   │   ├── chkboxes3.png
        │   │   │           │   │   │   ├── clear_text2.png
        │   │   │           │   │   │   ├── collapse.png
        │   │   │           │   │   │   ├── expand.png
        │   │   │           │   │   │   ├── flight_arrow.png
        │   │   │           │   │   │   ├── geo.png
        │   │   │           │   │   │   ├── home.png
        │   │   │           │   │   │   ├── info2.png
        │   │   │           │   │   │   ├── leftright.png
        │   │   │           │   │   │   ├── logo2.png
        │   │   │           │   │   │   └── logos
        │   │   │           │   │   │   │   ├── AA.png
        │   │   │           │   │   │   │   ├── AS.png
        │   │   │           │   │   │   │   ├── DL.png
        │   │   │           │   │   │   │   └── VX.png
        │   │   │           │   │   ├── index.html
        │   │   │           │   │   ├── mobileweb-v3-28-6227-21813.css
        │   │   │           │   │   ├── scripts
        │   │   │           │   │   │   ├── datepickr.js
        │   │   │           │   │   │   ├── main.js
        │   │   │           │   │   │   └── shopbook.js
        │   │   │           │   │   ├── stylesheets
        │   │   │           │   │   │   └── circular
        │   │   │           │   │   │   │   ├── ASCircularWeb-Bold.woff
        │   │   │           │   │   │   │   └── ASCircularWeb-Book.woff
        │   │   │           │   │   ├── surrogate
        │   │   │           │   │   │   ├── airportLookup.js
        │   │   │           │   │   │   └── airports.json
        │   │   │           │   │   └── wrapper.html
        │   │   │           │   ├── Alaska-auto
        │   │   │           │   │   ├── images
        │   │   │           │   │   │   ├── aura.png
        │   │   │           │   │   │   ├── cal3.png
        │   │   │           │   │   │   ├── chkboxes3.png
        │   │   │           │   │   │   ├── clear_text2.png
        │   │   │           │   │   │   ├── collapse.png
        │   │   │           │   │   │   ├── expand.png
        │   │   │           │   │   │   ├── flight_arrow.png
        │   │   │           │   │   │   ├── geo.png
        │   │   │           │   │   │   ├── home.png
        │   │   │           │   │   │   ├── info2.png
        │   │   │           │   │   │   ├── leftright.png
        │   │   │           │   │   │   ├── logo2.png
        │   │   │           │   │   │   └── logos
        │   │   │           │   │   │   │   ├── AA.png
        │   │   │           │   │   │   │   ├── AS.png
        │   │   │           │   │   │   │   ├── DL.png
        │   │   │           │   │   │   │   └── VX.png
        │   │   │           │   │   ├── index.html
        │   │   │           │   │   ├── mobileweb-v3-28-6227-21813.css
        │   │   │           │   │   ├── scripts
        │   │   │           │   │   │   ├── datepickr.js
        │   │   │           │   │   │   ├── main.js
        │   │   │           │   │   │   └── shopbook.js
        │   │   │           │   │   ├── stylesheets
        │   │   │           │   │   │   └── circular
        │   │   │           │   │   │   │   ├── ASCircularWeb-Bold.woff
        │   │   │           │   │   │   │   └── ASCircularWeb-Book.woff
        │   │   │           │   │   ├── surrogate
        │   │   │           │   │   │   ├── airportLookup.js
        │   │   │           │   │   │   └── airports.json
        │   │   │           │   │   └── wrapper.html
        │   │   │           │   ├── Alaska
        │   │   │           │   │   ├── dataset-Alaska.js
        │   │   │           │   │   ├── images
        │   │   │           │   │   │   ├── aura.png
        │   │   │           │   │   │   ├── cal3.png
        │   │   │           │   │   │   ├── chkboxes3.png
        │   │   │           │   │   │   ├── clear_text2.png
        │   │   │           │   │   │   ├── collapse.png
        │   │   │           │   │   │   ├── expand.png
        │   │   │           │   │   │   ├── flight_arrow.png
        │   │   │           │   │   │   ├── geo.png
        │   │   │           │   │   │   ├── home.png
        │   │   │           │   │   │   ├── info2.png
        │   │   │           │   │   │   ├── leftright.png
        │   │   │           │   │   │   ├── logo2.png
        │   │   │           │   │   │   └── logos
        │   │   │           │   │   │   │   ├── AA.png
        │   │   │           │   │   │   │   ├── AS.png
        │   │   │           │   │   │   │   ├── DL.png
        │   │   │           │   │   │   │   └── VX.png
        │   │   │           │   │   ├── index.html
        │   │   │           │   │   ├── mobileweb-v3-28-6227-21813.css
        │   │   │           │   │   ├── original.html
        │   │   │           │   │   ├── scripts
        │   │   │           │   │   │   ├── datepickr.js
        │   │   │           │   │   │   ├── main.js
        │   │   │           │   │   │   └── shopbook.js
        │   │   │           │   │   ├── stylesheets
        │   │   │           │   │   │   └── circular
        │   │   │           │   │   │   │   ├── ASCircularWeb-Bold.woff
        │   │   │           │   │   │   │   └── ASCircularWeb-Book.woff
        │   │   │           │   │   ├── surrogate
        │   │   │           │   │   │   ├── airportLookup.js
        │   │   │           │   │   │   └── airports.json
        │   │   │           │   │   └── wrapper.html
        │   │   │           │   └── flight-common
        │   │   │           │   │   ├── inject.js
        │   │   │           │   │   ├── wrapper.css
        │   │   │           │   │   └── wrapper.js
        │   │   │           └── miniwob
        │   │   │           │   ├── bisect-angle.html
        │   │   │           │   ├── book-flight-nodelay.html
        │   │   │           │   ├── book-flight.html
        │   │   │           │   ├── chase-circle.html
        │   │   │           │   ├── choose-date-easy.html
        │   │   │           │   ├── choose-date-medium.html
        │   │   │           │   ├── choose-date-nodelay.html
        │   │   │           │   ├── choose-date.html
        │   │   │           │   ├── choose-list.html
        │   │   │           │   ├── circle-center.html
        │   │   │           │   ├── click-button-sequence.html
        │   │   │           │   ├── click-button.html
        │   │   │           │   ├── click-checkboxes-large.html
        │   │   │           │   ├── click-checkboxes-soft.html
        │   │   │           │   ├── click-checkboxes-transfer.html
        │   │   │           │   ├── click-checkboxes.html
        │   │   │           │   ├── click-collapsible-2-nodelay.html
        │   │   │           │   ├── click-collapsible-2.html
        │   │   │           │   ├── click-collapsible-nodelay.html
        │   │   │           │   ├── click-collapsible.html
        │   │   │           │   ├── click-color.html
        │   │   │           │   ├── click-dialog-2.html
        │   │   │           │   ├── click-dialog.html
        │   │   │           │   ├── click-link.html
        │   │   │           │   ├── click-menu-2.html
        │   │   │           │   ├── click-menu.html
        │   │   │           │   ├── click-option.html
        │   │   │           │   ├── click-pie-nodelay.html
        │   │   │           │   ├── click-pie.html
        │   │   │           │   ├── click-scroll-list.html
        │   │   │           │   ├── click-shades.html
        │   │   │           │   ├── click-shape.html
        │   │   │           │   ├── click-tab-2-easy.html
        │   │   │           │   ├── click-tab-2-hard.html
        │   │   │           │   ├── click-tab-2-medium.html
        │   │   │           │   ├── click-tab-2.html
        │   │   │           │   ├── click-tab.html
        │   │   │           │   ├── click-test-2.html
        │   │   │           │   ├── click-test-transfer.html
        │   │   │           │   ├── click-test.html
        │   │   │           │   ├── click-widget.html
        │   │   │           │   ├── copy-paste-2.html
        │   │   │           │   ├── copy-paste.html
        │   │   │           │   ├── count-shape.html
        │   │   │           │   ├── count-sides.html
        │   │   │           │   ├── drag-box.html
        │   │   │           │   ├── drag-cube.html
        │   │   │           │   ├── drag-item.html
        │   │   │           │   ├── drag-items-grid.html
        │   │   │           │   ├── drag-items.html
        │   │   │           │   ├── drag-shapes.html
        │   │   │           │   ├── drag-sort-numbers.html
        │   │   │           │   ├── email-inbox-delete.html
        │   │   │           │   ├── email-inbox-forward-nl-turk.html
        │   │   │           │   ├── email-inbox-forward-nl.html
        │   │   │           │   ├── email-inbox-forward.html
        │   │   │           │   ├── email-inbox-important.html
        │   │   │           │   ├── email-inbox-nl-turk.html
        │   │   │           │   ├── email-inbox-noscroll.html
        │   │   │           │   ├── email-inbox-reply.html
        │   │   │           │   ├── email-inbox-star-reply.html
        │   │   │           │   ├── email-inbox.html
        │   │   │           │   ├── enter-date.html
        │   │   │           │   ├── enter-password.html
        │   │   │           │   ├── enter-text-2.html
        │   │   │           │   ├── enter-text-dynamic.html
        │   │   │           │   ├── enter-text.html
        │   │   │           │   ├── enter-time.html
        │   │   │           │   ├── find-midpoint.html
        │   │   │           │   ├── find-word.html
        │   │   │           │   ├── focus-text-2.html
        │   │   │           │   ├── focus-text.html
        │   │   │           │   ├── grid-coordinate.html
        │   │   │           │   ├── guess-number.html
        │   │   │           │   ├── highlight-text-2.html
        │   │   │           │   ├── highlight-text.html
        │   │   │           │   ├── identify-shape.html
        │   │   │           │   ├── login-user-popup.html
        │   │   │           │   ├── login-user.html
        │   │   │           │   ├── moving-items.html
        │   │   │           │   ├── multi-layouts.html
        │   │   │           │   ├── multi-orderings.html
        │   │   │           │   ├── navigate-tree.html
        │   │   │           │   ├── number-checkboxes.html
        │   │   │           │   ├── read-table-2.html
        │   │   │           │   ├── read-table.html
        │   │   │           │   ├── resize-textarea.html
        │   │   │           │   ├── right-angle.html
        │   │   │           │   ├── scroll-text-2.html
        │   │   │           │   ├── scroll-text.html
        │   │   │           │   ├── search-engine.html
        │   │   │           │   ├── simon-says.html
        │   │   │           │   ├── simple-algebra.html
        │   │   │           │   ├── simple-arithmetic.html
        │   │   │           │   ├── social-media-all.html
        │   │   │           │   ├── social-media-some.html
        │   │   │           │   ├── social-media.html
        │   │   │           │   ├── terminal.html
        │   │   │           │   ├── terminal_reproduce.html
        │   │   │           │   ├── text-editor.html
        │   │   │           │   ├── text-transform.html
        │   │   │           │   ├── tic-tac-toe.html
        │   │   │           │   ├── unicode-test.html
        │   │   │           │   ├── use-autocomplete-nodelay.html
        │   │   │           │   ├── use-autocomplete.html
        │   │   │           │   ├── use-colorwheel-2.html
        │   │   │           │   ├── use-colorwheel.html
        │   │   │           │   ├── use-slider-2.html
        │   │   │           │   ├── use-slider.html
        │   │   │           │   ├── use-spinner.html
        │   │   │           │   └── visual-addition.html
        │   │   │       ├── instance.py
        │   │   │       ├── reward.py
        │   │   │       ├── screenshot.py
        │   │   │       ├── state.py
        │   │   │       └── utils.py
        │   └── setup.py
        ├── eval-gpt-3.5-turbo.sh
        ├── eval-gpt-4.sh
        ├── eval-tgi.sh
        ├── llm_agent.py
        ├── main.py
        ├── metrics.py
        ├── prompt.py
        ├── prompt
        │   ├── action.txt
        │   ├── base.txt
        │   ├── choose-list
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-button-sequence
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-button
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-checkboxes-large
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-checkboxes-soft
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-checkboxes-transfer
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-checkboxes
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-collapsible-2
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-collapsible
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-dialog-2
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-dialog
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-menu
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-option
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-scroll-list
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-shades
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-shape
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-tab-2-hard
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-tab-2
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-tab
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-test
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── click-widget
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── count-shape
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── email-inbox-forward-nl-turk
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── email-inbox-forward-nl
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── email-inbox-nl-turk
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── email-inbox
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── enter-date
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── enter-time
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── example.txt
        │   ├── few-shot.json
        │   ├── first_action.txt
        │   ├── focus-text
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── grid-coordinate
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── check_finish.txt
        │   │   ├── done.txt
        │   │   ├── edit_plan.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── new_plan.txt
        │   │   ├── rci_action.txt
        │   │   ├── rci_answer.txt
        │   │   └── update_action.txt
        │   ├── identify-shape
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── initialize_plan.txt
        │   ├── login-user-popup
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── multi-layouts
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── navigate-tree
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── rci_action.txt
        │   ├── search-engine
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── simple-algebra
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── check_finish.txt
        │   │   ├── done.txt
        │   │   ├── edit_plan.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── new_plan.txt
        │   │   ├── rci_action.txt
        │   │   ├── rci_answer.txt
        │   │   └── update_action.txt
        │   ├── social-media-all
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── social-media-some
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── social-media
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── terminal
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── tic-tac-toe
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   ├── update_action.txt
        │   ├── use-autocomplete
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        │   └── use-spinner
        │   │   ├── action.txt
        │   │   ├── base.txt
        │   │   ├── example.txt
        │   │   ├── first_action.txt
        │   │   ├── initialize_plan.txt
        │   │   ├── rci_action.txt
        │   │   └── update_action.txt
        └── requirements.txt
    ├── rewoo
        ├── .gitignore
        ├── LICENSE
        ├── README.md
        ├── algos
        │   ├── PWS.py
        │   ├── __init__.py
        │   ├── notool.py
        │   └── react.py
        ├── alpaca
        │   ├── __init__.py
        │   ├── lora.py
        │   ├── templates
        │   │   ├── README.md
        │   │   ├── alpaca.json
        │   │   ├── alpaca_legacy.json
        │   │   ├── alpaca_short.json
        │   │   └── vigogne.json
        │   └── utils
        │   │   ├── README.md
        │   │   ├── __init__.py
        │   │   ├── callbacks.py
        │   │   └── prompter.py
        ├── eval-gpt-3.5-turbo.sh
        ├── eval-gpt-4.sh
        ├── eval-tgi.sh
        ├── metrics.py
        ├── nodes
        │   ├── LLMNode.py
        │   ├── Node.py
        │   ├── NodeCofig.py
        │   ├── Planner.py
        │   ├── Solver.py
        │   ├── Worker.py
        │   └── __init__.py
        ├── prompts
        │   ├── __init__.py
        │   ├── fewshots.py
        │   ├── planner.py
        │   ├── solver.py
        │   └── wiki_prompt.py
        ├── requirements.txt
        ├── run_eval.py
        └── utils
        │   ├── CustomDocstoreExplorer.py
        │   ├── DataLoader.py
        │   ├── Evaluator.py
        │   ├── __init__.py
        │   └── util.py
    ├── science-world
        ├── .gitignore
        ├── README.md
        ├── data_utils
        │   ├── READMD.md
        │   ├── __init__.py
        │   ├── data_convert.py
        │   ├── data_utils.py
        │   ├── demos.json
        │   └── goldpaths-all.zip
        ├── eval-gpt-3.5.sh
        ├── eval-gpt-4.sh
        ├── eval-tgi.sh
        ├── eval.py
        ├── eval_utils.py
        ├── fast_agent
        │   ├── deepspeed_reqs.txt
        │   ├── ds_train.py
        │   ├── ds_train.sh
        │   └── zero_2_bf16.json
        ├── metrics.py
        ├── prompts
        │   ├── README.md
        │   ├── convert.py
        │   ├── prompt.json
        │   └── prompt_orig.json
        ├── requirements.txt
        └── slow_agent
        │   ├── local_llm.py
        │   ├── run_gradio.py
        │   └── utils.py
    └── webarena
        ├── .gitignore
        ├── LICENSE
        ├── README.md
        ├── agent
            ├── __init__.py
            ├── agent.py
            └── prompts
            │   ├── README.md
            │   ├── __init__.py
            │   ├── prompt_constructor.py
            │   ├── raw
            │       ├── p_cot_id_actree_2s.py
            │       └── p_direct_id_actree_2s.py
            │   └── to_json.py
        ├── browser_env
            ├── __init__.py
            ├── actions.py
            ├── async_envs.py
            ├── auto_login.py
            ├── constants.py
            ├── env_config.py
            ├── envs.py
            ├── helper_functions.py
            ├── processors.py
            ├── py.typed
            ├── trajectory.py
            └── utils.py
        ├── config_files
            ├── examples
            │   ├── 1.json
            │   ├── 2.json
            │   ├── 3.json
            │   └── 4.json
            └── test.raw.json
        ├── environment_docker
            ├── README.md
            └── webarena-homepage
            │   ├── app.py
            │   ├── static
            │       └── figures
            │       │   ├── calculator.png
            │       │   ├── cms.png
            │       │   ├── gitlab.png
            │       │   ├── manual1.png
            │       │   ├── manual2.png
            │       │   ├── map.png
            │       │   ├── onestopshop.png
            │       │   ├── password.png
            │       │   ├── reddit.png
            │       │   ├── scratchpad.png
            │       │   └── wikipedia.png
            │   └── templates
            │       ├── calculator.html
            │       ├── index.html
            │       └── scratchpad.html
        ├── eval-gpt-3.5-turbo.sh
        ├── eval-gpt-4.sh
        ├── eval-tgi.sh
        ├── evaluation_harness
            ├── __init__.py
            ├── evaluators.py
            └── helper_functions.py
        ├── llms
            ├── __init__.py
            ├── lm_config.py
            ├── providers
            │   └── openai_utils.py
            └── tokenizers.py
        ├── prepare.sh
        ├── run.py
        ├── setup.cfg
        └── setup.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | eval_heldout/rewoo/data/** filter=lfs diff=lfs merge=lfs -text
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | tgi_data/
2 | __pycache__/
3 | .DS_Store


--------------------------------------------------------------------------------
/AgentBench.old/configs/agents/do_nothing.yaml:
--------------------------------------------------------------------------------
1 | module: "src.agents.DoNothingAgent"
2 | parameters:
3 |     name: "Do-Nothing-Agent"
4 |     sleep: 0.01
5 | 


--------------------------------------------------------------------------------
/AgentBench.old/configs/agents/tgi_clients/AgentLM-13b.yaml:
--------------------------------------------------------------------------------
 1 | module: "src.agents.TGIAgent"
 2 | parameters:
 3 |     ip: "http://127.0.0.1"
 4 |     # You can also serve AgentLM on multiple addresses;
 5 |     # the TGIAgent will automatically balance the load across them.
 6 |     # address ∈ [address_from, address_to)
 7 |     address_from: 30013
 8 |     address_to: 30014
 9 |     model_name: "AgentLM-13b"
10 |     max_tokens: 4096


--------------------------------------------------------------------------------
/AgentBench.old/configs/agents/tgi_clients/AgentLM-70b.yaml:
--------------------------------------------------------------------------------
 1 | module: "src.agents.TGIAgent"
 2 | parameters:
 3 |     ip: "http://127.0.0.1"
 4 |     # You can also serve AgentLM on multiple addresses;
 5 |     # the TGIAgent will automatically balance the load across them.
 6 |     # address ∈ [address_from, address_to)
 7 |     address_from: 30070
 8 |     address_to: 30071
 9 |     model_name: "AgentLM-70b"
10 |     max_tokens: 4096


--------------------------------------------------------------------------------
/AgentBench.old/configs/agents/tgi_clients/AgentLM-7b.yaml:
--------------------------------------------------------------------------------
 1 | module: "src.agents.TGIAgent"
 2 | parameters:
 3 |     ip: "http://127.0.0.1"
 4 |     # You can also serve AgentLM on multiple addresses;
 5 |     # the TGIAgent will automatically balance the load across them.
 6 |     # address ∈ [address_from, address_to)
 7 |     address_from: 30007
 8 |     address_to: 30008
 9 |     model_name: "AgentLM-7b"
10 |     max_tokens: 4096


--------------------------------------------------------------------------------
/AgentBench.old/configs/tasks/alfworld/dev.yaml:
--------------------------------------------------------------------------------
 1 | module: "src.tasks.alfworld.ALFWorld"
 2 | parameters:
 3 |     name: "ALFWorld"
 4 |     data_path: "/AgentBench/data/alfworld" # TODO replace it with your own data path
 5 |     config_path: "src/tasks/alfworld/configs/base_config.yaml"
 6 |     prompts_path: "src/tasks/alfworld/prompts/alfworld_multiturn_react.json"
 7 |     split: "dev"
 8 |     max_step: 35
 9 | 
10 | 


--------------------------------------------------------------------------------
/AgentBench.old/configs/tasks/alfworld/std.yaml:
--------------------------------------------------------------------------------
 1 | module: "src.tasks.alfworld.ALFWorld"
 2 | parameters:
 3 |     name: "ALFWorld"
 4 |     data_path: "/AgentBench/data/alfworld" # TODO replace it with your own data path
 5 |     config_path: "src/tasks/alfworld/configs/base_config.yaml"
 6 |     prompts_path: "src/tasks/alfworld/prompts/alfworld_multiturn_react.json"
 7 |     split: "std"
 8 |     max_step: 35
 9 | 
10 | 


--------------------------------------------------------------------------------
/AgentBench.old/configs/tasks/card_game/dev.yaml:
--------------------------------------------------------------------------------
1 | module: "src.tasks.CardGame"
2 | 
3 | parameters:
4 |     name: "CardGame"
5 |     port: 12347
6 |     test_time: 3


--------------------------------------------------------------------------------
/AgentBench.old/configs/tasks/card_game/ext.yaml:
--------------------------------------------------------------------------------
1 | module: "src.tasks.CardGame"
2 | 
3 | parameters:
4 |     name: "CardGame"
5 |     port: 12349
6 |     test_time: 50


--------------------------------------------------------------------------------
/AgentBench.old/configs/tasks/card_game/std.yaml:
--------------------------------------------------------------------------------
1 | module: "src.tasks.CardGame"
2 | 
3 | parameters:
4 |     name: "CardGame"
5 |     port: 12342
6 |     test_time: 5


--------------------------------------------------------------------------------
/AgentBench.old/configs/tasks/dbbench/dev.yaml:
--------------------------------------------------------------------------------
1 | module: src.tasks.DBBench
2 | 
3 | parameters:
4 |     name: "DBBench"
5 |     data_file: data/dbbench/dev.jsonl
6 |     max_round: 15
7 | 


--------------------------------------------------------------------------------
/AgentBench.old/configs/tasks/dbbench/std.yaml:
--------------------------------------------------------------------------------
1 | module: src.tasks.DBBench
2 | 
3 | parameters:
4 |   name: "DBBench"
5 |   data_file: data/dbbench/standard.jsonl
6 |   max_round: 15


--------------------------------------------------------------------------------
/AgentBench.old/configs/tasks/knowledgegraph/dev.yaml:
--------------------------------------------------------------------------------
1 | module: "src.tasks.KnowledgeGraph"
2 | parameters:
3 |     name: "KnowledgeGraph-dev"
4 |     round: 15
5 |     data_file: "data/knowledgegraph/dev.json"
6 |     sparql_url: "http://164.107.116.56:3093/sparql"
7 | 


--------------------------------------------------------------------------------
/AgentBench.old/configs/tasks/knowledgegraph/std.yaml:
--------------------------------------------------------------------------------
1 | module: "src.tasks.KnowledgeGraph"
2 | parameters:
3 |     name: "KnowledgeGraph-std"
4 |     round: 15
5 |     data_file: "data/knowledgegraph/std.json"
6 |     sparql_url: "http://164.107.116.56:3093/sparql"
7 | 


--------------------------------------------------------------------------------
/AgentBench.old/configs/tasks/webshop/dev.yaml:
--------------------------------------------------------------------------------
1 | module: src.tasks.WebShop
2 | 
3 | parameters:
4 |   name: "WebShop"
5 |   start: 200
6 |   end: 280
7 |   num_envs: 3
8 |   worker_limit: 3


--------------------------------------------------------------------------------
/AgentBench.old/configs/tasks/webshop/std.yaml:
--------------------------------------------------------------------------------
1 | module: src.tasks.WebShop
2 | 
3 | parameters:
4 |   name: "WebShop"
5 |   start: 0
6 |   end: 200
7 |   num_envs: 3
8 |   worker_limit: 3


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/data/4/N37.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "description": "What's the full path of a file \"TOOLS\"?",
 3 |     "create": {
 4 |         "local": "default",
 5 |         "init": "echo 123>/usr/TOOLS"
 6 |     },
 7 |     "evaluation": {
 8 |         "check": [
 9 |             null,
10 |             {
11 |                 "language": "python",
12 |                 "file": "check/string-match.py"
13 |             }
14 |         ],
15 |         "example": "find / -name TOOLS"
16 |     },
17 |     "labels": ["path", "file"]
18 | }
19 | 


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/data/4/N4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "description": "execute ~/test and get the output.",
 3 |     "start": "useradd -s /bin/bash -d /home/jack -m jack && cd /home/jack && echo 'echo love' > test && chmod 000 test && chown jack test && su - jack",
 4 |     "evaluation": {
 5 |         "match": "love",
 6 |         "example": {
 7 |             "code": "chmod 777 test && ~/test"
 8 |         }
 9 |     },
10 |     "labels": ["permission", "user"]
11 | }
12 | 


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/res/dockerfiles/default:
--------------------------------------------------------------------------------
1 | # Ubuntu image with Python 3, pip and common command-line tools preinstalled.
2 | FROM ubuntu
3 | # apt-get is used because `apt` warns that its CLI is not stable for scripts.
4 | # DEBIAN_FRONTEND=noninteractive keeps package configuration from prompting
5 | # during the build; one install call resolves dependencies once.
6 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
7 |     python3 python3-pip git vim curl wget unzip zip tree
8 | CMD ["bash"]
9 | 


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/res/dockerfiles/packages:
--------------------------------------------------------------------------------
1 | # Ubuntu image with Python 3, pip, npm and common command-line tools preinstalled.
2 | FROM ubuntu
3 | # apt-get is used because `apt` warns that its CLI is not stable for scripts.
4 | # DEBIAN_FRONTEND=noninteractive keeps package configuration from prompting
5 | # during the build; one install call resolves dependencies once.
6 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
7 |     python3 python3-pip git vim curl wget unzip zip tree npm
8 | CMD ["bash"]
9 | 


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/res/dockerfiles/ubuntu:
--------------------------------------------------------------------------------
1 | FROM ubuntu
2 | RUN echo "tmp" > file.txt
3 | CMD ["bash"]


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/1/check/containing.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v2 in v1:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/1/check/in.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v1 in v2:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/1/check/integer-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | if int(argv[1]) == int(argv[2]): exit(0)
3 | exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/1/check/string-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | 
3 | def norm_newline(s):
4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
5 | 
6 | if norm_newline(argv[1]).strip() == norm_newline(argv[2]).strip():
7 |   exit(0)
8 | exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/1/example/1.sh:
--------------------------------------------------------------------------------
 1 | # Install the `count` helper into /usr/local/bin: given a directory, it
 2 | # prints the number of files found beneath it, descending into subdirectories.
 3 | cat > /usr/local/bin/count <<'EOF'
 4 | #!/bin/bash
 5 | 
 6 | count_files() {
 7 |     local dir=$1
 8 |     local count=0
 9 | 
10 |     for file in "$dir"/*; do
11 |         if [ -f "$file" ]; then
12 |             count=$((count + 1))
13 |         elif [ -d "$file" ]; then
14 |             count_sub=$(count_files "$file")
15 |             count=$((count + count_sub))
16 |         fi
17 |     done
18 | 
19 |     echo "$count"
20 | }
21 | 
22 | directory="$1"
23 | total_count=$(count_files "$directory")
24 | echo "$total_count"
25 | EOF
26 | chmod +x /usr/local/bin/count


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/1/init/gen_words.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Write 100 randomly chosen vocabulary words to /usr/words.txt on one line,
 4 | # separated by runs of one to four spaces, with no trailing newline.
 5 | 
 6 | declare -a vocab=('aa' 'aaa' 'ab' 'abc' 'able' 'abut' 'ace' 'ache' 'act' 'acm')
 7 | declare -a sep=(' ' '  ' '   ' '    ')
 8 | 
 9 | out='/usr/words.txt'
10 | 
11 | # Indices are taken modulo the array lengths rather than hard-coded 10 and 4,
12 | # so editing either list cannot leave entries unreachable or select an empty
13 | # slot. The first word truncates the file and has no leading separator.
14 | echo -n "${vocab[RANDOM % ${#vocab[@]}]}" > "${out}"
15 | for i in {1..99}; do
16 |   echo -n "${sep[RANDOM % ${#sep[@]}]}${vocab[RANDOM % ${#vocab[@]}]}" >> "${out}"
17 | done
18 | 


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/1/init/install_nettools.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Refresh the package index, then install net-tools, iproute2 and lsof without prompting (-y).
3 | apt-get update && apt-get install -y net-tools iproute2 lsof


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/1/init/stock-log.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Append randomly generated trade records to /usr/stock.log, one per line, in
 4 | # the form "<name> | <action> | <stock_index> | <count>".
 5 | 
 6 | # Define the possible values for each field
 7 | names=("Alice" "Bob")
 8 | actions=("Purchase" "Sell")
 9 | 
10 | # Generate 401 random lines (i runs from 1 to 401 inclusive)
11 | for ((i=1; i<=401; i++))
12 | do
13 |     # Randomly select values for each field
14 |     name=${names[$RANDOM % ${#names[@]}]}
15 |     action=${actions[$RANDOM % ${#actions[@]}]}
16 |     stock_index=$((RANDOM % 100))   # 0..99
17 |     count=$((RANDOM % 1000))        # 0..999
18 | 
19 |     # Emit the record; the loop's output is appended to the log once, below
20 |     echo "$name | $action | $stock_index | $count"
21 | done >> /usr/stock.log
22 | 


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/2/check/containing.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v2 in v1:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/2/check/in.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v1 in v2:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/2/check/integer-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | if int(argv[1]) == int(argv[2]): exit(0)
3 | exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/2/check/string-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | 
3 | def norm_newline(s):
4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
5 | 
6 | if norm_newline(argv[1]).strip() == norm_newline(argv[2]).strip():
7 |   exit(0)
8 | exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/2/example/1.sh:
--------------------------------------------------------------------------------
 1 | # Install the `count` helper into /usr/local/bin: given a directory, it
 2 | # prints the number of files found beneath it, descending into subdirectories.
 3 | cat > /usr/local/bin/count <<'EOF'
 4 | #!/bin/bash
 5 | 
 6 | count_files() {
 7 |     local dir=$1
 8 |     local count=0
 9 | 
10 |     for file in "$dir"/*; do
11 |         if [ -f "$file" ]; then
12 |             count=$((count + 1))
13 |         elif [ -d "$file" ]; then
14 |             count_sub=$(count_files "$file")
15 |             count=$((count + count_sub))
16 |         fi
17 |     done
18 | 
19 |     echo "$count"
20 | }
21 | 
22 | directory="$1"
23 | total_count=$(count_files "$directory")
24 | echo "$total_count"
25 | EOF
26 | chmod +x /usr/local/bin/count


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/2/init/gen_words.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Write 100 randomly chosen vocabulary words to /usr/words.txt on one line,
 4 | # separated by runs of one to four spaces, with no trailing newline.
 5 | 
 6 | declare -a vocab=('aa' 'aaa' 'ab' 'abc' 'able' 'abut' 'ace' 'ache' 'act' 'acm')
 7 | declare -a sep=(' ' '  ' '   ' '    ')
 8 | 
 9 | out='/usr/words.txt'
10 | 
11 | # Indices are taken modulo the array lengths rather than hard-coded 10 and 4,
12 | # so editing either list cannot leave entries unreachable or select an empty
13 | # slot. The first word truncates the file and has no leading separator.
14 | echo -n "${vocab[RANDOM % ${#vocab[@]}]}" > "${out}"
15 | for i in {1..99}; do
16 |   echo -n "${sep[RANDOM % ${#sep[@]}]}${vocab[RANDOM % ${#vocab[@]}]}" >> "${out}"
17 | done
18 | 


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/2/init/install_nettools.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Refresh the package index, then install net-tools, iproute2 and lsof without prompting (-y).
3 | apt-get update && apt-get install -y net-tools iproute2 lsof


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/2/init/stock-log.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Append randomly generated trade records to /usr/stock.log, one per line, in
 4 | # the form "<name> | <action> | <stock_index> | <count>".
 5 | 
 6 | # Define the possible values for each field
 7 | names=("Alice" "Bob")
 8 | actions=("Purchase" "Sell")
 9 | 
10 | # Generate 401 random lines (i runs from 1 to 401 inclusive)
11 | for ((i=1; i<=401; i++))
12 | do
13 |     # Randomly select values for each field
14 |     name=${names[$RANDOM % ${#names[@]}]}
15 |     action=${actions[$RANDOM % ${#actions[@]}]}
16 |     stock_index=$((RANDOM % 100))   # 0..99
17 |     count=$((RANDOM % 1000))        # 0..999
18 | 
19 |     # Emit the record; the loop's output is appended to the log once, below
20 |     echo "$name | $action | $stock_index | $count"
21 | done >> /usr/stock.log
22 | 


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/3/check/containing.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v2 in v1:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/3/check/in.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v1 in v2:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/3/check/integer-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | if int(argv[1]) == int(argv[2]): exit(0)
3 | exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/3/check/string-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | 
3 | def norm_newline(s):
4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
5 | 
6 | if norm_newline(argv[1]).strip() == norm_newline(argv[2]).strip():
7 |   exit(0)
8 | exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/3/example/1.sh:
--------------------------------------------------------------------------------
 1 | # Install the `count` helper into /usr/local/bin: given a directory, it
 2 | # prints the number of files found beneath it, descending into subdirectories.
 3 | cat > /usr/local/bin/count <<'EOF'
 4 | #!/bin/bash
 5 | 
 6 | count_files() {
 7 |     local dir=$1
 8 |     local count=0
 9 | 
10 |     for file in "$dir"/*; do
11 |         if [ -f "$file" ]; then
12 |             count=$((count + 1))
13 |         elif [ -d "$file" ]; then
14 |             count_sub=$(count_files "$file")
15 |             count=$((count + count_sub))
16 |         fi
17 |     done
18 | 
19 |     echo "$count"
20 | }
21 | 
22 | directory="$1"
23 | total_count=$(count_files "$directory")
24 | echo "$total_count"
25 | EOF
26 | chmod +x /usr/local/bin/count


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/3/init/gen_words.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Write 100 randomly chosen vocabulary words to /usr/words.txt on one line,
 4 | # separated by runs of one to four spaces, with no trailing newline.
 5 | 
 6 | declare -a vocab=('aa' 'aaa' 'ab' 'abc' 'able' 'abut' 'ace' 'ache' 'act' 'acm')
 7 | declare -a sep=(' ' '  ' '   ' '    ')
 8 | 
 9 | out='/usr/words.txt'
10 | 
11 | # Indices are taken modulo the array lengths rather than hard-coded 10 and 4,
12 | # so editing either list cannot leave entries unreachable or select an empty
13 | # slot. The first word truncates the file and has no leading separator.
14 | echo -n "${vocab[RANDOM % ${#vocab[@]}]}" > "${out}"
15 | for i in {1..99}; do
16 |   echo -n "${sep[RANDOM % ${#sep[@]}]}${vocab[RANDOM % ${#vocab[@]}]}" >> "${out}"
17 | done
18 | 


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/3/init/install_nettools.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Refresh the package index, then install net-tools, iproute2 and lsof without prompting (-y).
3 | apt-get update && apt-get install -y net-tools iproute2 lsof


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/3/init/stock-log.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Append randomly generated trade records to /usr/stock.log, one per line, in
 4 | # the form "<name> | <action> | <stock_index> | <count>".
 5 | 
 6 | # Define the possible values for each field
 7 | names=("Alice" "Bob")
 8 | actions=("Purchase" "Sell")
 9 | 
10 | # Generate 401 random lines (i runs from 1 to 401 inclusive)
11 | for ((i=1; i<=401; i++))
12 | do
13 |     # Randomly select values for each field
14 |     name=${names[$RANDOM % ${#names[@]}]}
15 |     action=${actions[$RANDOM % ${#actions[@]}]}
16 |     stock_index=$((RANDOM % 100))   # 0..99
17 |     count=$((RANDOM % 1000))        # 0..999
18 | 
19 |     # Emit the record; the loop's output is appended to the log once, below
20 |     echo "$name | $action | $stock_index | $count"
21 | done >> /usr/stock.log
22 | 


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/4/check/containing.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v2 in v1:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/4/check/in.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v1 in v2:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/4/check/integer-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | if int(argv[1]) == int(argv[2]): exit(0)
3 | exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/4/check/string-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | 
3 | def norm_newline(s):
4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
5 | 
6 | if norm_newline(argv[1]).strip() == norm_newline(argv[2]).strip():
7 |   exit(0)
8 | exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/4/example/1.sh:
--------------------------------------------------------------------------------
 1 | # Install the `count` helper into /usr/local/bin: given a directory, it
 2 | # prints the number of files found beneath it, descending into subdirectories.
 3 | cat > /usr/local/bin/count <<'EOF'
 4 | #!/bin/bash
 5 | 
 6 | count_files() {
 7 |     local dir=$1
 8 |     local count=0
 9 | 
10 |     for file in "$dir"/*; do
11 |         if [ -f "$file" ]; then
12 |             count=$((count + 1))
13 |         elif [ -d "$file" ]; then
14 |             count_sub=$(count_files "$file")
15 |             count=$((count + count_sub))
16 |         fi
17 |     done
18 | 
19 |     echo "$count"
20 | }
21 | 
22 | directory="$1"
23 | total_count=$(count_files "$directory")
24 | echo "$total_count"
25 | EOF
26 | chmod +x /usr/local/bin/count


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/4/init/gen_words.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Write 100 randomly chosen vocabulary words to /usr/words.txt on one line,
 4 | # separated by runs of one to four spaces, with no trailing newline.
 5 | 
 6 | declare -a vocab=('aa' 'aaa' 'ab' 'abc' 'able' 'abut' 'ace' 'ache' 'act' 'acm')
 7 | declare -a sep=(' ' '  ' '   ' '    ')
 8 | 
 9 | out='/usr/words.txt'
10 | 
11 | # Indices are taken modulo the array lengths rather than hard-coded 10 and 4,
12 | # so editing either list cannot leave entries unreachable or select an empty
13 | # slot. The first word truncates the file and has no leading separator.
14 | echo -n "${vocab[RANDOM % ${#vocab[@]}]}" > "${out}"
15 | for i in {1..99}; do
16 |   echo -n "${sep[RANDOM % ${#sep[@]}]}${vocab[RANDOM % ${#vocab[@]}]}" >> "${out}"
17 | done
18 | 


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/4/init/install_nettools.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Refresh the package index, then install net-tools, iproute2 and lsof without prompting (-y).
3 | apt-get update && apt-get install -y net-tools iproute2 lsof


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/4/init/stock-log.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Append randomly generated trade records to /usr/stock.log, one per line, in
 4 | # the form "<name> | <action> | <stock_index> | <count>".
 5 | 
 6 | # Define the possible values for each field
 7 | names=("Alice" "Bob")
 8 | actions=("Purchase" "Sell")
 9 | 
10 | # Generate 401 random lines (i runs from 1 to 401 inclusive)
11 | for ((i=1; i<=401; i++))
12 | do
13 |     # Randomly select values for each field
14 |     name=${names[$RANDOM % ${#names[@]}]}
15 |     action=${actions[$RANDOM % ${#actions[@]}]}
16 |     stock_index=$((RANDOM % 100))   # 0..99
17 |     count=$((RANDOM % 1000))        # 0..999
18 | 
19 |     # Emit the record; the loop's output is appended to the log once, below
20 |     echo "$name | $action | $stock_index | $count"
21 | done >> /usr/stock.log
22 | 


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/5/check/containing.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v2 in v1:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/5/check/in.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v1 in v2:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/5/check/integer-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | if int(argv[1]) == int(argv[2]): exit(0)
3 | exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/5/check/string-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | 
3 | def norm_newline(s):
4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
5 | 
6 | if norm_newline(argv[1]).strip() == norm_newline(argv[2]).strip():
7 |   exit(0)
8 | exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/5/checking/2.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Verify that the `date-format` command prints the same text as `date -d`
 4 | # for a series of date/format pairs.
 5 | # Exit status: 0 when every case matches, 1 on the first failure.
 6 | 
 7 | # check DATE FORMAT
 8 | # Return 0 when `date-format DATE FORMAT` succeeds and prints exactly what
 9 | # `date -d DATE +FORMAT` prints; return 1 otherwise.
10 | check() {
11 |     local target output
12 |     target=$(date -d "$1" +"$2")
13 |     output=$(date-format "$1" "$2") || return 1
14 |     # Report through the return status. Calling `exit 0` here on success
15 |     # would end the whole script after the first case and skip the rest.
16 |     [ "$output" = "$target" ]
17 | }
18 | 
19 | check "2023-5-1" "%Y-%m" || exit 1
20 | check "23-5-2" "%Y-%m-%d" || exit 1
21 | check "2023-5-1" "%Y/%m" || exit 1
22 | check "2023-5-1" "%m/%d" || exit 1
23 | check "2023/5/10" "%d/%m" || exit 1
24 | check "2021/05/1" "Date: %Y-%m-%d" || exit 1
25 | 
26 | exit 0


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/5/example/0.sh:
--------------------------------------------------------------------------------
1 | # Create the `calc` helper, which prints the Python expression formed by its
2 | # arguments with six decimal places, and move it into /usr/local/bin.
3 | cat > calc <<'EOF'
4 | #!/bin/bash
5 | python3 -c "print(\"%.6f\"%($*))"
6 | EOF
7 | chmod +x calc
8 | mv calc /usr/local/bin/
9 | 


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/5/example/1.sh:
--------------------------------------------------------------------------------
 1 | # Install the `count` helper into /usr/local/bin: given a directory, it
 2 | # prints the number of files found beneath it, descending into subdirectories.
 3 | cat > /usr/local/bin/count <<'EOF'
 4 | #!/bin/bash
 5 | 
 6 | count_files() {
 7 |     local dir=$1
 8 |     local count=0
 9 | 
10 |     for file in "$dir"/*; do
11 |         if [ -f "$file" ]; then
12 |             count=$((count + 1))
13 |         elif [ -d "$file" ]; then
14 |             count_sub=$(count_files "$file")
15 |             count=$((count + count_sub))
16 |         fi
17 |     done
18 | 
19 |     echo "$count"
20 | }
21 | 
22 | directory="$1"
23 | total_count=$(count_files "$directory")
24 | echo "$total_count"
25 | EOF
26 | chmod +x /usr/local/bin/count


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/5/example/2.sh:
--------------------------------------------------------------------------------
1 | # Install `date-format DATE FORMAT`, a thin wrapper around `date -d DATE +FORMAT`.
2 | cat > /usr/local/bin/date-format <<'EOF'
3 | #!/bin/bash
4 | 
5 | date -d "$1" +"$2"
6 | 
7 | 
8 | EOF
9 | chmod +x /usr/local/bin/date-format


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/5/init/1.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/data/os_interaction/scripts/5/init/1.sh


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/7/check/containing.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v2 in v1:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/7/check/in.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v1 in v2:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/7/check/integer-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | if int(argv[1]) == int(argv[2]): exit(0)
3 | exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/7/check/string-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | 
3 | def norm_newline(s):
4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
5 | 
6 | if norm_newline(argv[1]).strip() == norm_newline(argv[2]).strip():
7 |   exit(0)
8 | exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/7/example/1.sh:
--------------------------------------------------------------------------------
 1 | # Install the `count` helper into /usr/local/bin: given a directory, it
 2 | # prints the number of files found beneath it, descending into subdirectories.
 3 | cat > /usr/local/bin/count <<'EOF'
 4 | #!/bin/bash
 5 | 
 6 | count_files() {
 7 |     local dir=$1
 8 |     local count=0
 9 | 
10 |     for file in "$dir"/*; do
11 |         if [ -f "$file" ]; then
12 |             count=$((count + 1))
13 |         elif [ -d "$file" ]; then
14 |             count_sub=$(count_files "$file")
15 |             count=$((count + count_sub))
16 |         fi
17 |     done
18 | 
19 |     echo "$count"
20 | }
21 | 
22 | directory="$1"
23 | total_count=$(count_files "$directory")
24 | echo "$total_count"
25 | EOF
26 | chmod +x /usr/local/bin/count


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/7/init/gen_words.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Write 100 randomly chosen vocabulary words to /usr/words.txt on one line,
 4 | # separated by runs of one to four spaces, with no trailing newline.
 5 | 
 6 | declare -a vocab=('aa' 'aaa' 'ab' 'abc' 'able' 'abut' 'ace' 'ache' 'act' 'acm')
 7 | declare -a sep=(' ' '  ' '   ' '    ')
 8 | 
 9 | out='/usr/words.txt'
10 | 
11 | # Indices are taken modulo the array lengths rather than hard-coded 10 and 4,
12 | # so editing either list cannot leave entries unreachable or select an empty
13 | # slot. The first word truncates the file and has no leading separator.
14 | echo -n "${vocab[RANDOM % ${#vocab[@]}]}" > "${out}"
15 | for i in {1..99}; do
16 |   echo -n "${sep[RANDOM % ${#sep[@]}]}${vocab[RANDOM % ${#vocab[@]}]}" >> "${out}"
17 | done
18 | 


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/7/init/install_nettools.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | apt-get update && apt-get install -y net-tools iproute2 lsof


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/7/init/stock-log.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Append randomly generated records to /usr/stock.log, one per line, in the
# form "<name> | <action> | <stock_index> | <count>". The file is opened in
# append mode, so any existing content is kept.

# Define the possible values for each field
names=("Alice" "Bob")
actions=("Purchase" "Sell")

# Generate 401 random lines (the counter runs from 1 through 401 inclusive)
for ((i=1; i<=401; i++))
do
    # Randomly select values for each field
    name=${names[$RANDOM % ${#names[@]}]}
    action=${actions[$RANDOM % ${#actions[@]}]}
    # stock_index is in 0..99 and count in 0..999
    stock_index=$((RANDOM % 100))
    count=$((RANDOM % 1000))
    
    # Write the line to the file
    echo "$name | $action | $stock_index | $count" >> /usr/stock.log
done
19 | 


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/dev/check/containing.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v2 in v1:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/dev/check/in.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v1 in v2:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/dev/check/integer-match.py:
--------------------------------------------------------------------------------
"""Exit 0 when two command-line arguments parse to the same integer, else 1.

Usage: integer-match.py <first> <second>
"""
import sys


def main(argv):
    """Compare ``int(argv[1])`` with ``int(argv[2])`` and return the status.

    Returns 0 when both operands parse as integers and are equal, 1
    otherwise. An operand that is missing or is not a valid integer literal
    yields 1: the same exit status the uncaught exception produced before,
    but without a traceback. ``int`` tolerates surrounding whitespace, so
    ``" 42\\n"`` matches ``"42"``.
    """
    if len(argv) < 3:
        sys.stderr.write("usage: integer-match.py <first> <second>\n")
        return 1
    try:
        first = int(argv[1])
        second = int(argv[2])
    except ValueError:
        # A non-numeric answer is simply a mismatch.
        return 1
    return 0 if first == second else 1


if __name__ == "__main__":
    # sys.exit is the supported way to set the exit status; the bare `exit`
    # name is injected by the site module for interactive use.
    sys.exit(main(sys.argv))


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/dev/check/string-match.py:
--------------------------------------------------------------------------------
"""Exit 0 when two command-line strings are equal after normalisation, else 1.

Usage: string-match.py <first> <second>

Both arguments have CRLF / lone CR line endings converted to LF and leading
and trailing whitespace stripped before they are compared.
"""
import sys


def norm_newline(s):
    """Return *s* with CRLF and lone CR line endings converted to LF."""
    return s.replace("\r\n", "\n").replace("\r", "\n")


def main(argv):
    """Compare ``argv[1]`` with ``argv[2]`` and return the exit status.

    Returns 0 when the normalised, stripped strings are equal and 1
    otherwise. Fewer than two operands also yields 1, the same status the
    uncaught IndexError produced before, but with a usage line on stderr
    instead of a traceback. Extra operands are ignored.
    """
    if len(argv) < 3:
        sys.stderr.write("usage: string-match.py <first> <second>\n")
        return 1
    first = norm_newline(argv[1]).strip()
    second = norm_newline(argv[2]).strip()
    return 0 if first == second else 1


if __name__ == "__main__":
    # sys.exit is the supported way to set the exit status; the bare `exit`
    # name is injected by the site module for interactive use.
    sys.exit(main(sys.argv))


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/dev/example/0.sh:
--------------------------------------------------------------------------------
1 | echo '#!/bin/bash
2 | python3 -c "print(\"%.6f\"%($*))"' > calc
3 | chmod +x calc
4 | mv calc /usr/local/bin/
5 | 


--------------------------------------------------------------------------------
/AgentBench.old/data/os_interaction/scripts/dev/init/stock-log.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Append randomly generated records to /usr/stock.log, one per line, in the
# form "<name> | <action> | <stock_index> | <count>". The file is opened in
# append mode, so any existing content is kept.

# Define the possible values for each field
names=("Alice" "Bob")
actions=("Purchase" "Sell")

# Generate 401 random lines (the counter runs from 1 through 401 inclusive)
for ((i=1; i<=401; i++))
do
    # Randomly select values for each field
    name=${names[$RANDOM % ${#names[@]}]}
    action=${actions[$RANDOM % ${#actions[@]}]}
    # stock_index is in 0..99 and count in 0..999
    stock_index=$((RANDOM % 100))
    count=$((RANDOM % 1000))
    
    # Write the line to the file
    echo "$name | $action | $stock_index | $count" >> /usr/stock.log
done
19 | 


--------------------------------------------------------------------------------
/AgentBench.old/eval/AgentLM-13b-eval-all.sh:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/bash
 2 | 
 3 | export AGENT_CONFIG='configs/agents/tgi_clients/AgentLM-13b.yaml'
 4 | export WORKERS=8
 5 | eval_time=$(date "+%Y-%m-%d-%H:%M:%S")
 6 | export OUTPUT_ROOT_DIR=outputs/AgentLM-13b/$eval_time
 7 | 
 8 | # For Held-in task
 9 | export SPLIT='std'
10 | bash eval/single-task/alfworld.sh
11 | bash eval/single-task/webshop.sh
12 | bash eval/single-task/mind2web.sh
13 | bash eval/single-task/kg.sh
14 | bash eval/single-task/db.sh
15 | bash eval/single-task/os.sh
16 | 
17 | # For Held-out task
18 | export SPLIT='ext'
19 | bash eval/single-task/card.sh


--------------------------------------------------------------------------------
/AgentBench.old/eval/AgentLM-70b-eval-all.sh:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/bash
 2 | 
 3 | export AGENT_CONFIG='configs/agents/tgi_clients/AgentLM-70b.yaml'
 4 | export WORKERS=8
 5 | eval_time=$(date "+%Y-%m-%d-%H:%M:%S")
 6 | export OUTPUT_ROOT_DIR=outputs/AgentLM-70b/$eval_time
 7 | 
 8 | # For Held-in task
 9 | export SPLIT='std'
10 | bash eval/single-task/alfworld.sh
11 | bash eval/single-task/webshop.sh
12 | bash eval/single-task/mind2web.sh
13 | bash eval/single-task/kg.sh
14 | bash eval/single-task/db.sh
15 | bash eval/single-task/os.sh
16 | 
17 | # For Held-out task
18 | export SPLIT='ext'
19 | bash eval/single-task/card.sh


--------------------------------------------------------------------------------
/AgentBench.old/eval/AgentLM-7b-eval-all.sh:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/bash
 2 | 
 3 | export AGENT_CONFIG='configs/agents/tgi_clients/AgentLM-7b.yaml'
 4 | export WORKERS=8
 5 | eval_time=$(date "+%Y-%m-%d-%H:%M:%S")
 6 | export OUTPUT_ROOT_DIR=outputs/AgentLM-7b/$eval_time
 7 | 
 8 | # For Held-in task
 9 | export SPLIT='std'
10 | bash eval/single-task/alfworld.sh
11 | bash eval/single-task/webshop.sh
12 | bash eval/single-task/mind2web.sh
13 | bash eval/single-task/kg.sh
14 | bash eval/single-task/db.sh
15 | bash eval/single-task/os.sh
16 | 
17 | # # For Held-out task
18 | export SPLIT='ext'
19 | bash eval/single-task/card.sh


--------------------------------------------------------------------------------
/AgentBench.old/eval/single-task/alfworld.sh:
--------------------------------------------------------------------------------
1 | source eval/single-task/eval_single_setup.sh
2 | 
3 | evaluate_in_docker "learningrate/agentbench-alfworld" \
4 |     --task "configs/tasks/alfworld/$SPLIT.yaml" \
5 |     --agent "$AGENT_CONFIG" \
6 |     --workers $WORKERS \
7 |     --output_dir "$OUTPUT_DIR" --no_timestamp \
8 |     --max_new_tokens 128


--------------------------------------------------------------------------------
/AgentBench.old/eval/single-task/card.sh:
--------------------------------------------------------------------------------
1 | source eval/single-task/eval_single_setup.sh
2 | 
3 | evaluate_in_docker "learningrate/agentbench-card_game" \
4 |     --task "configs/tasks/card_game/$SPLIT.yaml" \
5 |     --agent "$AGENT_CONFIG" \
6 |     --workers $WORKERS \
7 |     --output_dir "$OUTPUT_DIR" --no_timestamp\
8 |     --max_new_tokens 512


--------------------------------------------------------------------------------
/AgentBench.old/eval/single-task/db.sh:
--------------------------------------------------------------------------------
1 | source eval/single-task/eval_single_setup.sh
2 | 
3 | evaluate_directly \
4 |     --task "configs/tasks/dbbench/$SPLIT.yaml" \
5 |     --agent "$AGENT_CONFIG" \
6 |     --workers $WORKERS \
7 |     --output_dir "$OUTPUT_DIR" --no_timestamp\
8 |     --max_new_tokens 128


--------------------------------------------------------------------------------
/AgentBench.old/eval/single-task/kg.sh:
--------------------------------------------------------------------------------
1 | source eval/single-task/eval_single_setup.sh
2 | 
3 | evaluate_directly \
4 |     --task "configs/tasks/knowledgegraph/$SPLIT.yaml" \
5 |     --agent "$AGENT_CONFIG" \
6 |     --workers $WORKERS \
7 |     --output_dir "$OUTPUT_DIR" --no_timestamp\
8 |     --max_new_tokens 128


--------------------------------------------------------------------------------
/AgentBench.old/eval/single-task/mind2web.sh:
--------------------------------------------------------------------------------
1 | source eval/single-task/eval_single_setup.sh
2 | 
3 | evaluate_in_docker "learningrate/agentbench-mind2web" \
4 |     --task "configs/tasks/mind2web/$SPLIT.yaml" \
5 |     --agent "$AGENT_CONFIG" \
6 |     --workers $WORKERS \
7 |     --output_dir "$OUTPUT_DIR" --no_timestamp\
8 |     --max_new_tokens 128


--------------------------------------------------------------------------------
/AgentBench.old/eval/single-task/os.sh:
--------------------------------------------------------------------------------
1 | source eval/single-task/eval_single_setup.sh
2 | set -x
3 | 
4 | evaluate_directly \
5 |     --task "configs/tasks/os_interaction/$SPLIT.yaml" \
6 |     --agent "$AGENT_CONFIG" \
7 |     --workers $WORKERS \
8 |     --output_dir "$OUTPUT_DIR" --no_timestamp\
9 |     --max_new_tokens 128


--------------------------------------------------------------------------------
/AgentBench.old/eval/single-task/webshop.sh:
--------------------------------------------------------------------------------
1 | source eval/single-task/eval_single_setup.sh
2 | 
3 | evaluate_in_docker "learningrate/agentbench-webshop" \
4 |     --task "configs/tasks/webshop/$SPLIT.yaml" \
5 |     --agent "$AGENT_CONFIG" \
6 |     --workers $WORKERS \
7 |     --output_dir "$OUTPUT_DIR" --no_timestamp \
8 |     --max_new_tokens 128
9 | 


--------------------------------------------------------------------------------
/AgentBench.old/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers
2 | fschat==0.2.31
3 | dataclass_wizard
4 | jsonlines
5 | tensorboard
6 | openai


--------------------------------------------------------------------------------
/AgentBench.old/src/__init__.py:
--------------------------------------------------------------------------------
1 | from .agent import Agent, Session
2 | from .task import *
3 | from .configs import *
4 | from .utils import print_rank_0, JsonEncoder
5 | from .agents import *
6 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/agents/__init__.py:
--------------------------------------------------------------------------------
1 | from .do_nothing_agent import DoNothingAgent
2 | from . import tgi_client
3 | from .tgi_client import TGIAgent


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/alfworld/__init__.py:
--------------------------------------------------------------------------------
1 | from .task import ALFWorld


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/C++/Action.cpp:
--------------------------------------------------------------------------------
 1 | #include "Action.hpp"
 2 | #include <algorithm>
 3 | 
// Stub for AI::Pick; the body is left for the SDK user to implement.
// NOTE(review): as written, control reaches the end of a non-void function,
// which is undefined behaviour if the function is called. Return a
// std::vector<int> once implemented.
std::vector<int> AI::Pick(Game game)
{
    // TODO: fill your code
}

// Stub for AI::Assert; the body is left for the SDK user to implement.
// NOTE(review): no return statement yet. Must return a std::pair<int, int>
// before this function is called.
std::pair<int, int> AI::Assert(Game game)
{
    // TODO: fill your code
}

// Stub for AI::Act; the body is left for the SDK user to implement.
// NOTE(review): no return statement yet. Must return an Action before this
// function is called.
Action AI::Act(Game game)
{
    // TODO: fill your code
}


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/C++/Makefile:
--------------------------------------------------------------------------------
# Build the `main` executable from main.cpp, Action.cpp and the bundled
# jsoncpp source in a single g++ invocation (C++17).
#
# `all` names an action, not a file. Declaring it phony keeps the build
# running even if a file called `all` ever appears in this directory.
.PHONY: all
all:
	g++ main.cpp Action.cpp sdk/jsoncpp/jsoncpp.cpp -o main -std=c++17


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/C++/main.cpp:
--------------------------------------------------------------------------------
1 | #include "Action.hpp"
2 | 
// Program entry point: heap-allocates an AI, calls its run() method, frees the
// object and exits with status 0.
int main()
{
    AI *myAI = new AI();
    myAI->run();
    delete (myAI);
    return 0;
}


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/basline1.py:
--------------------------------------------------------------------------------
1 | from action1 import AI
2 | import sys
3 | 
if __name__ == "__main__":
    # The stage number is read from the first command-line argument; the AI
    # is built for that stage and started immediately.
    AI(int(sys.argv[1])).run()
9 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/basline2.py:
--------------------------------------------------------------------------------
1 | from action2 import AI
2 | import sys
3 | 
if __name__ == "__main__":
    # The stage number is read from the first command-line argument; the AI
    # is built for that stage and started immediately.
    AI(int(sys.argv[1])).run()
9 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/basline3.py:
--------------------------------------------------------------------------------
1 | from action3 import AI
2 | import sys
3 | 
if __name__ == "__main__":
    # The stage number is read from the first command-line argument; the AI
    # is built for that stage and started immediately.
    AI(int(sys.argv[1])).run()
9 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/main.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from client import Client
 3 | import json
 4 | 
 5 | if __name__ == "__main__":
 6 |     language = sys.argv[1]
 7 |     stage = int(sys.argv[2])
 8 |     order = int(sys.argv[3])
 9 |     save_dir = sys.argv[4]
10 |     port = int(sys.argv[5])
11 |     client = Client(port=port)
12 |     if language == 'en':
13 |         from AI_En import Agent
14 |         myAI = Agent(client, stage, order, save_dir)
15 |     else:
16 |         from AI_Cn import Agent
17 |         myAI = Agent(client, stage, order, save_dir)
18 |     
19 |     myAI.run()
20 |     client.quit()


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/ai_client.cpython-310-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/ai_client.cpython-310-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/ai_client.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/ai_client.cpython-38-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/3.25.0/CMakeDetermineCompilerABI_CXX.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/3.25.0/CMakeDetermineCompilerABI_CXX.bin


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/3.25.0/CMakeSystem.cmake:
--------------------------------------------------------------------------------
 1 | set(CMAKE_HOST_SYSTEM "Linux-5.10.16.3-microsoft-standard-WSL2")
 2 | set(CMAKE_HOST_SYSTEM_NAME "Linux")
 3 | set(CMAKE_HOST_SYSTEM_VERSION "5.10.16.3-microsoft-standard-WSL2")
 4 | set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64")
 5 | 
 6 | 
 7 | 
 8 | set(CMAKE_SYSTEM "Linux-5.10.16.3-microsoft-standard-WSL2")
 9 | set(CMAKE_SYSTEM_NAME "Linux")
10 | set(CMAKE_SYSTEM_VERSION "5.10.16.3-microsoft-standard-WSL2")
11 | set(CMAKE_SYSTEM_PROCESSOR "x86_64")
12 | 
13 | set(CMAKE_CROSSCOMPILING "FALSE")
14 | 
15 | set(CMAKE_SYSTEM_LOADED 1)
16 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/3.25.0/CompilerIdCXX/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/3.25.0/CompilerIdCXX/a.out


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/TargetDirectories.txt:
--------------------------------------------------------------------------------
1 | /mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/build/CMakeFiles/ai_client.dir
2 | /mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/build/CMakeFiles/edit_cache.dir
3 | /mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/build/CMakeFiles/rebuild_cache.dir
4 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/ai_client.dir/cmake_clean.cmake:
--------------------------------------------------------------------------------
 1 | file(REMOVE_RECURSE
 2 |   "CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o"
 3 |   "CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o.d"
 4 |   "CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o"
 5 |   "CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o.d"
 6 |   "bin/ai_client.cpython-310-x86_64-linux-gnu.so"
 7 |   "bin/ai_client.pdb"
 8 | )
 9 | 
10 | # Per-language clean rules from dependency scanning.
11 | foreach(lang CXX)
12 |   include(CMakeFiles/ai_client.dir/cmake_clean_${lang}.cmake OPTIONAL)
13 | endforeach()
14 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/ai_client.dir/compiler_depend.make:
--------------------------------------------------------------------------------
1 | # Empty compiler generated dependencies file for ai_client.
2 | # This may be replaced when dependencies are built.
3 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/ai_client.dir/compiler_depend.ts:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Timestamp file for compiler generated dependencies management for ai_client.
3 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/ai_client.dir/depend.make:
--------------------------------------------------------------------------------
1 | # Empty dependencies file for ai_client.
2 | # This may be replaced when dependencies are built.
3 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/ai_client.dir/flags.make:
--------------------------------------------------------------------------------
 1 | # CMAKE generated file: DO NOT EDIT!
 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.25
 3 | 
 4 | # compile CXX with /usr/bin/c++
 5 | CXX_DEFINES = -Dai_client_EXPORTS
 6 | 
 7 | CXX_INCLUDES = -isystem /home/piano/mambaforge/lib/python3.10/site-packages/pybind11/include -isystem /mnt/e/conda/envs/ml/include/python3.10
 8 | 
 9 | CXX_FLAGS = -O3 -DNDEBUG -fPIC -fvisibility=hidden -flto -fno-fat-lto-objects -std=gnu++17
10 | 
11 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/ai_client.dir/link.txt:
--------------------------------------------------------------------------------
1 | /usr/bin/c++ -fPIC -O3 -DNDEBUG -flto -shared  -o bin/ai_client.cpython-310-x86_64-linux-gnu.so CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o 
2 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/ai_client.dir/progress.make:
--------------------------------------------------------------------------------
1 | CMAKE_PROGRESS_1 = 1
2 | CMAKE_PROGRESS_2 = 2
3 | CMAKE_PROGRESS_3 = 3
4 | 
5 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/cmake.check_cache:
--------------------------------------------------------------------------------
1 | # This file is generated by cmake for dependency checking of the CMakeCache.txt file
2 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/progress.marks:
--------------------------------------------------------------------------------
1 | 3
2 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/bin/ai_client.cpython-310-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/bin/ai_client.cpython-310-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/pyd/ai_client.cp38-win_amd64.pyd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/pyd/ai_client.cp38-win_amd64.pyd


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/pyd/ai_client.cpython-310-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/pyd/ai_client.cpython-310-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/pyd/ai_client.cpython-36m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/pyd/ai_client.cpython-36m-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/pyd/ai_client.cpython-38-darwin.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/pyd/ai_client.cpython-38-darwin.so


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/pyd/ai_client.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/pyd/ai_client.cpython-38-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/__init__.py:
--------------------------------------------------------------------------------
1 | from .task import CardGame


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/logic/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | cmake-build-debug


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/logic/Makefile:
--------------------------------------------------------------------------------
# Build bin/main from the game-logic sources and the bundled jsoncpp source in
# a single g++ invocation (C++17).
#
# `all` names an action, not a file. Declaring it phony keeps the build
# running even if a file called `all` ever appears in this directory.
.PHONY: all
all:
	g++ src/aqua_sdk.cpp src/fish.cpp src/fishset.cpp src/game.cpp src/main.cpp src/player.cpp src/jsoncpp/jsoncpp.cpp -o bin/main -std=c++17


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/logic/bin/main:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/card_game/logic/bin/main


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/logic/src/main:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/card_game/logic/src/main


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/logic/src/main.cpp:
--------------------------------------------------------------------------------
1 | #include "aqua_sdk.cpp"
2 | 
// Program entry point: constructs an AquaWarSDK, calls start() and then run()
// on it, and exits with status 0. argc and argv are accepted but not used.
// NOTE(review): this file pulls in aqua_sdk.cpp via #include, and the logic
// Makefile's g++ command also lists src/aqua_sdk.cpp as its own translation
// unit. Confirm this does not produce duplicate definitions at link time.
int main(int argc, char* argv[]){
    AquaWarSDK aw;
    aw.start();
    aw.run();

    return 0;
}


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/card_game/logic/src/timer.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <chrono>
 4 | 
 5 | class Timer {
 6 |    public:
 7 |     using BaseClock = std::chrono::steady_clock;
 8 |     using TimePoint = BaseClock::time_point;
 9 |     using Duration = BaseClock::duration;
10 | 
11 |     Timer() : m_start(Timer::now()) {}
12 |     int runtime() const {
13 |         Duration time = Timer::now() - m_start;
14 |         return std::chrono::duration_cast<std::chrono::seconds>(time).count();
15 |     }
16 |     static TimePoint now() { return BaseClock::now(); }
17 | 
18 |    private:
19 |     TimePoint m_start;
20 | };


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/dbbench/requirements.txt:
--------------------------------------------------------------------------------
1 | mysql-connector-python==8.0.33
2 | docker==6.1.2


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/knowledgegraph/__init__.py:
--------------------------------------------------------------------------------
1 | from .task import KnowledgeGraph


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/knowledgegraph/requirements.txt:
--------------------------------------------------------------------------------
1 | SPARQLWrapper
2 | networkx


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/mind2web/__init__.py:
--------------------------------------------------------------------------------
1 | from .task import Mind2Web


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/mind2web/data_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/mind2web/data_utils/__init__.py


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/os_interaction/__init__.py:
--------------------------------------------------------------------------------
1 | from .task import OSInteraction


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/os_interaction/requirements.txt:
--------------------------------------------------------------------------------
1 | docker
2 | PyYAML
3 | PyYAML
4 | Requests
5 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/.gitignore:
--------------------------------------------------------------------------------
 1 | *.ipynb*
 2 | *.pyc
 3 | *.swp
 4 | 
 5 | .DS_Store
 6 | .idea/
 7 | .pytest_cache/
 8 | .vscode/
 9 | 
10 | __pycache__/
11 | data/
12 | search_engine/indexes*
13 | search_engine/resources*
14 | transfer/flagged
15 | user_session_logs/
16 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/assets/diagram.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/webshop/assets/diagram.gif


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/assets/model_ckpts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/webshop/assets/model_ckpts.png


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/assets/transfer-logic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/webshop/assets/transfer-logic.png


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/baseline_models/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | datasets
3 | faiss-gpu
4 | transformers
5 | wandb


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/webshop/conftest.py


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/requirements.txt:
--------------------------------------------------------------------------------
 1 | beautifulsoup4==4.11.1
 2 | cleantext==1.1.4
 3 | env==0.1.0
 4 | Flask==2.1.2
 5 | gdown
 6 | gradio
 7 | gym==0.24.0
 8 | numpy==1.22.4
 9 | pandas==1.4.2
10 | pyserini==0.17.0
11 | pytest
12 | PyYAML==6.0
13 | rank_bm25==0.2.2
14 | requests==2.27.1
15 | requests_mock
16 | rich==12.4.4
17 | scikit_learn==1.1.1
18 | selenium==4.2.0
19 | spacy
20 | thefuzz==0.19.0
21 | torch==1.11.0
22 | tqdm==4.64.0
23 | train==0.0.5
24 | transformers==4.19.2
25 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/run_dev.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export FLASK_ENV=development  # exported so the Python process started below inherits it
3 | python -m web_agent_site.app --log --attrs  # run the app module with both the --log and --attrs flags
4 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/run_prod.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python -m web_agent_site.app --log  # run the app module with --log only; FLASK_ENV is not set by this script
3 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/run_web_agent_site_env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python -m run_envs.run_web_agent_site_env  # run the run_envs.run_web_agent_site_env module as a script
3 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/run_web_agent_text_env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python -m run_envs.run_web_agent_text_env  # run the run_envs.run_web_agent_text_env module as a script
3 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/search_engine/lucene_searcher.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from pyserini.search.lucene import LuceneSearcher
 3 | from rich import print
 4 | 
 5 | # Ad-hoc script: run one fixed query against the Lucene index and print every hit.
 6 | searcher = LuceneSearcher('indexes')  # 'indexes' is a relative path, so it resolves against the working directory
 7 | hits = searcher.search('rubber sole shoes', k=20)  # presumably k caps the number of hits returned; confirm in pyserini docs
 8 | 
 9 | for hit in hits:
10 |     doc = searcher.doc(hit.docid)  # look the hit up again by its docid
11 |     print(doc)  # `print` is rich's print (imported at the top of this file), not the builtin
12 |     obj = json.loads(doc.raw())['product']['Title']  # doc.raw() is parsed as JSON holding product -> Title
13 |     print(obj)
14 | 
15 | print(len(hits))  # number of hits the search returned
16 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/transfer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/webshop/transfer/__init__.py


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/web_agent_site/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/webshop/web_agent_site/__init__.py


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/web_agent_site/engine/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/webshop/web_agent_site/engine/__init__.py


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/web_agent_site/envs/__init__.py:
--------------------------------------------------------------------------------
 1 | from gym.envs.registration import register
 2 | 
 3 | from web_agent_site.envs.web_agent_site_env import WebAgentSiteEnv
 4 | from web_agent_site.envs.web_agent_text_env import WebAgentTextEnv
 5 | # Register the two environment classes imported above with gym; this runs at package import time.
 6 | register(
 7 |   id='WebAgentSiteEnv-v0',
 8 |   entry_point='web_agent_site.envs:WebAgentSiteEnv',  # "module:ClassName" string naming the class imported above
 9 | )
10 | 
11 | register(
12 |   id='WebAgentTextEnv-v0',
13 |   entry_point='web_agent_site.envs:WebAgentTextEnv',  # "module:ClassName" string naming the class imported above
14 | )


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/web_agent_site/envs/chromedriver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/webshop/web_agent_site/envs/chromedriver


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/web_agent_site/models/__init__.py:
--------------------------------------------------------------------------------
1 | from web_agent_site.models.models import *
2 | 


--------------------------------------------------------------------------------
/AgentBench.old/src/tasks/webshop/web_agent_site/static/images/no-image-available.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/AgentBench.old/src/tasks/webshop/web_agent_site/static/images/no-image-available.png


--------------------------------------------------------------------------------
/assets/head-figure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/assets/head-figure.png


--------------------------------------------------------------------------------
/assets/main-figure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/assets/main-figure.png


--------------------------------------------------------------------------------
/docs/static/images/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/docs/static/images/favicon.ico


--------------------------------------------------------------------------------
/docs/static/images/head-figure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/docs/static/images/head-figure.png


--------------------------------------------------------------------------------
/docs/static/images/main-figure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/docs/static/images/main-figure.png


--------------------------------------------------------------------------------
/docs/static/js/index.js:
--------------------------------------------------------------------------------
 1 | window.HELP_IMPROVE_VIDEOJS = false;
 2 | 
 3 | 
 4 | $(document).ready(function() {
 5 |     // Check for click events on the navbar burger icon
 6 | 
 7 |     var options = {
 8 | 			slidesToScroll: 1,
 9 | 			slidesToShow: 1,
10 | 			loop: true,
11 | 			infinite: true,
12 | 			autoplay: true,
13 | 			autoplaySpeed: 5000,
14 |     }
15 | 
16 | 		// Initialize all div with carousel class
17 |     var carousels = bulmaCarousel.attach('.carousel', options);
18 | 	
19 |     bulmaSlider.attach();
20 | 
21 | })
22 | 


--------------------------------------------------------------------------------
/eval_general/requirements.txt:
--------------------------------------------------------------------------------
 1 | fschat[llm-judge]==0.2.31
 2 | tqdm
 3 | numpy
 4 | jsonlines
 5 | argparse
 6 | datasets
 7 | requests
 8 | jsonlines
 9 | torch==2.0.1
10 | shortuuid
11 | 


--------------------------------------------------------------------------------
/eval_heldout/hotpotQA/requirements.txt:
--------------------------------------------------------------------------------
1 | langchain==0.0.174
2 | pandas<=2.0.0


--------------------------------------------------------------------------------
/eval_heldout/hotpotQA/src/config.py:
--------------------------------------------------------------------------------
 1 | """
 2 |  Copyright (c) 2023, salesforce.com, inc.
 3 |  All rights reserved.
 4 |  SPDX-License-Identifier: Apache License 2.0
 5 |  For full license text, see the LICENSE file in the repo root or https://www.apache.org/licenses/LICENSE-2.0
 6 | """
 7 | # Module-level settings: the list of agent names and the OpenAI API key placeholder.
 8 | available_agent_names = ["Zeroshot_HotPotQA_run_Agent", "ZeroshotThink_HotPotQA_run_Agent",
 9 |                          "React_HotPotQA_run_Agent", "Planner_HotPotQA_run_Agent", "PlannerReact_HotPotQA_run_Agent"]
10 | OPENAI_API_KEY = "YOUR_API_KEY_HERE"  # placeholder value; supply a real key locally and never commit it
11 | 


--------------------------------------------------------------------------------
/eval_heldout/hotpotQA/src/data/easy.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/hotpotQA/src/data/easy.joblib


--------------------------------------------------------------------------------
/eval_heldout/hotpotQA/src/data/hard.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/hotpotQA/src/data/hard.joblib


--------------------------------------------------------------------------------
/eval_heldout/hotpotQA/src/data/medium.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/hotpotQA/src/data/medium.joblib


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/.gitignore:
--------------------------------------------------------------------------------
 1 | # Distribution / packaging
 2 | .Python
 3 | build/
 4 | develop-eggs/
 5 | dist/
 6 | downloads/
 7 | eggs/
 8 | .eggs/
 9 | lib/
10 | lib64/
11 | parts/
12 | sdist/
13 | var/
14 | wheels/
15 | share/python-wheels/
16 | *.egg-info/
17 | .installed.cfg
18 | *.egg
19 | MANIFEST


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/README.md:
--------------------------------------------------------------------------------
1 | # Computergym
2 | 
3 | ## Install
4 | ```sh
5 | pip install -e .
6 | ```


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/__init__.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | 
 4 | from gym.envs.registration import register
 5 | 
 6 | sys.path.append(os.path.dirname(os.path.abspath(__file__)))  # put this package's own directory on the import path
 7 | 
 8 | # Maps each gym environment id to the metadata used to register it below.
 9 | _AVAILABLE_ENVS = {
10 |     "MiniWoBEnv-v0": {
11 |         "entry_point": "computergym.miniwob.base_env:MiniWoBEnv",
12 |         "discription": "MinoWoB++ environments",  # NOTE(review): key/value look misspelled ("description", "MiniWoB++"); not read in this file, check other readers before renaming
13 |     },
14 | }
15 | # Register every listed environment with gym at import time; only "entry_point" is consumed here.
16 | for env_id, val in _AVAILABLE_ENVS.items():
17 |     register(id=env_id, entry_point=val.get("entry_point"))
18 | 


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/__init__.py


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/.gitignore:
--------------------------------------------------------------------------------
1 | twistd.pid
2 | 


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_0.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_1.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_2.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_3.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_4.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_5.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_6.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_7.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_8.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_9.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/drag-cube/blank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/drag-cube/blank.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/delete.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/delete.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/forward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/forward.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/left-arrow-white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/left-arrow-white.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/left-arrow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/left-arrow.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/reply.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/reply.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/search.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/search.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/send.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/send.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/star-clicked.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/star-clicked.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/star.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/star.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/ajax-loader.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/ajax-loader.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/file.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/file.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/folder-closed.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/folder-closed.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/folder.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/folder.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/minus.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/minus.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/plus.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/plus.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-black-line.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-black-line.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-black.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-black.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-default-line.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-default-line.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-default.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-default.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-famfamfam-line.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-famfamfam-line.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-famfamfam.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-famfamfam.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-gray-line.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-gray-line.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-gray.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-gray.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-red-line.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-red-line.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-red.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-red.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/like-hover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/like-hover.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/like.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/like.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/more-hover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/more-hover.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/more.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/more.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/reply-hover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/reply-hover.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/reply.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/reply.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/retweet-hover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/retweet-hover.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/retweet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/retweet.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/share-hover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/share-hover.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/share.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/share.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/tic-tac-toe/o.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/tic-tac-toe/o.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/tic-tac-toe/x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/tic-tac-toe/x.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_444444_256x240.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_444444_256x240.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_555555_256x240.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_555555_256x240.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_777620_256x240.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_777620_256x240.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_777777_256x240.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_777777_256x240.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_cc0000_256x240.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_cc0000_256x240.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_ffffff_256x240.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_ffffff_256x240.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/apps/common/js/jquery/aacom/plugins/aaCache.js:
--------------------------------------------------------------------------------
1 | var aaCache=(function($j){var cache={};function _get(key){if(!cache[key]){cache[key]=$j(key);}return cache[key];}function _remove(key){if(cache.hasOwnProperty(key)){return(delete cache[key]);}return true;}return{get:_get,remove:_remove};}(jQuery));


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/american-v2/americansans-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/american-v2/americansans-bold.woff


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/american-v2/americansans-light.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/american-v2/americansans-light.woff


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/american-v2/americansans-medium.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/american-v2/americansans-medium.woff


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/american-v2/americansans-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/american-v2/americansans-regular.woff


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/icons/american-icons-v4-4.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/icons/american-icons-v4-4.woff


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/icons/loading.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/icons/loading.gif


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/aa-flight-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/aa-flight-icon.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/aa-icons-flags-sprite.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/aa-icons-flags-sprite.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/aa-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/aa-logo.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/down-arrow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/down-arrow.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/favicon.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/oneworld.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/oneworld.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow-down.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow-down.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow-vertical-150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow-vertical-150.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow12-down.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow12-down.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow12-up.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow12-up.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow3-down.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow3-down.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/graphics/icons/aa-jqueryUIicons-sprite.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/graphics/icons/aa-jqueryUIicons-sprite.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/js/aa/modules/commonsetup.js:
--------------------------------------------------------------------------------
1 | AAcom.modules.commonsetup=function(AAUI){/* NOT NEEDED */};
2 | 


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/aura.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/aura.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/cal3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/cal3.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/chkboxes3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/chkboxes3.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/clear_text2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/clear_text2.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/collapse.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/collapse.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/expand.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/expand.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/flight_arrow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/flight_arrow.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/geo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/geo.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/home.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/home.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/info2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/info2.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/leftright.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/leftright.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logo2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logo2.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logos/AA.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logos/AA.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logos/AS.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logos/AS.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logos/DL.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logos/DL.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logos/VX.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logos/VX.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/stylesheets/circular/ASCircularWeb-Bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/stylesheets/circular/ASCircularWeb-Bold.woff


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/stylesheets/circular/ASCircularWeb-Book.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/stylesheets/circular/ASCircularWeb-Book.woff


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/aura.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/aura.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/cal3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/cal3.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/chkboxes3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/chkboxes3.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/clear_text2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/clear_text2.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/collapse.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/collapse.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/expand.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/expand.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/flight_arrow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/flight_arrow.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/geo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/geo.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/home.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/home.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/info2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/info2.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/leftright.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/leftright.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logo2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logo2.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logos/AA.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logos/AA.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logos/AS.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logos/AS.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logos/DL.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logos/DL.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logos/VX.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logos/VX.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/stylesheets/circular/ASCircularWeb-Bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/stylesheets/circular/ASCircularWeb-Bold.woff


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/stylesheets/circular/ASCircularWeb-Book.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/stylesheets/circular/ASCircularWeb-Book.woff


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/aura.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/aura.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/cal3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/cal3.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/chkboxes3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/chkboxes3.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/clear_text2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/clear_text2.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/collapse.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/collapse.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/expand.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/expand.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/flight_arrow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/flight_arrow.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/geo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/geo.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/home.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/home.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/info2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/info2.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/leftright.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/leftright.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logo2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logo2.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logos/AA.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logos/AA.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logos/AS.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logos/AS.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logos/DL.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logos/DL.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logos/VX.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logos/VX.png


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/stylesheets/circular/ASCircularWeb-Bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/stylesheets/circular/ASCircularWeb-Bold.woff


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/stylesheets/circular/ASCircularWeb-Book.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/stylesheets/circular/ASCircularWeb-Book.woff


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/eval-gpt-3.5-turbo.sh:
--------------------------------------------------------------------------------
1 | export CONTROLLER_ADDR=
2 | for task in $(cat available_tasks.txt)
3 | do
4 |     python main.py --env $task --llm chatgpt --num-episodes 4 --erci 1 --irci 3 --sgrounding &
5 | done


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/eval-gpt-4.sh:
--------------------------------------------------------------------------------
1 | export CONTROLLER_ADDR=
2 | for task in $(cat available_tasks.txt)
3 | do
4 |     python main.py --env $task --llm gpt4 --num-episodes 1 --erci 1 --irci 3 --sgrounding &
5 | done


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/eval-tgi.sh:
--------------------------------------------------------------------------------
1 | export CONTROLLER_ADDR=http://127.0.0.1:23333
2 | llm=agent-llama-70b
3 | for task in $(cat available_tasks.txt)
4 | do
5 |     python main.py --env $task --llm $llm --num-episodes 10 --erci 1 --irci 3 --sgrounding &
6 | done


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/choose-list/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/choose-list/example.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/choose-list/example.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/choose-list/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/choose-list/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/choose-list/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/choose-list/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/choose-list/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-button-sequence/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-button-sequence/example.txt:
--------------------------------------------------------------------------------
1 | task: Click button ONE, then click button TWO.
2 | plan:
3 | 1. Click the button with xpath "//button[@id='subbtn']".
4 | 2. Click the button with xpath "//button[@id='subbtn2']".


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-button-sequence/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-button-sequence/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-button-sequence/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-button-sequence/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, the single specific instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-button/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-button/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | The task is to click "Ok" button.
3 | Here is a plan to solve this example task on the same webpage with the autonomous agent. 
4 | 1. clickxpath //button[text()="Ok"]
5 | 


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-button/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-button/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-button/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-button/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, the single specific instruction for solving the task should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes-large/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes-large/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes-large/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes-large/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes-large/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, the single specific instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes-soft/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes-soft/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | task: select the words similar to bunnies, tiny, scared, swine and click the Submit button.
3 | plan: we need to select synonyms of the given words and the words themselves.
4 | 1. clickxpath //label[text() = 'rabbit']/input // rabbit is a synonym of bunnies
5 | 2. clickxpath //label[text() = 'pig']/input // pig is a synonym of swine
6 | 3. clickxpath //label[text() = 'panicked']/input // panicked is a synonym of scared
7 | 4. clickxpath //label[text() = 'tiny']/input 
8 | 5. clickxpath //button[@id='subbtn']
9 | 


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes-soft/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes-soft/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes-soft/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes-soft/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, this instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes-transfer/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes-transfer/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes-transfer/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes-transfer/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes-transfer/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, the single specific instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-checkboxes/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, the single specific instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-collapsible-2/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-collapsible-2/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-collapsible-2/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-collapsible-2/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-collapsible-2/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, this instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-collapsible/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-collapsible/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | The task is to expand the section below and click the submit button.
3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 
4 | 1. Click the section to expand it by typing the xpath: "//*[@id='ui-id-1']".
5 | 2. Click the submit button by typing the xpath: "//button[@id='subbtn']".


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-collapsible/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-collapsible/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-collapsible/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-collapsible/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, the single specific instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-dialog-2/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-dialog-2/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-dialog-2/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-dialog-2/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-dialog-2/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/click-dialog-2/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-dialog/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-dialog/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | The task is to close the dialog box by clicking the "x".
3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 
4 | 
5 | 1. clickxpath //button[@class='ui-button ui-corner-all ui-widget ui-button-icon-only ui-dialog-titlebar-close']
6 | 


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-dialog/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-dialog/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-dialog/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-dialog/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/click-dialog/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-menu/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-menu/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | The task is for the agent to select Leonelle > Jane > Amabel from the HTML code provided.
3 | Here is a plan to solve this example task on the webpage with the autonomous agent. You must use "movemouse" not "clickxpath" to expand option.
4 | 1. movemouse //*[text()="Leonelle"]
5 | 2. movemouse //*[text()="Jane"]
6 | 3. clickxpath //*[text()="Amabel"]
7 | 


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-menu/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-menu/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-menu/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-menu/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/click-menu/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-option/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-option/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | The task is to select the radio button labeled "Rf" and click the "Submit" button.
3 | Here is a plan to solve this example task on the same webpage with the autonomous agent. 
4 | 1. clickxpath //label[input[@id='ch4']]
5 | 2. clickxpath //button[@id='subbtn']


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-option/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-option/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-option/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-option/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, the single specific instruction for solving the task should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-scroll-list/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-scroll-list/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | The task is to select Canada, Bosnia and Herzegovina from the scroll list and click Submit.
3 | Here is a plan to solve this example task on the same webpage with the autonomous agent. 
4 | 
5 | 1. clickoption //option[text() = 'Canada']
6 | 2. clickoption //option[text() = 'Bosnia']
7 | 3. clickoption //option[text() = 'Herzegovina']
8 | 4. Clickxpath //*[@class="secondary-action"]


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-scroll-list/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-scroll-list/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-scroll-list/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-scroll-list/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, the single specific instruction for solving the task should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-shades/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-shades/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | The task is to select all the shades of red and press the Submit button.
3 | Here is a plan to solve a task on the webpage with the autonomous agent. The first instruction will click all the shades of red.
4 | 
5 | 1. Clickxpath //span[@data-color="red"]
6 | 2. Clickxpath //*[@id="submit"]


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-shades/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-shades/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-shades/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-shades/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, the single specific instruction for solving the task should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-shape/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-shape/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | The task is to click on the magenta digit on the webpage.
3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 
4 | 1. Clickxpath //*[name()='svg']//*[name()='text' and @fill='magenta']
5 | 
6 | The task is to click on the large 4 on the webpage.
7 | Here is a plan to solve this example task on the webpage with the autonomous agent. 
8 | 1. clickxpath //*[name()='svg']//*[name()='text' and @fill="aqua" and @font-size="20px" and text()="4"]


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-shape/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-shape/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-shape/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-shape/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, this instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab-2-hard/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab-2-hard/example.txt:
--------------------------------------------------------------------------------
 1 | Example plans)
 2 | task: switch between the tabs to find and click on the link "amet".
 3 | plan:
 4 | 1. Clickxpath //a[text()="1"] // first, open the proper tab where the link with a text "amet" is in
 5 | 2. Clickxpath //*[@id="tabs-1"]/p/span[text()="amet"]
 6 | 
 7 | task: switch between the tabs to find and click on the link "gur".
 8 | plan:
 9 | 1. Clickxpath //a[text()="4"] // first, open the proper tab where the link with a text "gur" is in
10 | 2. Clickxpath //*[@id="tabs-4"]/p/span[text()="gur"]
11 | 


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab-2-hard/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab-2-hard/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab-2-hard/rci_action.txt:
--------------------------------------------------------------------------------
1 | This action does not match the regular expressions. The updated instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab-2-hard/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, this instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab-2/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab-2/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | task: switch between the tabs to find and click on the link "vitae."
3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 
4 | 1. clickxpath //a[text()="Tab #2"] // first, open the proper tab where the link with a text "vitae." is in
5 | 2. clickxpath //*[@id="tabs-2"]/p/span[text()="vitae."]
6 | 


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab-2/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab-2/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab-2/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab-2/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, this instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | The task is to click on Tab #3 on the webpage.
3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 
4 | 1. clickxpath //a[text()="Tab #3"]


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-tab/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/click-tab/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-test/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-test/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | The task is to click the button with the id "subbtn" on the webpage.
3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 
4 | 1. Clickxpath //*[@id="subbtn"]


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-test/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-test/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-test/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-test/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, this single specific instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-widget/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-widget/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-widget/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-widget/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/click-widget/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, this instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/count-shape/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/count-shape/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | task: How many small letters are there?
3 | plan: we need to count the number of small letters in the svg area.
4 | clickxpath //*[@id="count-buttons"]/button[5]
5 | task: How many red items are there?
6 | plan: we need to count the number of red items in the svg area.
7 | clickxpath //*[@id="count-buttons"]/button[2]


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/count-shape/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/count-shape/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/count-shape/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/count-shape/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, this instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox-forward-nl-turk/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox-forward-nl-turk/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox-forward-nl-turk/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox-forward-nl-turk/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox-forward-nl-turk/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/email-inbox-forward-nl-turk/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox-forward-nl/action.txt:
--------------------------------------------------------------------------------
1 | the next instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox-forward-nl/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox-forward-nl/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox-forward-nl/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox-forward-nl/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/email-inbox-forward-nl/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox-nl-turk/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox-nl-turk/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox-nl-turk/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox-nl-turk/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox-nl-turk/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/email-inbox-nl-turk/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/email-inbox/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/email-inbox/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/enter-date/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction to solve the task should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/enter-date/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/enter-date/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan for the task on the above webpage:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/enter-date/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/enter-date/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, this instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/enter-time/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/enter-time/example.txt:
--------------------------------------------------------------------------------
 1 | Example plans)
 2 | The task is to enter 9:01 AM as the time in the input box and press the submit button.
 3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 
 4 | 1. clickxpath //*[@id="tt"] 
 5 | 2. type 0901AM 
 6 | 3. clickxpath //*[@id="subbtn"]
 7 | 
 8 | The task is to enter 10:07 PM as the time in the input box and press the submit button.
 9 | Here is a plan to solve this example task on the webpage with the autonomous agent. 
10 | 1. clickxpath //*[@id="tt"] 
11 | 2. type 1007PM 
12 | 3. clickxpath //*[@id="subbtn"]


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/enter-time/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/enter-time/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/enter-time/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/enter-time/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/enter-time/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/example.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/example.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/focus-text/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/focus-text/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | The task is for the agent to focus into the textbox on the webpage.
3 | Here is a plan to solve the example task.
4 | 1. clickxpath //*[@id="area"]/input[@id="tt"]


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/focus-text/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/focus-text/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/focus-text/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/focus-text/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/focus-text/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/grid-coordinate/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/grid-coordinate/check_finish.txt:
--------------------------------------------------------------------------------
1 | Do you think you have completed the current plan?


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/grid-coordinate/done.txt:
--------------------------------------------------------------------------------
1 | Do you think that you accomplished the task on this webpage with the above list of instructions? Answer Yes or No.


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/grid-coordinate/edit_plan.txt:
--------------------------------------------------------------------------------
1 | The agent is following the above plan. Do you think that you need to edit the current plan for the agent to solve the task on this webpage? Answer yes or no and explain why.


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/grid-coordinate/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | The task is to click on the grid coordinate (1,2) on the webpage.
3 | Here is a plan to solve the example task.
4 | 1. clickxpath //*[@id="(1,2)"]


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/grid-coordinate/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/grid-coordinate/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/grid-coordinate/new_plan.txt:
--------------------------------------------------------------------------------
1 | You failed with the above plans to solve a task on the below webpage. Based on your answer, what is your new plan to solve the task on this webpage? Note that the new plan should be able to start from the current webpage state. You should not assume that the autonomous agent is in the initial state of the webpage. 


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/grid-coordinate/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/grid-coordinate/rci_answer.txt:
--------------------------------------------------------------------------------
1 | Answer Yes or No.


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/grid-coordinate/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/grid-coordinate/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/identify-shape/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/identify-shape/example.txt:
--------------------------------------------------------------------------------
1 | Example plan)
2 | task: click the button that best describes the figure below, which is a number (1).
3 | plan: note that the number 0 is a digit, not a circle, and that number text inside a <text> tag is a digit, not a letter.
4 | 1. clickxpath //button[@data-type="digit"] 
5 | 
6 | task: click the button that best describes the figure below.
7 | plan: note that a single character 0 is a digit, not a circle, and a single number inside a <text> tag is a digit, not a letter.
8 | 1. clickxpath //button[@data-type="triangle"] 


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/identify-shape/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/identify-shape/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/identify-shape/rci_action.txt:
--------------------------------------------------------------------------------
1 | This action does not match the regular expressions. The updated instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/identify-shape/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/identify-shape/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/login-user-popup/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/login-user-popup/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/login-user-popup/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/login-user-popup/rci_action.txt:
--------------------------------------------------------------------------------
1 | This action does not match the regular expressions. Notice that the instruction shouldn't contain quotation marks. The updated instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/login-user-popup/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/login-user-popup/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/multi-layouts/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/multi-layouts/example.txt:
--------------------------------------------------------------------------------
 1 | Example plan)
 2 | task: search for crime movies directed by Mckenzie from year 1970.
 3 | plan:
 4 | 1. click the input box labeled with "genre"
 5 | 2. type crime
 6 | 3. click the input box labeled with "director name"
 7 | 4. type Mckenzie
 8 | 5. click the input box labeled with "year"
 9 | 6. type 1970
10 | 7. click the search button


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/multi-layouts/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/multi-layouts/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/multi-layouts/rci_action.txt:
--------------------------------------------------------------------------------
1 | This action does not match the regular expressions. The updated instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/multi-layouts/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/multi-layouts/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/navigate-tree/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/navigate-tree/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | task: navigate through the file tree. Find and click on the folder or file named "Riley".
3 | plan:
4 | 1. clickxpath //span[text()='Briana'] // Click the expandable hitarea of the folder "Briana"
5 | 2. clickxpath //span[text()='Riley']


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/navigate-tree/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/navigate-tree/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/navigate-tree/rci_action.txt:
--------------------------------------------------------------------------------
1 | This action does not match the regular expressions. The updated instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/navigate-tree/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/navigate-tree/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/rci_action.txt:
--------------------------------------------------------------------------------
1 | This action does not match the regular expressions. The updated instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/search-engine/action.txt:
--------------------------------------------------------------------------------
1 | the next instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/search-engine/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/search-engine/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/search-engine/rci_action.txt:
--------------------------------------------------------------------------------
1 | This action does not match the regular expressions. The updated instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/search-engine/update_action.txt:
--------------------------------------------------------------------------------
1 |  The target of click should be a valid XPath. Therefore, considering the output on the webpage, this instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/simple-algebra/action.txt:
--------------------------------------------------------------------------------
1 | the next instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/simple-algebra/check_finish.txt:
--------------------------------------------------------------------------------
1 | Do you think you have completed the current plan?


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/simple-algebra/done.txt:
--------------------------------------------------------------------------------
1 | Do you think that you accomplished the task on this webpage with the above list of instructions? Answer Yes or No.


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/simple-algebra/edit_plan.txt:
--------------------------------------------------------------------------------
1 | The agent is following the above plan. Do you think that you need to edit the current plan for the agent to solve the task on this webpage? Answer yes or no and explain why.


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/simple-algebra/example.txt:
--------------------------------------------------------------------------------
1 | task: Solve for x and type your answer into the textbox. Press Submit when done.
2 | plan:
3 | 1. Locate the input box by clicking on it with a click instruction using its ID "math-answer".
4 | 2. Type the answer using a type instruction.
5 | 3. Press the "Enter" key using a press instruction.
6 | 4. Click the "Submit" button using a clickxpath instruction with the xpath "//button[@id='subbtn']".


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/simple-algebra/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/simple-algebra/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/simple-algebra/new_plan.txt:
--------------------------------------------------------------------------------
1 | You failed with the above plans to solve a task on the below webpage. Based on your answer, what is your new plan to solve the task on this webpage? Note that the new plan should be able to start from the current webpage state. You should not assume that the autonomous agent is in the initial state of the webpage. 


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/simple-algebra/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Notice that the instruction shouldn't contain quotation marks. Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/simple-algebra/rci_answer.txt:
--------------------------------------------------------------------------------
1 | Answer Yes or No.


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/simple-algebra/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/simple-algebra/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/social-media-all/action.txt:
--------------------------------------------------------------------------------
1 |  the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/social-media-all/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/social-media-all/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/social-media-all/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/social-media-all/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the information on the webpage, this instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/social-media-some/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/social-media-some/example.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/social-media-some/example.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/social-media-some/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/social-media-some/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/social-media-some/rci_action.txt:
--------------------------------------------------------------------------------
1 | This action does not match the regular expressions. The updated instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/social-media-some/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/social-media-some/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/social-media/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/social-media/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/social-media/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/social-media/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/social-media/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, this single specific instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/terminal/action.txt:
--------------------------------------------------------------------------------
1 | the next instruction that the agent needs to execute is


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/terminal/example.txt:
--------------------------------------------------------------------------------
1 | Example plans)
2 | task: use the terminal below to delete a file ending with the extension .gpg
3 | plan:
4 | 1. Type "ls" to list all files in the terminal with "type ls" instruction.
5 | 2. Press enter with "press enter" instruction
6 | 3. Identify the filename ending with ".gpg" and type "rm [filename].gpg" to delete the identified file with type instruction.
7 | 4. Press enter every time after typing a command in the terminal.
8 | 


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/terminal/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/terminal/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/terminal/rci_action.txt:
--------------------------------------------------------------------------------
1 | This action does not match the regular expressions. The updated instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/terminal/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the output on the webpage, this instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/tic-tac-toe/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/tic-tac-toe/example.txt:
--------------------------------------------------------------------------------
1 | task: for the agent to play as 'X' and win a game of tic-tac-toe.
2 | plan:
3 | 1. Check the board and click the proper position which is empty to newly put X mark on the board to win the game (e.g., clickxpath //*[@id="ttt-i"])
4 | 2. Start in the middle, play defensively, play offensively, and block your opponent to improve your chances of winning
5 | 3. You should select the position if you can directly win by doing that
6 | 4. Repeat these steps until you win


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/tic-tac-toe/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/tic-tac-toe/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/tic-tac-toe/rci_action.txt:
--------------------------------------------------------------------------------
1 | This action does not match the regular expressions. The updated instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/tic-tac-toe/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/tic-tac-toe/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/update_action.txt:
--------------------------------------------------------------------------------
1 |  Therefore, considering the information on the webpage, this instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/use-autocomplete/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/use-autocomplete/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/use-autocomplete/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/use-autocomplete/rci_action.txt:
--------------------------------------------------------------------------------
1 | This action does not match the regular expressions. The updated instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/use-autocomplete/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/use-autocomplete/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/use-spinner/action.txt:
--------------------------------------------------------------------------------
1 | the next proper instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/use-spinner/first_action.txt:
--------------------------------------------------------------------------------
1 | the first instruction should be `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/use-spinner/initialize_plan.txt:
--------------------------------------------------------------------------------
1 | plan:


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/use-spinner/rci_action.txt:
--------------------------------------------------------------------------------
1 |  Without explanation, the single instruction that matches one of the regular expressions is `


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/prompt/use-spinner/update_action.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/miniwob++/prompt/use-spinner/update_action.txt


--------------------------------------------------------------------------------
/eval_heldout/miniwob++/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | fschat
3 | gym
4 | openai
5 | selenium
6 | transformers
7 | Pillow
8 | regex


--------------------------------------------------------------------------------
/eval_heldout/rewoo/.gitignore:
--------------------------------------------------------------------------------
1 | ./keys/
2 | .idea/
3 | __pycache__
4 | .vscode
5 | /keys
6 | /logs
7 | /results
8 | /data


--------------------------------------------------------------------------------
/eval_heldout/rewoo/algos/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/rewoo/algos/__init__.py


--------------------------------------------------------------------------------
/eval_heldout/rewoo/alpaca/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/rewoo/alpaca/__init__.py


--------------------------------------------------------------------------------
/eval_heldout/rewoo/alpaca/templates/alpaca.json:
--------------------------------------------------------------------------------
1 | {
2 |     "description": "Template used by Alpaca-LoRA.",
3 |     "prompt_input": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n",
4 |     "prompt_no_input": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:\n",
5 |     "response_split": "### Response:"    
6 | }
7 | 


--------------------------------------------------------------------------------
/eval_heldout/rewoo/alpaca/templates/alpaca_short.json:
--------------------------------------------------------------------------------
1 | {
2 |     "description": "A shorter template to experiment with.",
3 |     "prompt_input": "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n",
4 |     "prompt_no_input": "### Instruction:\n{instruction}\n\n### Response:\n",
5 |     "response_split": "### Response:"    
6 | }
7 | 


--------------------------------------------------------------------------------
/eval_heldout/rewoo/alpaca/utils/README.md:
--------------------------------------------------------------------------------
 1 | # Directory for helper modules
 2 | 
 3 | ## prompter.py
 4 | 
 5 | Prompter class, a template manager.
 6 | 
 7 | `from utils.prompter import Prompter`
 8 | 
 9 | ## callbacks.py
10 | 
11 | Helpers to support streaming generate output.
12 | 
13 | `from utils.callbacks import Iteratorize, Stream`
14 | 


--------------------------------------------------------------------------------
/eval_heldout/rewoo/alpaca/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/rewoo/alpaca/utils/__init__.py


--------------------------------------------------------------------------------
/eval_heldout/rewoo/metrics.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | HISTORY = 'logs'
 4 | 
 5 | for llm in Path(HISTORY).iterdir():
 6 |     correct = 0
 7 |     total = 0
 8 |     for task in llm.iterdir():
 9 |         if not task.is_dir():
10 |             continue
11 |         episode_count = 0
12 |         for episode in task.iterdir():
13 |             is_correct = 'succ' in episode.name
14 |             correct += is_correct
15 |             total += 1
16 |     print(f'{llm.name}:\t{correct:3} / {total:3} = {(correct / total):.4}')


--------------------------------------------------------------------------------
/eval_heldout/rewoo/nodes/Node.py:
--------------------------------------------------------------------------------
 1 | # Basic Node to be inherited from.
 2 | class Node:
 3 |     def __init__(self, name, input_type, output_type):
 4 |         self.name = name
 5 |         self.input_type = input_type
 6 |         self.output_type = output_type
 7 | 
 8 |     def run (self, input, log=False):
 9 |         raise NotImplementedError
10 | 
11 | 


--------------------------------------------------------------------------------
/eval_heldout/rewoo/nodes/NodeCofig.py:
--------------------------------------------------------------------------------
1 | OPENAI_CONFIG = {  # generation settings; presumably passed to OpenAI completion calls — confirm against callers
2 |     "temperature": 0.5,
3 |     "max_tokens": 256,
4 |     "top_p": 1,
5 |     "frequency_penalty": 0,
6 |     "presence_penalty": 0,
7 | }


--------------------------------------------------------------------------------
/eval_heldout/rewoo/nodes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/rewoo/nodes/__init__.py


--------------------------------------------------------------------------------
/eval_heldout/rewoo/prompts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/rewoo/prompts/__init__.py


--------------------------------------------------------------------------------
/eval_heldout/rewoo/prompts/solver.py:
--------------------------------------------------------------------------------
1 | DEFAULT_PREFIX = "Solve the following task or problem. To assist you, we provide some plans and corresponding evidences that might be helpful. Notice that some of these information contain noise so you should trust them with caution.\n\n"  # presumably placed ahead of the plans/evidence in the solver prompt — confirm against the solver node
2 | DEFAULT_SUFFIX = "\nNow begin to solve the task or problem. Respond with the answer directly with no extra words.\n\n"  # presumably appended after the plans/evidence — confirm against the solver node
3 | 
4 | 
5 | TOOL_RELUCTANT_SUFFIX = "\nNow begin to solve the task or problem. Respond with the answer directly with no extra words.\n\n"  # NOTE(review): text is identical to DEFAULT_SUFFIX — confirm whether a distinct wording was intended


--------------------------------------------------------------------------------
/eval_heldout/rewoo/requirements.txt:
--------------------------------------------------------------------------------
 1 | datasets==2.11.0
 2 | geopy==2.3.0
 3 | gradio==3.32.0
 4 | langchain==0.0.187
 5 | numpy==1.24.2
 6 | openai==0.27.4
 7 | pandas==1.5.2
 8 | peft
 9 | pytz
10 | requests==2.28.1
11 | tqdm==4.64.1
12 | wikipedia
13 | google-search-results
14 | 
15 | accelerate
16 | fschat


--------------------------------------------------------------------------------
/eval_heldout/rewoo/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/rewoo/utils/__init__.py


--------------------------------------------------------------------------------
/eval_heldout/science-world/.gitignore:
--------------------------------------------------------------------------------
1 | .env
2 | .DS_Store
3 | logs/
4 | *.pyc
5 | .vscode/
6 | __pycache__/


--------------------------------------------------------------------------------
/eval_heldout/science-world/data_utils/READMD.md:
--------------------------------------------------------------------------------
1 | ```bash
2 | # wget https://github.com/allenai/ScienceWorld/raw/main/goldpaths/goldpaths-all.zip
3 | wget https://github.com/allenai/ScienceWorld/raw/exhaustivevalidactions/goldpaths/goldpaths-all.zip
4 | unzip goldpaths-all.zip
5 | ```
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/eval_heldout/science-world/data_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/science-world/data_utils/__init__.py


--------------------------------------------------------------------------------
/eval_heldout/science-world/data_utils/goldpaths-all.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/science-world/data_utils/goldpaths-all.zip


--------------------------------------------------------------------------------
/eval_heldout/science-world/eval-gpt-3.5.sh:
--------------------------------------------------------------------------------
 1 | export OPENAI_API_KEY=sk-your-openai-api-key  # placeholder value; replace with a real key before running
 2 | export MODEL_NAME=gpt-3.5-turbo  # also names the log sub-directory used below
 3 | 
 4 | for task in {0..29}  # one eval.py run per task number 0..29 (brace expansion needs bash)
 5 | do
 6 |     python eval.py \
 7 |         --task_nums $task \
 8 |         --output_path logs/$MODEL_NAME \
 9 |         --model_name $MODEL_NAME
10 | done


--------------------------------------------------------------------------------
/eval_heldout/science-world/eval-gpt-4.sh:
--------------------------------------------------------------------------------
 1 | export OPENAI_API_KEY=sk-your-openai-api-key  # placeholder value; replace with a real key before running
 2 | export MODEL_NAME=gpt-4  # also names the log sub-directory used below
 3 | 
 4 | for task in {0..29}  # one eval.py run per task number 0..29 (brace expansion needs bash)
 5 | do
 6 |     python eval.py \
 7 |         --task_nums $task \
 8 |         --output_path logs/$MODEL_NAME \
 9 |         --model_name $MODEL_NAME
10 | done


--------------------------------------------------------------------------------
/eval_heldout/science-world/eval-tgi.sh:
--------------------------------------------------------------------------------
 1 | export CONTROLLER_ADDR=http://127.0.0.1:23333,http://127.0.0.1:23334  # comma-separated controller URLs; presumably read by eval.py — confirm
 2 | export MODEL_NAME=agent-llama  # also names the log sub-directory used below
 3 | 
 4 | for task in {0..29}  # one eval.py run per task number 0..29 (brace expansion needs bash)
 5 | do
 6 |     python eval.py \
 7 |         --task_nums $task \
 8 |         --output_path logs/$MODEL_NAME \
 9 |         --model_name $MODEL_NAME
10 | done


--------------------------------------------------------------------------------
/eval_heldout/science-world/metrics.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | import re
 3 | 
 4 | LOGS = 'logs'
 5 | 
 6 | for model in Path(LOGS).iterdir():
 7 |     scores = []
 8 |     for i in range(30):
 9 |         file = model / f'task{i}-score.txt'
10 |         try:
11 |             s = file.open().read()
12 |         except:
13 |             print(f'Warning: {file} not found')
14 |             continue
15 |         score = re.search(r'Average score: ([0-9\.]*)', s)[1]
16 |         x = float(score)
17 |         scores.append(x)
18 | 
19 |     print(f'{model}:', sum(scores) / len(scores))


--------------------------------------------------------------------------------
/eval_heldout/science-world/prompts/README.md:
--------------------------------------------------------------------------------
1 | # Few-shot Prompts
2 | 
3 | `prompt_orig.json` contains the original few-shot prompts provided by [SwiftSage](https://github.com/yuchenlin/SwiftSage). We use `convert.py` to convert it to suit our conversation format and save the result in `prompt.json`, which will be used in our evaluation.


--------------------------------------------------------------------------------
/eval_heldout/webarena/agent/__init__.py:
--------------------------------------------------------------------------------
1 | from .agent import (
2 |     Agent,
3 |     PromptAgent,
4 |     TeacherForcingAgent,
5 |     construct_agent,
6 | )
7 | 
8 | __all__ = ["Agent", "TeacherForcingAgent", "PromptAgent", "construct_agent"]
9 | 


--------------------------------------------------------------------------------
/eval_heldout/webarena/agent/prompts/README.md:
--------------------------------------------------------------------------------
1 | ## Naming of the prompt files
2 | `description.action_space.observation_space.json`
3 | 


--------------------------------------------------------------------------------
/eval_heldout/webarena/agent/prompts/__init__.py:
--------------------------------------------------------------------------------
1 | from .prompt_constructor import *
2 | 


--------------------------------------------------------------------------------
/eval_heldout/webarena/browser_env/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/webarena/browser_env/py.typed


--------------------------------------------------------------------------------
/eval_heldout/webarena/browser_env/trajectory.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 | 
3 | from .actions import Action
4 | from .utils import StateInfo
5 | 
6 | Trajectory = list[Union[StateInfo, Action]]  # type alias: a list whose items are each a StateInfo or an Action
7 | 


--------------------------------------------------------------------------------
/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/calculator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/calculator.png


--------------------------------------------------------------------------------
/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/cms.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/cms.png


--------------------------------------------------------------------------------
/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/gitlab.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/gitlab.png


--------------------------------------------------------------------------------
/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/manual1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/manual1.png


--------------------------------------------------------------------------------
/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/manual2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/manual2.png


--------------------------------------------------------------------------------
/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/map.png


--------------------------------------------------------------------------------
/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/onestopshop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/onestopshop.png


--------------------------------------------------------------------------------
/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/password.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/password.png


--------------------------------------------------------------------------------
/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/reddit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/reddit.png


--------------------------------------------------------------------------------
/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/scratchpad.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/scratchpad.png


--------------------------------------------------------------------------------
/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/wikipedia.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentTuning/e33a45d7eab2b63cac4d1956da1e6377fca9fcc7/eval_heldout/webarena/environment_docker/webarena-homepage/static/figures/wikipedia.png


--------------------------------------------------------------------------------
/eval_heldout/webarena/evaluation_harness/__init__.py:
--------------------------------------------------------------------------------
1 | from .evaluators import *
2 | from .helper_functions import (
3 |     shopping_get_latest_order_url,
4 |     shopping_get_sku_latest_review_author,
5 |     shopping_get_sku_latest_review_rating,
6 | )
7 | 


--------------------------------------------------------------------------------
/eval_heldout/webarena/llms/__init__.py:
--------------------------------------------------------------------------------
1 | """This module is adapted from https://github.com/zeno-ml/zeno-build"""
2 | 


--------------------------------------------------------------------------------
/eval_heldout/webarena/llms/tokenizers.py:
--------------------------------------------------------------------------------
 1 | from typing import Any
 2 | 
 3 | import tiktoken
 4 | 
 5 | 
 6 | class Tokenizer(object):
 7 |     def __init__(self, model_name: str) -> None:
 8 |         if model_name in ["gpt-4", "gpt-turbo-3.5"]:
 9 |             self.tokenizer = tiktoken.encoding_for_model(model_name)
10 |         else:
11 |             raise NotImplementedError
12 | 
13 |     def __call__(self, text: str) -> list[int]:
14 |         return self.tokenizer.encode(text)
15 | 


--------------------------------------------------------------------------------
/eval_heldout/webarena/prepare.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # prepare the evaluation
4 | # re-validate login information
5 | mkdir -p ./.auth
6 | python browser_env/auto_login.py
7 | 


--------------------------------------------------------------------------------
/eval_heldout/webarena/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | name = webarena
 3 | 
 4 | [tool.pytest.ini_options]
 5 | testpaths = ["tests"]
 6 | python_files = "test_*.py"
 7 | 
 8 | [options.extras_require]
 9 | dev =
10 |     pre-commit==3.0.1
11 |     pytest==7.1.2
12 |     mypy==0.991
13 |     nbmake
14 |     pytest-asyncio
15 |     types-requests
16 | 
17 | [options]
18 | python_requires = >=3.7, <4
19 | packages =
20 |     browser_env
21 |     agent
22 |     evaluation_harness
23 |     llms
24 | [mypy]
25 | strict = true
26 | 


--------------------------------------------------------------------------------
/eval_heldout/webarena/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | 
3 | if __name__ == "__main__":
4 |     setup()
5 | 


--------------------------------------------------------------------------------