├── .gitattributes ├── .gitignore ├── AgentBench.old ├── configs │ ├── agents │ │ ├── do_nothing.yaml │ │ └── tgi_clients │ │ │ ├── AgentLM-13b.yaml │ │ │ ├── AgentLM-70b.yaml │ │ │ └── AgentLM-7b.yaml │ └── tasks │ │ ├── alfworld │ │ ├── dev.yaml │ │ └── std.yaml │ │ ├── card_game │ │ ├── dev.yaml │ │ ├── ext.yaml │ │ └── std.yaml │ │ ├── dbbench │ │ ├── dev.yaml │ │ └── std.yaml │ │ ├── knowledgegraph │ │ ├── dev.yaml │ │ └── std.yaml │ │ ├── mind2web │ │ ├── dev.yaml │ │ └── std.yaml │ │ ├── os_interaction │ │ ├── dev.yaml │ │ └── std.yaml │ │ └── webshop │ │ ├── dev.yaml │ │ └── std.yaml ├── data │ ├── alfworld │ │ ├── dev.json │ │ └── std.json │ ├── dbbench │ │ ├── dev.jsonl │ │ └── standard.jsonl │ ├── knowledgegraph │ │ ├── dev.json │ │ └── std.json │ ├── mind2web │ │ └── prompt │ │ │ ├── llm_prompt.json │ │ │ └── llm_prompt_cot.json │ └── os_interaction │ │ ├── data │ │ ├── 1 │ │ │ └── stock.json │ │ ├── 2 │ │ │ └── environment.json │ │ ├── 3 │ │ │ └── ac.json │ │ ├── 4 │ │ │ ├── N11.json │ │ │ ├── N225.json │ │ │ ├── N37.json │ │ │ ├── N4.json │ │ │ ├── N41.json │ │ │ ├── Q09.json │ │ │ ├── Q19.json │ │ │ ├── Q30.json │ │ │ ├── Q47.json │ │ │ └── Q49.json │ │ ├── 5 │ │ │ └── new.json │ │ ├── 6 │ │ │ └── new.json │ │ ├── 7 │ │ │ └── 2023-06-07-00-13-14.json │ │ ├── 6-backup.json │ │ └── dev.json │ │ ├── res │ │ └── dockerfiles │ │ │ ├── default │ │ │ ├── packages │ │ │ └── ubuntu │ │ └── scripts │ │ ├── 1 │ │ ├── check │ │ │ ├── 1.sh │ │ │ ├── containing.py │ │ │ ├── in.py │ │ │ ├── integer-match.py │ │ │ ├── size-match.py │ │ │ └── string-match.py │ │ ├── example │ │ │ └── 1.sh │ │ └── init │ │ │ ├── gen_words.sh │ │ │ ├── install_nettools.sh │ │ │ ├── nested_folders.sh │ │ │ └── stock-log.sh │ │ ├── 2 │ │ ├── check │ │ │ ├── 1.sh │ │ │ ├── containing.py │ │ │ ├── in.py │ │ │ ├── integer-match.py │ │ │ ├── size-match.py │ │ │ └── string-match.py │ │ ├── example │ │ │ └── 1.sh │ │ └── init │ │ │ ├── gen_words.sh │ │ │ ├── install_nettools.sh │ │ │ ├── nested_folders.sh │ │ │ └── stock-log.sh │ │ ├── 3 │ │ ├── check │ │ │ ├── 1.sh │ │ │ ├── containing.py │ │ │ ├── in.py │ │ │ ├── integer-match.py │ │ │ ├── size-match.py │ │ │ └── string-match.py │ │ ├── example │ │ │ └── 1.sh │ │ └── init │ │ │ ├── gen_words.sh │ │ │ ├── install_nettools.sh │ │ │ ├── nested_folders.sh │ │ │ └── stock-log.sh │ │ ├── 4 │ │ ├── check │ │ │ ├── 1.sh │ │ │ ├── containing.py │ │ │ ├── in.py │ │ │ ├── integer-match.py │ │ │ ├── size-match.py │ │ │ └── string-match.py │ │ ├── example │ │ │ └── 1.sh │ │ └── init │ │ │ ├── gen_words.sh │ │ │ ├── install_nettools.sh │ │ │ ├── nested_folders.sh │ │ │ └── stock-log.sh │ │ ├── 5 │ │ ├── check │ │ │ ├── containing.py │ │ │ ├── in.py │ │ │ ├── integer-match.py │ │ │ ├── size-match.py │ │ │ └── string-match.py │ │ ├── checking │ │ │ ├── 0.sh │ │ │ ├── 1.sh │ │ │ └── 2.sh │ │ ├── example │ │ │ ├── 0.sh │ │ │ ├── 1.sh │ │ │ └── 2.sh │ │ ├── file.json │ │ ├── init │ │ │ └── 1.sh │ │ ├── new.json │ │ └── prompt.md │ │ ├── 6 │ │ ├── collected.json │ │ ├── collecting.py │ │ ├── new.json │ │ ├── permission.json │ │ └── stackoverflow │ │ │ └── spider.py │ │ ├── 7 │ │ ├── check │ │ │ ├── 1.sh │ │ │ ├── containing.py │ │ │ ├── in.py │ │ │ ├── integer-match.py │ │ │ ├── size-match.py │ │ │ └── string-match.py │ │ ├── example │ │ │ └── 1.sh │ │ └── init │ │ │ ├── gen_words.sh │ │ │ ├── install_nettools.sh │ │ │ ├── nested_folders.sh │ │ │ └── stock-log.sh │ │ └── dev │ │ ├── check │ │ ├── 0.sh │ │ ├── containing.py │ │ ├── in.py │ │ ├── integer-match.py │ │ ├── size-match.py │ │ └── string-match.py │ │ ├── example │ │ └── 0.sh │ │ └── init │ │ └── stock-log.sh ├── eval.sh ├── eval │ ├── AgentLM-13b-eval-all.sh │ ├── AgentLM-70b-eval-all.sh │ ├── AgentLM-7b-eval-all.sh │ └── single-task │ │ ├── alfworld.sh │ │ ├── card.sh │ │ ├── db.sh │ │ ├── eval_single_setup.sh │ │ ├── kg.sh │ │ ├── mind2web.sh │ │ ├── os.sh │ │ └── webshop.sh ├── evaluate.py ├── readme-zh.md ├── readme.md ├── requirements.txt └── src │ ├── __init__.py │ ├── agent.py │ ├── agents │ ├── __init__.py │ ├── do_nothing_agent.py │ └── tgi_client.py │ ├── configs.py │ ├── task.py │ ├── tasks │ ├── __init__.py │ ├── alfworld │ │ ├── __init__.py │ │ ├── configs │ │ │ └── base_config.yaml │ │ ├── environment.py │ │ ├── prompts │ │ │ ├── alfworld_multiturn.json │ │ │ ├── alfworld_multiturn_cot.json │ │ │ ├── alfworld_multiturn_new.json │ │ │ ├── alfworld_multiturn_plan_first.json │ │ │ ├── alfworld_multiturn_pure.json │ │ │ ├── alfworld_multiturn_react.json │ │ │ └── alfworld_singleturn.json │ │ ├── task.py │ │ └── utils.py │ ├── card_game │ │ ├── AI_SDK │ │ │ ├── C++ │ │ │ │ ├── Action.cpp │ │ │ │ ├── Action.hpp │ │ │ │ ├── Action_sample.cpp │ │ │ │ ├── Makefile │ │ │ │ ├── main.cpp │ │ │ │ └── sdk │ │ │ │ │ ├── ai_client.hpp │ │ │ │ │ └── jsoncpp │ │ │ │ │ ├── json │ │ │ │ │ ├── json-forwards.h │ │ │ │ │ └── json.h │ │ │ │ │ └── jsoncpp.cpp │ │ │ └── Python │ │ │ │ ├── AI_Cn.py │ │ │ │ ├── AI_Cn_en.py │ │ │ │ ├── AI_En.py │ │ │ │ ├── Action.py │ │ │ │ ├── action1.py │ │ │ │ ├── action2.py │ │ │ │ ├── action3.py │ │ │ │ ├── basline1.py │ │ │ │ ├── basline2.py │ │ │ │ ├── basline3.py │ │ │ │ ├── client.py │ │ │ │ ├── main.py │ │ │ │ ├── prompt │ │ │ │ ├── cn.py │ │ │ │ ├── cn_en.py │ │ │ │ └── en.py │ │ │ │ ├── sdk │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── ai_client.cpython-310-x86_64-linux-gnu.so │ │ │ │ ├── ai_client.cpython-38-x86_64-linux-gnu.so │ │ │ │ ├── ai_client.hpp │ │ │ │ ├── ai_client.pyi │ │ │ │ ├── build │ │ │ │ │ ├── CMakeCache.txt │ │ │ │ │ ├── CMakeFiles │ │ │ │ │ │ ├── 3.25.0 │ │ │ │ │ │ │ ├── CMakeCXXCompiler.cmake │ │ │ │ │ │ │ ├── CMakeDetermineCompilerABI_CXX.bin │ │ │ │ │ │ │ ├── CMakeSystem.cmake │ │ │ │ │ │ │ └── CompilerIdCXX │ │ │ │ │ │ │ │ ├── CMakeCXXCompilerId.cpp │ │ │ │ │ │ │ │ └── a.out │ │ │ │ │ │ ├── CMakeDirectoryInformation.cmake │ │ │ │ │ │ ├── CMakeOutput.log │ │ │ │ │ │ ├── Makefile.cmake │ │ │ │ │ │ ├── Makefile2 │ │ │ │ │ │ ├── TargetDirectories.txt │ │ │ │ │ │ ├── ai_client.dir │ │ │ │ │ │ │ ├── DependInfo.cmake │ │ │ │ │ │ │ ├── build.make │ │ │ │ │ │ │ ├── cmake_clean.cmake │ │ │ │ │ │ │ ├── compiler_depend.make │ │ │ │ │ │ │ ├── compiler_depend.ts │ │ │ │ │ │ │ ├── depend.make │ │ │ │ │ │ │ ├── flags.make │ │ │ │ │ │ │ ├── jsoncpp │ │ │ │ │ │ │ │ ├── jsoncpp.cpp.o │ │ │ │ │ │ │ │ └── jsoncpp.cpp.o.d │ │ │ │ │ │ │ ├── link.txt │ │ │ │ │ │ │ ├── progress.make │ │ │ │ │ │ │ ├── py_ai_sdk.cpp.o │ │ │ │ │ │ │ └── py_ai_sdk.cpp.o.d │ │ │ │ │ │ ├── cmake.check_cache │ │ │ │ │ │ └── progress.marks │ │ │ │ │ ├── Makefile │ │ │ │ │ ├── bin │ │ │ │ │ │ └── ai_client.cpython-310-x86_64-linux-gnu.so │ │ │ │ │ └── cmake_install.cmake │ │ │ │ ├── jsoncpp │ │ │ │ │ ├── json │ │ │ │ │ │ ├── json-forwards.h │ │ │ │ │ │ └── json.h │ │ │ │ │ └── jsoncpp.cpp │ │ │ │ ├── py_ai_sdk.cpp │ │ │ │ ├── py_json_cast.hpp │ │ │ │ └── pyd │ │ │ │ │ ├── ai_client.cp38-win_amd64.pyd │ │ │ │ │ ├── ai_client.cpython-310-x86_64-linux-gnu.so │ │ │ │ │ ├── ai_client.cpython-36m-x86_64-linux-gnu.so │ │ │ │ │ ├── ai_client.cpython-38-darwin.so │ │ │ │ │ └── ai_client.cpython-38-x86_64-linux-gnu.so │ │ │ │ └── test.py │ │ ├── Tools │ │ │ └── player.py │ │ ├── __init__.py │ │ ├── judger │ │ │ ├── cal_metric.py │ │ │ ├── judger.py │ │ │ ├── rserver.py │ │ │ └── run_all.py │ │ ├── logic │ │ │ ├── .gitignore │ │ │ ├── Makefile │ │ │ ├── bin │ │ │ │ └── main │ │ │ └── src │ │ │ │ ├── aqua_sdk.cpp │ │ │ │ ├── fish.cpp │ │ │ │ ├── fish.h │ │ │ │ ├── fishset.cpp │ │ │ │ ├── fishset.h │ │ │ │ ├── game.cpp │ │ │ │ ├── game.h │ │ │ │ ├── jsoncpp │ │ │ │ ├── json │ │ │ │ │ ├── json-forwards.h │ │ │ │ │ └── json.h │ │ │ │ └── jsoncpp.cpp │ │ │ │ ├── main │ │ │ │ ├── main.cpp │ │ │ │ ├── player.cpp │ │ │ │ ├── player.h │ │ │ │ └── timer.h │ │ ├── server.py │ │ ├── task.py │ │ └── utils.py │ ├── composite_task.py │ ├── dbbench │ │ ├── Interaction.py │ │ ├── __init__.py │ │ └── requirements.txt │ ├── example_task.py │ ├── knowledgegraph │ │ ├── __init__.py │ │ ├── api.py │ │ ├── ontology │ │ │ ├── fb_roles │ │ │ ├── fb_types │ │ │ ├── reverse_properties │ │ │ └── vocab.json │ │ ├── requirements.txt │ │ ├── task.py │ │ └── utils │ │ │ ├── logic_form_util.py │ │ │ ├── semparse_util.py │ │ │ └── sparql_executer.py │ ├── mind2web │ │ ├── __init__.py │ │ ├── data_utils │ │ │ ├── __init__.py │ │ │ └── dom_utils.py │ │ ├── dataloader.py │ │ └── task.py │ ├── os_interaction │ │ ├── __init__.py │ │ ├── images.py │ │ ├── requirements.txt │ │ └── task.py │ └── webshop │ │ ├── .github │ │ ├── ISSUE_TEMPLATE.md │ │ ├── PULL_REQUEST_TEMPLATE.md │ │ └── workflows │ │ │ └── pytest.yml │ │ ├── .gitignore │ │ ├── LICENSE.md │ │ ├── README.md │ │ ├── __init__.py │ │ ├── assets │ │ ├── diagram.gif │ │ ├── model_ckpts.png │ │ └── transfer-logic.png │ │ ├── baseline_models │ │ ├── .gitignore │ │ ├── README.md │ │ ├── agent.py │ │ ├── env.py │ │ ├── generate_search.py │ │ ├── logger.py │ │ ├── models │ │ │ ├── bert.py │ │ │ ├── modules.py │ │ │ └── rnn.py │ │ ├── requirements.txt │ │ ├── test.py │ │ ├── train_choice_il.py │ │ ├── train_rl.py │ │ └── train_search_il.py │ │ ├── conftest.py │ │ ├── requirements.txt │ │ ├── run_dev.sh │ │ ├── run_envs │ │ ├── run_web_agent_site_env.py │ │ └── run_web_agent_text_env.py │ │ ├── run_prod.sh │ │ ├── run_web_agent_site_env.sh │ │ ├── run_web_agent_text_env.sh │ │ ├── search_engine │ │ ├── convert_product_file_format.py │ │ ├── lucene_searcher.py │ │ └── run_indexing.sh │ │ ├── setup.sh │ │ ├── tests │ │ ├── transfer │ │ │ ├── mocks │ │ │ │ ├── mock_parse_item_page_amz │ │ │ │ ├── mock_parse_item_page_ebay │ │ │ │ ├── mock_parse_item_page_ws │ │ │ │ ├── mock_parse_item_page_ws_desc │ │ │ │ ├── mock_parse_item_page_ws_feat │ │ │ │ ├── mock_parse_results_amz │ │ │ │ ├── mock_parse_results_ebay │ │ │ │ └── mock_parse_results_ws │ │ │ └── test_predict_help.py │ │ └── web-agent-site │ │ │ ├── engine │ │ │ ├── test_goal.py │ │ │ └── test_normalize.py │ │ │ └── test_utils.py │ │ ├── transfer │ │ ├── README.md │ │ ├── __init__.py │ │ ├── app.py │ │ ├── predict_help.py │ │ └── webshop_lite.py │ │ └── web_agent_site │ │ ├── __init__.py │ │ ├── app.py │ │ ├── attributes │ │ ├── annotate.py │ │ └── generate_attrs.py │ │ ├── engine │ │ ├── __init__.py │ │ ├── engine.py │ │ ├── goal.py │ │ └── normalize.py │ │ ├── envs │ │ ├── __init__.py │ │ ├── chromedriver │ │ ├── web_agent_site_env.py │ │ └── web_agent_text_env.py │ │ ├── models │ │ ├── __init__.py │ │ └── models.py │ │ ├── static │ │ ├── images │ │ │ └── no-image-available.png │ │ └── style.css │ │ ├── templates │ │ ├── attributes_page.html │ │ ├── description_page.html │ │ ├── done_page.html │ │ ├── features_page.html │ │ ├── item_page.html │ │ ├── results_page.html │ │ ├── review_page.html │ │ └── search_page.html │ │ └── utils.py │ └── utils.py ├── README-zh.md ├── README.md ├── assets ├── head-figure.png ├── head-figure.svg ├── main-figure.png └── main-figure.svg ├── docker ├── agentlm-13b.yml ├── agentlm-70b.yml └── agentlm-7b.yml ├── docs ├── README.md ├── index.html └── static │ ├── css │ ├── bulma-carousel.min.css │ ├── bulma-slider.min.css │ ├── bulma.css.map.txt │ ├── bulma.min.css │ ├── fontawesome.all.min.css │ └── index.css │ ├── images │ ├── case-study.svg │ ├── error-analysis.svg │ ├── favicon.ico │ ├── head-figure.png │ ├── head-figure.svg │ ├── main-figure.png │ └── main-figure.svg │ └── js │ ├── bulma-carousel.js │ ├── bulma-carousel.min.js │ ├── bulma-slider.js │ ├── bulma-slider.min.js │ ├── fontawesome.all.min.js │ └── index.js ├── eval_general ├── eval_gsm8k_tgi.py ├── eval_mmlu_hf.py ├── eval_mt_bench_tgi.py └── requirements.txt └── eval_heldout ├── hotpotQA ├── eval_hotpot.py ├── requirements.txt └── src │ ├── agent_arch.py │ ├── config.py │ ├── data │ ├── easy.joblib │ ├── hard.joblib │ └── medium.joblib │ ├── evaluate.py │ ├── fewshots.py │ ├── hotpotqa_env.py │ ├── llms.py │ ├── pre_prompt.py │ ├── prompt.txt │ ├── utils.py │ ├── wikienv.py │ └── wrappers.py ├── miniwob++ ├── .gitignore ├── LICENSE ├── README.md ├── available_tasks.txt ├── computergym │ ├── .gitignore │ ├── README.md │ ├── computergym │ │ ├── __init__.py │ │ └── miniwob │ │ │ ├── base_env.py │ │ │ └── miniwob_interface │ │ │ ├── __init__.py │ │ │ ├── action.py │ │ │ ├── environment.py │ │ │ ├── fields.py │ │ │ ├── html │ │ │ ├── .gitignore │ │ │ ├── common │ │ │ │ ├── shapes.js │ │ │ │ ├── special │ │ │ │ │ ├── book-flight │ │ │ │ │ │ └── domestic.js │ │ │ │ │ ├── checkbox-numbers │ │ │ │ │ │ ├── ch_0.png │ │ │ │ │ │ ├── ch_1.png │ │ │ │ │ │ ├── ch_2.png │ │ │ │ │ │ ├── ch_3.png │ │ │ │ │ │ ├── ch_4.png │ │ │ │ │ │ ├── ch_5.png │ │ │ │ │ │ ├── ch_6.png │ │ │ │ │ │ ├── ch_7.png │ │ │ │ │ │ ├── ch_8.png │ │ │ │ │ │ └── ch_9.png │ │ │ │ │ ├── click-pie │ │ │ │ │ │ ├── raphael.icons.min.js │ │ │ │ │ │ ├── raphael.min.js │ │ │ │ │ │ └── wheelnav.min.js │ │ │ │ │ ├── drag-cube │ │ │ │ │ │ ├── blank.png │ │ │ │ │ │ ├── cube.css │ │ │ │ │ │ └── cube.js │ │ │ │ │ ├── email-inbox-nl │ │ │ │ │ │ └── templates.js │ │ │ │ │ ├── email-inbox │ │ │ │ │ │ ├── delete.png │ │ │ │ │ │ ├── email-inbox.css │ │ │ │ │ │ ├── forward.png │ │ │ │ │ │ ├── left-arrow-white.png │ │ │ │ │ │ ├── left-arrow.png │ │ │ │ │ │ ├── reply.png │ │ │ │ │ │ ├── search.png │ │ │ │ │ │ ├── send.png │ │ │ │ │ │ ├── star-clicked.png │ │ │ │ │ │ └── star.png │ │ │ │ │ ├── navigate-tree │ │ │ │ │ │ ├── images │ │ │ │ │ │ │ ├── ajax-loader.gif │ │ │ │ │ │ │ ├── file.gif │ │ │ │ │ │ │ ├── folder-closed.gif │ │ │ │ │ │ │ ├── folder.gif │ │ │ │ │ │ │ ├── minus.gif │ │ │ │ │ │ │ ├── plus.gif │ │ │ │ │ │ │ ├── treeview-black-line.gif │ │ │ │ │ │ │ ├── treeview-black.gif │ │ │ │ │ │ │ ├── treeview-default-line.gif │ │ │ │ │ │ │ ├── treeview-default.gif │ │ │ │ │ │ │ ├── treeview-famfamfam-line.gif │ │ │ │ │ │ │ ├── treeview-famfamfam.gif │ │ │ │ │ │ │ ├── treeview-gray-line.gif │ │ │ │ │ │ │ ├── treeview-gray.gif │ │ │ │ │ │ │ ├── treeview-red-line.gif │ │ │ │ │ │ │ └── treeview-red.gif │ │ │ │ │ │ ├── jquery.treeview.css │ │ │ │ │ │ └── jquery.treeview.min.js │ │ │ │ │ ├── search-engine │ │ │ │ │ │ └── jquery.twbsPagination.min.js │ │ │ │ │ ├── social-media │ │ │ │ │ │ ├── like-hover.png │ │ │ │ │ │ ├── like.png │ │ │ │ │ │ ├── more-hover.png │ │ │ │ │ │ ├── more.png │ │ │ │ │ │ ├── reply-hover.png │ │ │ │ │ │ ├── reply.png │ │ │ │ │ │ ├── retweet-hover.png │ │ │ │ │ │ ├── retweet.png │ │ │ │ │ │ ├── share-hover.png │ │ │ │ │ │ └── share.png │ │ │ │ │ ├── text-editor │ │ │ │ │ │ ├── quill.min.js │ │ │ │ │ │ └── quill.snow.css │ │ │ │ │ └── tic-tac-toe │ │ │ │ │ │ ├── o.png │ │ │ │ │ │ └── x.png │ │ │ │ └── ui_utils.js │ │ │ ├── core │ │ │ │ ├── core.css │ │ │ │ ├── core.js │ │ │ │ ├── d3.v3.min.js │ │ │ │ ├── jquery-ui │ │ │ │ │ ├── external │ │ │ │ │ │ └── jquery │ │ │ │ │ │ │ └── jquery.js │ │ │ │ │ ├── images │ │ │ │ │ │ ├── ui-icons_444444_256x240.png │ │ │ │ │ │ ├── ui-icons_555555_256x240.png │ │ │ │ │ │ ├── ui-icons_777620_256x240.png │ │ │ │ │ │ ├── ui-icons_777777_256x240.png │ │ │ │ │ │ ├── ui-icons_cc0000_256x240.png │ │ │ │ │ │ └── ui-icons_ffffff_256x240.png │ │ │ │ │ ├── jquery-ui.min.css │ │ │ │ │ ├── jquery-ui.min.js │ │ │ │ │ ├── jquery-ui.structure.min.css │ │ │ │ │ └── jquery-ui.theme.min.css │ │ │ │ ├── jscolor.min.js │ │ │ │ └── record.js │ │ │ ├── flight │ │ │ │ ├── AA │ │ │ │ │ ├── apps │ │ │ │ │ │ └── common │ │ │ │ │ │ │ └── js │ │ │ │ │ │ │ ├── aacom.js │ │ │ │ │ │ │ ├── aacomDevice.js │ │ │ │ │ │ │ ├── airportcode.js │ │ │ │ │ │ │ ├── cookieconsent.js │ │ │ │ │ │ │ ├── jquery │ │ │ │ │ │ │ └── aacom │ │ │ │ │ │ │ │ ├── plugins │ │ │ │ │ │ │ │ ├── aaAirportAutoComplete.js │ │ │ │ │ │ │ │ ├── aaCache.js │ │ │ │ │ │ │ │ ├── aaCookie.js │ │ │ │ │ │ │ │ ├── aaCountryLanSelect.js │ │ │ │ │ │ │ │ ├── aaDropdownPanel.js │ │ │ │ │ │ │ │ ├── aaFooterAds.js │ │ │ │ │ │ │ │ └── aaTextBoxMessage.js │ │ │ │ │ │ │ │ └── utilities │ │ │ │ │ │ │ │ ├── aaUtilities-2.1.js │ │ │ │ │ │ │ │ └── aaUtils.js │ │ │ │ │ │ │ └── wa.js │ │ │ │ │ ├── content │ │ │ │ │ │ ├── common │ │ │ │ │ │ │ └── css │ │ │ │ │ │ │ │ └── v3 │ │ │ │ │ │ │ │ ├── core.css │ │ │ │ │ │ │ │ ├── jquery-ui-1.10-aa.css │ │ │ │ │ │ │ │ ├── reservation │ │ │ │ │ │ │ │ └── findFlights │ │ │ │ │ │ │ │ │ └── mobile │ │ │ │ │ │ │ │ │ └── findFlights.css │ │ │ │ │ │ │ │ └── responsive.css │ │ │ │ │ │ ├── fonts │ │ │ │ │ │ │ ├── american-v2 │ │ │ │ │ │ │ │ ├── americansans-bold.woff │ │ │ │ │ │ │ │ ├── americansans-light.woff │ │ │ │ │ │ │ │ ├── americansans-medium.woff │ │ │ │ │ │ │ │ └── americansans-regular.woff │ │ │ │ │ │ │ └── icons │ │ │ │ │ │ │ │ └── american-icons-v4-4.woff │ │ │ │ │ │ └── images │ │ │ │ │ │ │ ├── chrome │ │ │ │ │ │ │ ├── icons │ │ │ │ │ │ │ │ └── loading.gif │ │ │ │ │ │ │ └── rebrand │ │ │ │ │ │ │ │ ├── aa-flight-icon.png │ │ │ │ │ │ │ │ ├── aa-icons-flags-sprite.png │ │ │ │ │ │ │ │ ├── aa-logo.png │ │ │ │ │ │ │ │ ├── down-arrow.png │ │ │ │ │ │ │ │ ├── favicon.png │ │ │ │ │ │ │ │ ├── oneworld.png │ │ │ │ │ │ │ │ ├── shadow-down.png │ │ │ │ │ │ │ │ ├── shadow-vertical-150.png │ │ │ │ │ │ │ │ ├── shadow12-down.png │ │ │ │ │ │ │ │ ├── shadow12-up.png │ │ │ │ │ │ │ │ └── shadow3-down.png │ │ │ │ │ │ │ └── graphics │ │ │ │ │ │ │ └── icons │ │ │ │ │ │ │ └── aa-jqueryUIicons-sprite.png │ │ │ │ │ ├── dataset-AA.js │ │ │ │ │ ├── index.html │ │ │ │ │ ├── js │ │ │ │ │ │ ├── aa │ │ │ │ │ │ │ ├── common │ │ │ │ │ │ │ │ ├── aa-utility-menu.js │ │ │ │ │ │ │ │ ├── aacom-ui-1.0.0.js │ │ │ │ │ │ │ │ └── core-2.0.0.js │ │ │ │ │ │ │ ├── modules │ │ │ │ │ │ │ │ ├── airportLookup.js │ │ │ │ │ │ │ │ ├── ajax.js │ │ │ │ │ │ │ │ ├── browserdetect.js │ │ │ │ │ │ │ │ ├── commonsetup.js │ │ │ │ │ │ │ │ ├── mobileDatePicker.js │ │ │ │ │ │ │ │ ├── utilities.js │ │ │ │ │ │ │ │ └── widgets.js │ │ │ │ │ │ │ ├── plugins │ │ │ │ │ │ │ │ └── noBounce.js │ │ │ │ │ │ │ └── shopping │ │ │ │ │ │ │ │ └── mobileSearchFlights.js │ │ │ │ │ │ └── libs │ │ │ │ │ │ │ ├── jquery │ │ │ │ │ │ │ ├── jquery-1.11.1.min.js │ │ │ │ │ │ │ ├── jquery-migrate-1.2.1.min.js │ │ │ │ │ │ │ └── ui │ │ │ │ │ │ │ │ └── 1.10 │ │ │ │ │ │ │ │ ├── i18n │ │ │ │ │ │ │ │ └── jquery.ui.datepicker-en-aa.js │ │ │ │ │ │ │ │ └── jquery-ui.min.js │ │ │ │ │ │ │ └── modernizr-2.8.1.js │ │ │ │ │ ├── original.html │ │ │ │ │ ├── surrogate │ │ │ │ │ │ ├── airportLookup.js │ │ │ │ │ │ └── airports.json │ │ │ │ │ └── wrapper.html │ │ │ │ ├── Alaska-auto-medium │ │ │ │ │ ├── images │ │ │ │ │ │ ├── aura.png │ │ │ │ │ │ ├── cal3.png │ │ │ │ │ │ ├── chkboxes3.png │ │ │ │ │ │ ├── clear_text2.png │ │ │ │ │ │ ├── collapse.png │ │ │ │ │ │ ├── expand.png │ │ │ │ │ │ ├── flight_arrow.png │ │ │ │ │ │ ├── geo.png │ │ │ │ │ │ ├── home.png │ │ │ │ │ │ ├── info2.png │ │ │ │ │ │ ├── leftright.png │ │ │ │ │ │ ├── logo2.png │ │ │ │ │ │ └── logos │ │ │ │ │ │ │ ├── AA.png │ │ │ │ │ │ │ ├── AS.png │ │ │ │ │ │ │ ├── DL.png │ │ │ │ │ │ │ └── VX.png │ │ │ │ │ ├── index.html │ │ │ │ │ ├── mobileweb-v3-28-6227-21813.css │ │ │ │ │ ├── scripts │ │ │ │ │ │ ├── datepickr.js │ │ │ │ │ │ ├── main.js │ │ │ │ │ │ └── shopbook.js │ │ │ │ │ ├── stylesheets │ │ │ │ │ │ └── circular │ │ │ │ │ │ │ ├── ASCircularWeb-Bold.woff │ │ │ │ │ │ │ └── ASCircularWeb-Book.woff │ │ │ │ │ ├── surrogate │ │ │ │ │ │ ├── airportLookup.js │ │ │ │ │ │ └── airports.json │ │ │ │ │ └── wrapper.html │ │ │ │ ├── Alaska-auto │ │ │ │ │ ├── images │ │ │ │ │ │ ├── aura.png │ │ │ │ │ │ ├── cal3.png │ │ │ │ │ │ ├── chkboxes3.png │ │ │ │ │ │ ├── clear_text2.png │ │ │ │ │ │ ├── collapse.png │ │ │ │ │ │ ├── expand.png │ │ │ │ │ │ ├── flight_arrow.png │ │ │ │ │ │ ├── geo.png │ │ │ │ │ │ ├── home.png │ │ │ │ │ │ ├── info2.png │ │ │ │ │ │ ├── leftright.png │ │ │ │ │ │ ├── logo2.png │ │ │ │ │ │ └── logos │ │ │ │ │ │ │ ├── AA.png │ │ │ │ │ │ │ ├── AS.png │ │ │ │ │ │ │ ├── DL.png │ │ │ │ │ │ │ └── VX.png │ │ │ │ │ ├── index.html │ │ │ │ │ ├── mobileweb-v3-28-6227-21813.css │ │ │ │ │ ├── scripts │ │ │ │ │ │ ├── datepickr.js │ │ │ │ │ │ ├── main.js │ │ │ │ │ │ └── shopbook.js │ │ │ │ │ ├── stylesheets │ │ │ │ │ │ └── circular │ │ │ │ │ │ │ ├── ASCircularWeb-Bold.woff │ │ │ │ │ │ │ └── ASCircularWeb-Book.woff │ │ │ │ │ ├── surrogate │ │ │ │ │ │ ├── airportLookup.js │ │ │ │ │ │ └── airports.json │ │ │ │ │ └── wrapper.html │ │ │ │ ├── Alaska │ │ │ │ │ ├── dataset-Alaska.js │ │ │ │ │ ├── images │ │ │ │ │ │ ├── aura.png │ │ │ │ │ │ ├── cal3.png │ │ │ │ │ │ ├── chkboxes3.png │ │ │ │ │ │ ├── clear_text2.png │ │ │ │ │ │ ├── collapse.png │ │ │ │ │ │ ├── expand.png │ │ │ │ │ │ ├── flight_arrow.png │ │ │ │ │ │ ├── geo.png │ │ │ │ │ │ ├── home.png │ │ │ │ │ │ ├── info2.png │ │ │ │ │ │ ├── leftright.png │ │ │ │ │ │ ├── logo2.png │ │ │ │ │ │ └── logos │ │ │ │ │ │ │ ├── AA.png │ │ │ │ │ │ │ ├── AS.png │ │ │ │ │ │ │ ├── DL.png │ │ │ │ │ │ │ └── VX.png │ │ │ │ │ ├── index.html │ │ │ │ │ ├── mobileweb-v3-28-6227-21813.css │ │ │ │ │ ├── original.html │ │ │ │ │ ├── scripts │ │ │ │ │ │ ├── datepickr.js │ │ │ │ │ │ ├── main.js │ │ │ │ │ │ └── shopbook.js │ │ │ │ │ ├── stylesheets │ │ │ │ │ │ └── circular │ │ │ │ │ │ │ ├── ASCircularWeb-Bold.woff │ │ │ │ │ │ │ └── ASCircularWeb-Book.woff │ │ │ │ │ ├── surrogate │ │ │ │ │ │ ├── airportLookup.js │ │ │ │ │ │ └── airports.json │ │ │ │ │ └── wrapper.html │ │ │ │ └── flight-common │ │ │ │ │ ├── inject.js │ │ │ │ │ ├── wrapper.css │ │ │ │ │ └── wrapper.js │ │ │ └── miniwob │ │ │ │ ├── bisect-angle.html │ │ │ │ ├── book-flight-nodelay.html │ │ │ │ ├── book-flight.html │ │ │ │ ├── chase-circle.html │ │ │ │ ├── choose-date-easy.html │ │ │ │ ├── choose-date-medium.html │ │ │ │ ├── choose-date-nodelay.html │ │ │ │ ├── choose-date.html │ │ │ │ ├── choose-list.html │ │ │ │ ├── circle-center.html │ │ │ │ ├── click-button-sequence.html │ │ │ │ ├── click-button.html │ │ │ │ ├── click-checkboxes-large.html │ │ │ │ ├── click-checkboxes-soft.html │ │ │ │ ├── click-checkboxes-transfer.html │ │ │ │ ├── click-checkboxes.html │ │ │ │ ├── click-collapsible-2-nodelay.html │ │ │ │ ├── click-collapsible-2.html │ │ │ │ ├── click-collapsible-nodelay.html │ │ │ │ ├── click-collapsible.html │ │ │ │ ├── click-color.html │ │ │ │ ├── click-dialog-2.html │ │ │ │ ├── click-dialog.html │ │ │ │ ├── click-link.html │ │ │ │ ├── click-menu-2.html │ │ │ │ ├── click-menu.html │ │ │ │ ├── click-option.html │ │ │ │ ├── click-pie-nodelay.html │ │ │ │ ├── click-pie.html │ │ │ │ ├── click-scroll-list.html │ │ │ │ ├── click-shades.html │ │ │ │ ├── click-shape.html │ │ │ │ ├── click-tab-2-easy.html │ │ │ │ ├── click-tab-2-hard.html │ │ │ │ ├── click-tab-2-medium.html │ │ │ │ ├── click-tab-2.html │ │ │ │ ├── click-tab.html │ │ │ │ ├── click-test-2.html │ │ │ │ ├── click-test-transfer.html │ │ │ │ ├── click-test.html │ │ │ │ ├── click-widget.html │ │ │ │ ├── copy-paste-2.html │ │ │ │ ├── copy-paste.html │ │ │ │ ├── count-shape.html │ │ │ │ ├── count-sides.html │ │ │ │ ├── drag-box.html │ │ │ │ ├── drag-cube.html │ │ │ │ ├── drag-item.html │ │ │ │ ├── drag-items-grid.html │ │ │ │ ├── drag-items.html │ │ │ │ ├── drag-shapes.html │ │ │ │ ├── drag-sort-numbers.html │ │ │ │ ├── email-inbox-delete.html │ │ │ │ ├── email-inbox-forward-nl-turk.html │ │ │ │ ├── email-inbox-forward-nl.html │ │ │ │ ├── email-inbox-forward.html │ │ │ │ ├── email-inbox-important.html │ │ │ │ ├── email-inbox-nl-turk.html │ │ │ │ ├── email-inbox-noscroll.html │ │ │ │ ├── email-inbox-reply.html │ │ │ │ ├── email-inbox-star-reply.html │ │ │ │ ├── email-inbox.html │ │ │ │ ├── enter-date.html │ │ │ │ ├── enter-password.html │ │ │ │ ├── enter-text-2.html │ │ │ │ ├── enter-text-dynamic.html │ │ │ │ ├── enter-text.html │ │ │ │ ├── enter-time.html │ │ │ │ ├── find-midpoint.html │ │ │ │ ├── find-word.html │ │ │ │ ├── focus-text-2.html │ │ │ │ ├── focus-text.html │ │ │ │ ├── grid-coordinate.html │ │ │ │ ├── guess-number.html │ │ │ │ ├── highlight-text-2.html │ │ │ │ ├── highlight-text.html │ │ │ │ ├── identify-shape.html │ │ │ │ ├── login-user-popup.html │ │ │ │ ├── login-user.html │ │ │ │ ├── moving-items.html │ │ │ │ ├── multi-layouts.html │ │ │ │ ├── multi-orderings.html │ │ │ │ ├── navigate-tree.html │ │ │ │ ├── number-checkboxes.html │ │ │ │ ├── read-table-2.html │ │ │ │ ├── read-table.html │ │ │ │ ├── resize-textarea.html │ │ │ │ ├── right-angle.html │ │ │ │ ├── scroll-text-2.html │ │ │ │ ├── scroll-text.html │ │ │ │ ├── search-engine.html │ │ │ │ ├── simon-says.html │ │ │ │ ├── simple-algebra.html │ │ │ │ ├── simple-arithmetic.html │ │ │ │ ├── social-media-all.html │ │ │ │ ├── social-media-some.html │ │ │ │ ├── social-media.html │ │ │ │ ├── terminal.html │ │ │ │ ├── terminal_reproduce.html │ │ │ │ ├── text-editor.html │ │ │ │ ├── text-transform.html │ │ │ │ ├── tic-tac-toe.html │ │ │ │ ├── unicode-test.html │ │ │ │ ├── use-autocomplete-nodelay.html │ │ │ │ ├── use-autocomplete.html │ │ │ │ ├── use-colorwheel-2.html │ │ │ │ ├── use-colorwheel.html │ │ │ │ ├── use-slider-2.html │ │ │ │ ├── use-slider.html │ │ │ │ ├── use-spinner.html │ │ │ │ └── visual-addition.html │ │ │ ├── instance.py │ │ │ ├── reward.py │ │ │ ├── screenshot.py │ │ │ ├── state.py │ │ │ └── utils.py │ └── setup.py ├── eval-gpt-3.5-turbo.sh ├── eval-gpt-4.sh ├── eval-tgi.sh ├── llm_agent.py ├── main.py ├── metrics.py ├── prompt.py ├── prompt │ ├── action.txt │ ├── base.txt │ ├── choose-list │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-button-sequence │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-button │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-checkboxes-large │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-checkboxes-soft │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-checkboxes-transfer │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-checkboxes │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-collapsible-2 │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-collapsible │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-dialog-2 │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-dialog │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-menu │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-option │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-scroll-list │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-shades │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-shape │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-tab-2-hard │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-tab-2 │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-tab │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-test │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── click-widget │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── count-shape │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── email-inbox-forward-nl-turk │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── email-inbox-forward-nl │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── email-inbox-nl-turk │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── email-inbox │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── enter-date │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── enter-time │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── example.txt │ ├── few-shot.json │ ├── first_action.txt │ ├── focus-text │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── grid-coordinate │ │ ├── action.txt │ │ ├── base.txt │ │ ├── check_finish.txt │ │ ├── done.txt │ │ ├── edit_plan.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── new_plan.txt │ │ ├── rci_action.txt │ │ ├── rci_answer.txt │ │ └── update_action.txt │ ├── identify-shape │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── initialize_plan.txt │ ├── login-user-popup │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── multi-layouts │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── navigate-tree │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── rci_action.txt │ ├── search-engine │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── simple-algebra │ │ ├── action.txt │ │ ├── base.txt │ │ ├── check_finish.txt │ │ ├── done.txt │ │ ├── edit_plan.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── new_plan.txt │ │ ├── rci_action.txt │ │ ├── rci_answer.txt │ │ └── update_action.txt │ ├── social-media-all │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── social-media-some │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── social-media │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── terminal │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── tic-tac-toe │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ ├── update_action.txt │ ├── use-autocomplete │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt │ └── use-spinner │ │ ├── action.txt │ │ ├── base.txt │ │ ├── example.txt │ │ ├── first_action.txt │ │ ├── initialize_plan.txt │ │ ├── rci_action.txt │ │ └── update_action.txt └── requirements.txt ├── rewoo ├── .gitignore ├── LICENSE ├── README.md ├── algos │ ├── PWS.py │ ├── __init__.py │ ├── notool.py │ └── react.py ├── alpaca │ ├── __init__.py │ ├── lora.py │ ├── templates │ │ ├── README.md │ │ ├── alpaca.json │ │ ├── alpaca_legacy.json │ │ ├── alpaca_short.json │ │ └── vigogne.json │ └── utils │ │ ├── README.md │ │ ├── __init__.py │ │ ├── callbacks.py │ │ └── prompter.py ├── eval-gpt-3.5-turbo.sh ├── eval-gpt-4.sh ├── eval-tgi.sh ├── metrics.py ├── nodes │ ├── LLMNode.py │ ├── Node.py │ ├── NodeCofig.py │ ├── Planner.py │ ├── Solver.py │ ├── Worker.py │ └── __init__.py ├── prompts │ ├── __init__.py │ ├── fewshots.py │ ├── planner.py │ ├── solver.py │ └── wiki_prompt.py ├── requirements.txt ├── run_eval.py └── utils │ ├── CustomDocstoreExplorer.py │ ├── DataLoader.py │ ├── Evaluator.py │ ├── __init__.py │ └── util.py ├── science-world ├── .gitignore ├── README.md ├── data_utils │ ├── READMD.md │ ├── __init__.py │ ├── data_convert.py │ ├── data_utils.py │ ├── demos.json │ └── goldpaths-all.zip ├── eval-gpt-3.5.sh ├── eval-gpt-4.sh ├── eval-tgi.sh ├── eval.py ├── eval_utils.py ├── fast_agent │ ├── deepspeed_reqs.txt │ ├── ds_train.py │ ├── ds_train.sh │ └── zero_2_bf16.json ├── metrics.py ├── prompts │ ├── README.md │ ├── convert.py │ ├── prompt.json │ └── prompt_orig.json ├── requirements.txt └── slow_agent │ ├── local_llm.py │ ├── run_gradio.py │ └── utils.py └── webarena ├── .gitignore ├── LICENSE ├── README.md ├── agent ├── __init__.py ├── agent.py └── prompts │ ├── README.md │ ├── __init__.py │ ├── prompt_constructor.py │ ├── raw │ ├── p_cot_id_actree_2s.py │ └── p_direct_id_actree_2s.py │ └── to_json.py ├── browser_env ├── __init__.py ├── actions.py ├── async_envs.py ├── auto_login.py ├── constants.py ├── env_config.py ├── envs.py ├── helper_functions.py ├── processors.py ├── py.typed ├── trajectory.py └── utils.py ├── config_files ├── examples │ ├── 1.json │ ├── 2.json │ ├── 3.json │ └── 4.json └── test.raw.json ├── environment_docker ├── README.md └── webarena-homepage │ ├── app.py │ ├── static │ └── figures │ │ ├── calculator.png │ │ ├── cms.png │ │ ├── gitlab.png │ │ ├── manual1.png │ │ ├── manual2.png │ │ ├── map.png │ │ ├── onestopshop.png │ │ ├── password.png │ │ ├── reddit.png │ │ ├── scratchpad.png │ │ └── wikipedia.png │ └── templates │ ├── calculator.html │ ├── index.html │ └── scratchpad.html ├── eval-gpt-3.5-turbo.sh ├── eval-gpt-4.sh ├── eval-tgi.sh ├── evaluation_harness ├── __init__.py ├── evaluators.py └── helper_functions.py ├── llms ├── __init__.py ├── lm_config.py ├── providers │ └── openai_utils.py └── tokenizers.py ├── prepare.sh ├── run.py ├── setup.cfg └── setup.py /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | tgi_data/ 2 | __pycache__/ 3 | .DS_Store -------------------------------------------------------------------------------- /AgentBench.old/configs/agents/do_nothing.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/agents/do_nothing.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/agents/tgi_clients/AgentLM-13b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/agents/tgi_clients/AgentLM-13b.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/agents/tgi_clients/AgentLM-70b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/agents/tgi_clients/AgentLM-70b.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/agents/tgi_clients/AgentLM-7b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/agents/tgi_clients/AgentLM-7b.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/tasks/alfworld/dev.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/tasks/alfworld/dev.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/tasks/alfworld/std.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/tasks/alfworld/std.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/tasks/card_game/dev.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/tasks/card_game/dev.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/tasks/card_game/ext.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/tasks/card_game/ext.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/tasks/card_game/std.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/tasks/card_game/std.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/tasks/dbbench/dev.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/tasks/dbbench/dev.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/tasks/dbbench/std.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/tasks/dbbench/std.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/tasks/knowledgegraph/dev.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/tasks/knowledgegraph/dev.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/tasks/knowledgegraph/std.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/tasks/knowledgegraph/std.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/tasks/mind2web/dev.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/tasks/mind2web/dev.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/tasks/mind2web/std.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/tasks/mind2web/std.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/tasks/os_interaction/dev.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/tasks/os_interaction/dev.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/tasks/os_interaction/std.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/tasks/os_interaction/std.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/tasks/webshop/dev.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/tasks/webshop/dev.yaml -------------------------------------------------------------------------------- /AgentBench.old/configs/tasks/webshop/std.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/configs/tasks/webshop/std.yaml -------------------------------------------------------------------------------- /AgentBench.old/data/alfworld/dev.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/alfworld/dev.json -------------------------------------------------------------------------------- /AgentBench.old/data/alfworld/std.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/alfworld/std.json -------------------------------------------------------------------------------- /AgentBench.old/data/dbbench/dev.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/dbbench/dev.jsonl -------------------------------------------------------------------------------- /AgentBench.old/data/dbbench/standard.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/dbbench/standard.jsonl -------------------------------------------------------------------------------- /AgentBench.old/data/knowledgegraph/dev.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/knowledgegraph/dev.json -------------------------------------------------------------------------------- /AgentBench.old/data/knowledgegraph/std.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/knowledgegraph/std.json -------------------------------------------------------------------------------- /AgentBench.old/data/mind2web/prompt/llm_prompt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/mind2web/prompt/llm_prompt.json -------------------------------------------------------------------------------- /AgentBench.old/data/mind2web/prompt/llm_prompt_cot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/mind2web/prompt/llm_prompt_cot.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/1/stock.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/1/stock.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/2/environment.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/2/environment.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/3/ac.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/3/ac.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/4/N11.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/4/N11.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/4/N225.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/4/N225.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/4/N37.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/4/N37.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/4/N4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/4/N4.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/4/N41.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/4/N41.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/4/Q09.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/4/Q09.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/4/Q19.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/4/Q19.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/4/Q30.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/4/Q30.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/4/Q47.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/4/Q47.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/4/Q49.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/4/Q49.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/5/new.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/5/new.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/6-backup.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/6-backup.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/6/new.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/6/new.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/7/2023-06-07-00-13-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/7/2023-06-07-00-13-14.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/data/dev.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/data/dev.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/res/dockerfiles/default: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/res/dockerfiles/default -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/res/dockerfiles/packages: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/res/dockerfiles/packages -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/res/dockerfiles/ubuntu: -------------------------------------------------------------------------------- 1 | FROM ubuntu 2 | RUN echo "tmp" > file.txt 3 | CMD ["bash"] -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/1/check/1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/1/check/1.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/1/check/containing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/1/check/containing.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/1/check/in.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/1/check/in.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/1/check/integer-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/1/check/integer-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/1/check/size-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/1/check/size-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/1/check/string-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/1/check/string-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/1/example/1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/1/example/1.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/1/init/gen_words.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/1/init/gen_words.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/1/init/install_nettools.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/1/init/install_nettools.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/1/init/nested_folders.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/1/init/nested_folders.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/1/init/stock-log.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/1/init/stock-log.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/2/check/1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/2/check/1.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/2/check/containing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/2/check/containing.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/2/check/in.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/2/check/in.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/2/check/integer-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/2/check/integer-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/2/check/size-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/2/check/size-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/2/check/string-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/2/check/string-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/2/example/1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/2/example/1.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/2/init/gen_words.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/2/init/gen_words.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/2/init/install_nettools.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/2/init/install_nettools.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/2/init/nested_folders.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/2/init/nested_folders.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/2/init/stock-log.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/2/init/stock-log.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/3/check/1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/3/check/1.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/3/check/containing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/3/check/containing.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/3/check/in.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/3/check/in.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/3/check/integer-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/3/check/integer-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/3/check/size-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/3/check/size-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/3/check/string-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/3/check/string-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/3/example/1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/3/example/1.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/3/init/gen_words.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/3/init/gen_words.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/3/init/install_nettools.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/3/init/install_nettools.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/3/init/nested_folders.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/3/init/nested_folders.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/3/init/stock-log.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/3/init/stock-log.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/4/check/1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/4/check/1.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/4/check/containing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/4/check/containing.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/4/check/in.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/4/check/in.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/4/check/integer-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/4/check/integer-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/4/check/size-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/4/check/size-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/4/check/string-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/4/check/string-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/4/example/1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/4/example/1.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/4/init/gen_words.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/4/init/gen_words.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/4/init/install_nettools.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/4/init/install_nettools.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/4/init/nested_folders.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/4/init/nested_folders.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/4/init/stock-log.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/4/init/stock-log.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/5/check/containing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/5/check/containing.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/5/check/in.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/5/check/in.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/5/check/integer-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/5/check/integer-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/5/check/size-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/5/check/size-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/5/check/string-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/5/check/string-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/5/checking/0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/5/checking/0.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/5/checking/1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/5/checking/1.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/5/checking/2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/5/checking/2.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/5/example/0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/5/example/0.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/5/example/1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/5/example/1.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/5/example/2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/5/example/2.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/5/file.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/5/file.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/5/init/1.sh: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/5/new.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/5/new.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/5/prompt.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/5/prompt.md -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/6/collected.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/6/collected.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/6/collecting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/6/collecting.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/6/new.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/6/new.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/6/permission.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/6/permission.json -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/6/stackoverflow/spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/6/stackoverflow/spider.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/7/check/1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/7/check/1.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/7/check/containing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/7/check/containing.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/7/check/in.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/7/check/in.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/7/check/integer-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/7/check/integer-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/7/check/size-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/7/check/size-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/7/check/string-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/7/check/string-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/7/example/1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/7/example/1.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/7/init/gen_words.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/7/init/gen_words.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/7/init/install_nettools.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/7/init/install_nettools.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/7/init/nested_folders.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/7/init/nested_folders.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/7/init/stock-log.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/7/init/stock-log.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/dev/check/0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/dev/check/0.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/dev/check/containing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/dev/check/containing.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/dev/check/in.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/dev/check/in.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/dev/check/integer-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/dev/check/integer-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/dev/check/size-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/dev/check/size-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/dev/check/string-match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/dev/check/string-match.py -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/dev/example/0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/dev/example/0.sh -------------------------------------------------------------------------------- /AgentBench.old/data/os_interaction/scripts/dev/init/stock-log.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/data/os_interaction/scripts/dev/init/stock-log.sh -------------------------------------------------------------------------------- /AgentBench.old/eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/eval.sh -------------------------------------------------------------------------------- /AgentBench.old/eval/AgentLM-13b-eval-all.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/eval/AgentLM-13b-eval-all.sh -------------------------------------------------------------------------------- /AgentBench.old/eval/AgentLM-70b-eval-all.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/eval/AgentLM-70b-eval-all.sh -------------------------------------------------------------------------------- /AgentBench.old/eval/AgentLM-7b-eval-all.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/eval/AgentLM-7b-eval-all.sh -------------------------------------------------------------------------------- /AgentBench.old/eval/single-task/alfworld.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/eval/single-task/alfworld.sh -------------------------------------------------------------------------------- /AgentBench.old/eval/single-task/card.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/eval/single-task/card.sh -------------------------------------------------------------------------------- /AgentBench.old/eval/single-task/db.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/eval/single-task/db.sh -------------------------------------------------------------------------------- /AgentBench.old/eval/single-task/eval_single_setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/eval/single-task/eval_single_setup.sh -------------------------------------------------------------------------------- /AgentBench.old/eval/single-task/kg.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/eval/single-task/kg.sh -------------------------------------------------------------------------------- /AgentBench.old/eval/single-task/mind2web.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/eval/single-task/mind2web.sh -------------------------------------------------------------------------------- /AgentBench.old/eval/single-task/os.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/eval/single-task/os.sh -------------------------------------------------------------------------------- /AgentBench.old/eval/single-task/webshop.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/eval/single-task/webshop.sh -------------------------------------------------------------------------------- /AgentBench.old/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/evaluate.py -------------------------------------------------------------------------------- /AgentBench.old/readme-zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/readme-zh.md -------------------------------------------------------------------------------- /AgentBench.old/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/readme.md -------------------------------------------------------------------------------- /AgentBench.old/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/requirements.txt -------------------------------------------------------------------------------- /AgentBench.old/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/__init__.py -------------------------------------------------------------------------------- /AgentBench.old/src/agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/agent.py -------------------------------------------------------------------------------- /AgentBench.old/src/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/agents/__init__.py -------------------------------------------------------------------------------- /AgentBench.old/src/agents/do_nothing_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/agents/do_nothing_agent.py -------------------------------------------------------------------------------- /AgentBench.old/src/agents/tgi_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/agents/tgi_client.py -------------------------------------------------------------------------------- /AgentBench.old/src/configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/configs.py -------------------------------------------------------------------------------- /AgentBench.old/src/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/task.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/__init__.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/alfworld/__init__.py: -------------------------------------------------------------------------------- 1 | from .task import ALFWorld -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/alfworld/configs/base_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/alfworld/configs/base_config.yaml -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/alfworld/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/alfworld/environment.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/alfworld/prompts/alfworld_multiturn.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/alfworld/prompts/alfworld_multiturn.json -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/alfworld/prompts/alfworld_multiturn_cot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/alfworld/prompts/alfworld_multiturn_cot.json -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/alfworld/prompts/alfworld_multiturn_new.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/alfworld/prompts/alfworld_multiturn_new.json -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/alfworld/prompts/alfworld_multiturn_pure.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/alfworld/prompts/alfworld_multiturn_pure.json -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/alfworld/prompts/alfworld_singleturn.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/alfworld/prompts/alfworld_singleturn.json -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/alfworld/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/alfworld/task.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/alfworld/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/alfworld/utils.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/C++/Action.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/C++/Action.cpp -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/C++/Action.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/C++/Action.hpp -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/C++/Action_sample.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/C++/Action_sample.cpp -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/C++/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/C++/Makefile -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/C++/main.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/C++/main.cpp -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/C++/sdk/ai_client.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/C++/sdk/ai_client.hpp -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/C++/sdk/jsoncpp/json/json.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/C++/sdk/jsoncpp/json/json.h -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/C++/sdk/jsoncpp/jsoncpp.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/C++/sdk/jsoncpp/jsoncpp.cpp -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/AI_Cn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/AI_Cn.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/AI_Cn_en.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/AI_Cn_en.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/AI_En.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/AI_En.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/Action.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/Action.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/action1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/action1.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/action2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/action2.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/action3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/action3.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/basline1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/basline1.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/basline2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/basline2.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/basline3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/basline3.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/client.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/main.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/prompt/cn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/prompt/cn.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/prompt/cn_en.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/prompt/cn_en.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/prompt/en.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/prompt/en.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/CMakeLists.txt -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/ai_client.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/ai_client.hpp -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/ai_client.pyi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/ai_client.pyi -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 3 2 | -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/build/Makefile -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/py_ai_sdk.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/py_ai_sdk.cpp -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/py_json_cast.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/sdk/py_json_cast.hpp -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/AI_SDK/Python/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/AI_SDK/Python/test.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/Tools/player.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/Tools/player.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/__init__.py: -------------------------------------------------------------------------------- 1 | from .task import CardGame -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/judger/cal_metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/judger/cal_metric.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/judger/judger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/judger/judger.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/judger/rserver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/judger/rserver.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/judger/run_all.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/judger/run_all.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | cmake-build-debug -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/logic/Makefile -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/bin/main: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/logic/bin/main -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/src/aqua_sdk.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/logic/src/aqua_sdk.cpp -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/src/fish.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/logic/src/fish.cpp -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/src/fish.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/logic/src/fish.h -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/src/fishset.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/logic/src/fishset.cpp -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/src/fishset.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/logic/src/fishset.h -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/src/game.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/logic/src/game.cpp -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/src/game.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/logic/src/game.h -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/src/jsoncpp/json/json.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/logic/src/jsoncpp/json/json.h -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/src/jsoncpp/jsoncpp.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/logic/src/jsoncpp/jsoncpp.cpp -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/src/main: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/logic/src/main -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/src/main.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/logic/src/main.cpp -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/src/player.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/logic/src/player.cpp -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/src/player.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/logic/src/player.h -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/logic/src/timer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/logic/src/timer.h -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/server.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/task.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/card_game/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/card_game/utils.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/composite_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/composite_task.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/dbbench/Interaction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/dbbench/Interaction.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/dbbench/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/dbbench/__init__.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/dbbench/requirements.txt: -------------------------------------------------------------------------------- 1 | mysql-connector-python==8.0.33 2 | docker==6.1.2 -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/example_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/example_task.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/knowledgegraph/__init__.py: -------------------------------------------------------------------------------- 1 | from .task import KnowledgeGraph -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/knowledgegraph/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/knowledgegraph/api.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/knowledgegraph/ontology/fb_roles: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/knowledgegraph/ontology/fb_roles -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/knowledgegraph/ontology/fb_types: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/knowledgegraph/ontology/fb_types -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/knowledgegraph/ontology/reverse_properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/knowledgegraph/ontology/reverse_properties -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/knowledgegraph/ontology/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/knowledgegraph/ontology/vocab.json -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/knowledgegraph/requirements.txt: -------------------------------------------------------------------------------- 1 | SPARQLWrapper 2 | networkx -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/knowledgegraph/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/knowledgegraph/task.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/knowledgegraph/utils/logic_form_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/knowledgegraph/utils/logic_form_util.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/knowledgegraph/utils/semparse_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/knowledgegraph/utils/semparse_util.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/knowledgegraph/utils/sparql_executer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/knowledgegraph/utils/sparql_executer.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/mind2web/__init__.py: -------------------------------------------------------------------------------- 1 | from .task import Mind2Web -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/mind2web/data_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/mind2web/data_utils/dom_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/mind2web/data_utils/dom_utils.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/mind2web/dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/mind2web/dataloader.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/mind2web/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/mind2web/task.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/os_interaction/__init__.py: -------------------------------------------------------------------------------- 1 | from .task import OSInteraction -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/os_interaction/images.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/os_interaction/images.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/os_interaction/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/os_interaction/requirements.txt -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/os_interaction/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/os_interaction/task.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/.github/ISSUE_TEMPLATE.md -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/.github/workflows/pytest.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/.github/workflows/pytest.yml -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/.gitignore -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/LICENSE.md -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/README.md -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/__init__.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/assets/diagram.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/assets/diagram.gif -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/assets/model_ckpts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/assets/model_ckpts.png -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/assets/transfer-logic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/assets/transfer-logic.png -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/baseline_models/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/baseline_models/.gitignore -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/baseline_models/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/baseline_models/README.md -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/baseline_models/agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/baseline_models/agent.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/baseline_models/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/baseline_models/env.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/baseline_models/generate_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/baseline_models/generate_search.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/baseline_models/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/baseline_models/logger.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/baseline_models/models/bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/baseline_models/models/bert.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/baseline_models/models/modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/baseline_models/models/modules.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/baseline_models/models/rnn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/baseline_models/models/rnn.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/baseline_models/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | datasets 3 | faiss-gpu 4 | transformers 5 | wandb -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/baseline_models/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/baseline_models/test.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/baseline_models/train_choice_il.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/baseline_models/train_choice_il.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/baseline_models/train_rl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/baseline_models/train_rl.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/baseline_models/train_search_il.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/baseline_models/train_search_il.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/conftest.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/requirements.txt -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/run_dev.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/run_dev.sh -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/run_envs/run_web_agent_site_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/run_envs/run_web_agent_site_env.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/run_envs/run_web_agent_text_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/run_envs/run_web_agent_text_env.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/run_prod.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python -m web_agent_site.app --log 3 | -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/run_web_agent_site_env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/run_web_agent_site_env.sh -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/run_web_agent_text_env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/run_web_agent_text_env.sh -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/search_engine/lucene_searcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/search_engine/lucene_searcher.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/search_engine/run_indexing.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/search_engine/run_indexing.sh -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/setup.sh -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/tests/transfer/test_predict_help.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/tests/transfer/test_predict_help.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/tests/web-agent-site/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/tests/web-agent-site/test_utils.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/transfer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/transfer/README.md -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/transfer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/transfer/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/transfer/app.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/transfer/predict_help.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/transfer/predict_help.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/transfer/webshop_lite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/transfer/webshop_lite.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/web_agent_site/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/web_agent_site/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/web_agent_site/app.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/web_agent_site/attributes/annotate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/web_agent_site/attributes/annotate.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/web_agent_site/engine/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/web_agent_site/engine/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/web_agent_site/engine/engine.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/web_agent_site/engine/goal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/web_agent_site/engine/goal.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/web_agent_site/engine/normalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/web_agent_site/engine/normalize.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/web_agent_site/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/web_agent_site/envs/__init__.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/web_agent_site/envs/chromedriver: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/web_agent_site/envs/chromedriver -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/web_agent_site/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/web_agent_site/models/__init__.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/web_agent_site/models/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/web_agent_site/models/models.py -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/web_agent_site/static/style.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/web_agent_site/static/style.css -------------------------------------------------------------------------------- /AgentBench.old/src/tasks/webshop/web_agent_site/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/tasks/webshop/web_agent_site/utils.py -------------------------------------------------------------------------------- /AgentBench.old/src/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/AgentBench.old/src/utils.py -------------------------------------------------------------------------------- /README-zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/README-zh.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/README.md -------------------------------------------------------------------------------- /assets/head-figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/assets/head-figure.png -------------------------------------------------------------------------------- /assets/head-figure.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/assets/head-figure.svg -------------------------------------------------------------------------------- /assets/main-figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/assets/main-figure.png -------------------------------------------------------------------------------- /assets/main-figure.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/assets/main-figure.svg -------------------------------------------------------------------------------- /docker/agentlm-13b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docker/agentlm-13b.yml -------------------------------------------------------------------------------- /docker/agentlm-70b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docker/agentlm-70b.yml -------------------------------------------------------------------------------- /docker/agentlm-7b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docker/agentlm-7b.yml -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/index.html -------------------------------------------------------------------------------- /docs/static/css/bulma-carousel.min.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/css/bulma-carousel.min.css -------------------------------------------------------------------------------- /docs/static/css/bulma-slider.min.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/css/bulma-slider.min.css -------------------------------------------------------------------------------- /docs/static/css/bulma.css.map.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/css/bulma.css.map.txt -------------------------------------------------------------------------------- /docs/static/css/bulma.min.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/css/bulma.min.css -------------------------------------------------------------------------------- /docs/static/css/fontawesome.all.min.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/css/fontawesome.all.min.css -------------------------------------------------------------------------------- /docs/static/css/index.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/css/index.css -------------------------------------------------------------------------------- /docs/static/images/case-study.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/images/case-study.svg -------------------------------------------------------------------------------- /docs/static/images/error-analysis.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/images/error-analysis.svg -------------------------------------------------------------------------------- /docs/static/images/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/images/favicon.ico -------------------------------------------------------------------------------- /docs/static/images/head-figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/images/head-figure.png -------------------------------------------------------------------------------- /docs/static/images/head-figure.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/images/head-figure.svg -------------------------------------------------------------------------------- /docs/static/images/main-figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/images/main-figure.png -------------------------------------------------------------------------------- /docs/static/images/main-figure.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/images/main-figure.svg -------------------------------------------------------------------------------- /docs/static/js/bulma-carousel.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/js/bulma-carousel.js -------------------------------------------------------------------------------- /docs/static/js/bulma-carousel.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/js/bulma-carousel.min.js -------------------------------------------------------------------------------- /docs/static/js/bulma-slider.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/js/bulma-slider.js -------------------------------------------------------------------------------- /docs/static/js/bulma-slider.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/js/bulma-slider.min.js -------------------------------------------------------------------------------- /docs/static/js/fontawesome.all.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/js/fontawesome.all.min.js -------------------------------------------------------------------------------- /docs/static/js/index.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/docs/static/js/index.js -------------------------------------------------------------------------------- /eval_general/eval_gsm8k_tgi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_general/eval_gsm8k_tgi.py -------------------------------------------------------------------------------- /eval_general/eval_mmlu_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_general/eval_mmlu_hf.py -------------------------------------------------------------------------------- /eval_general/eval_mt_bench_tgi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_general/eval_mt_bench_tgi.py -------------------------------------------------------------------------------- /eval_general/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_general/requirements.txt -------------------------------------------------------------------------------- /eval_heldout/hotpotQA/eval_hotpot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/hotpotQA/eval_hotpot.py -------------------------------------------------------------------------------- /eval_heldout/hotpotQA/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.174 2 | pandas<=2.0.0 -------------------------------------------------------------------------------- /eval_heldout/hotpotQA/src/agent_arch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/hotpotQA/src/agent_arch.py -------------------------------------------------------------------------------- /eval_heldout/hotpotQA/src/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/hotpotQA/src/config.py -------------------------------------------------------------------------------- /eval_heldout/hotpotQA/src/data/easy.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/hotpotQA/src/data/easy.joblib -------------------------------------------------------------------------------- /eval_heldout/hotpotQA/src/data/hard.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/hotpotQA/src/data/hard.joblib -------------------------------------------------------------------------------- /eval_heldout/hotpotQA/src/data/medium.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/hotpotQA/src/data/medium.joblib -------------------------------------------------------------------------------- /eval_heldout/hotpotQA/src/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/hotpotQA/src/evaluate.py -------------------------------------------------------------------------------- /eval_heldout/hotpotQA/src/fewshots.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/hotpotQA/src/fewshots.py -------------------------------------------------------------------------------- /eval_heldout/hotpotQA/src/hotpotqa_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/hotpotQA/src/hotpotqa_env.py -------------------------------------------------------------------------------- /eval_heldout/hotpotQA/src/llms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/hotpotQA/src/llms.py -------------------------------------------------------------------------------- /eval_heldout/hotpotQA/src/pre_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/hotpotQA/src/pre_prompt.py -------------------------------------------------------------------------------- /eval_heldout/hotpotQA/src/prompt.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/hotpotQA/src/prompt.txt -------------------------------------------------------------------------------- /eval_heldout/hotpotQA/src/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/hotpotQA/src/utils.py -------------------------------------------------------------------------------- /eval_heldout/hotpotQA/src/wikienv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/hotpotQA/src/wikienv.py -------------------------------------------------------------------------------- /eval_heldout/hotpotQA/src/wrappers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/hotpotQA/src/wrappers.py -------------------------------------------------------------------------------- /eval_heldout/miniwob++/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/.gitignore -------------------------------------------------------------------------------- /eval_heldout/miniwob++/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/LICENSE -------------------------------------------------------------------------------- /eval_heldout/miniwob++/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/README.md -------------------------------------------------------------------------------- /eval_heldout/miniwob++/available_tasks.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/available_tasks.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/computergym/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/computergym/.gitignore -------------------------------------------------------------------------------- /eval_heldout/miniwob++/computergym/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/computergym/README.md -------------------------------------------------------------------------------- /eval_heldout/miniwob++/computergym/computergym/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/computergym/computergym/__init__.py -------------------------------------------------------------------------------- /eval_heldout/miniwob++/computergym/computergym/miniwob/base_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/computergym/computergym/miniwob/base_env.py -------------------------------------------------------------------------------- /eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/.gitignore: -------------------------------------------------------------------------------- 1 | twistd.pid 2 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/js/aa/modules/commonsetup.js: -------------------------------------------------------------------------------- 1 | AAcom.modules.commonsetup=function(AAUI){/* NOT NEEDED */}; 2 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/computergym/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/computergym/setup.py -------------------------------------------------------------------------------- /eval_heldout/miniwob++/eval-gpt-3.5-turbo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/eval-gpt-3.5-turbo.sh -------------------------------------------------------------------------------- /eval_heldout/miniwob++/eval-gpt-4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/eval-gpt-4.sh -------------------------------------------------------------------------------- /eval_heldout/miniwob++/eval-tgi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/eval-tgi.sh -------------------------------------------------------------------------------- /eval_heldout/miniwob++/llm_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/llm_agent.py -------------------------------------------------------------------------------- /eval_heldout/miniwob++/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/main.py -------------------------------------------------------------------------------- /eval_heldout/miniwob++/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/metrics.py -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt.py -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/choose-list/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/choose-list/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/choose-list/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/choose-list/example.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/choose-list/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/choose-list/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/choose-list/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/choose-list/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/choose-list/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-button-sequence/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-button-sequence/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-button-sequence/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-button-sequence/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-button-sequence/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-button-sequence/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-button-sequence/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-button-sequence/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-button-sequence/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-button-sequence/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-button-sequence/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-button/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-button/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-button/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-button/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-button/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-button/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-button/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-button/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-button/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-button/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-button/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-large/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-large/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-checkboxes-large/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-large/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-checkboxes-large/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-large/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-large/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-large/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-checkboxes-large/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-large/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-checkboxes-large/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-soft/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-soft/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-checkboxes-soft/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-soft/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-checkboxes-soft/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-soft/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-soft/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-soft/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-checkboxes-soft/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-soft/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-checkboxes-soft/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-transfer/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-transfer/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-checkboxes-transfer/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-transfer/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-checkboxes-transfer/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-transfer/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-transfer/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes-transfer/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-checkboxes-transfer/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-checkboxes/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-checkboxes/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-checkboxes/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-checkboxes/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-checkboxes/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-collapsible-2/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-collapsible-2/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-collapsible-2/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-collapsible-2/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-collapsible-2/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-collapsible-2/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-collapsible-2/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-collapsible-2/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-collapsible-2/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-collapsible-2/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-collapsible-2/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-collapsible/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-collapsible/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-collapsible/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-collapsible/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-collapsible/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-collapsible/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-collapsible/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-collapsible/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-collapsible/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-collapsible/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-collapsible/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-dialog-2/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-dialog-2/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-dialog-2/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-dialog-2/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-dialog-2/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-dialog-2/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-dialog-2/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-dialog-2/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-dialog-2/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-dialog-2/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-dialog/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-dialog/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-dialog/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-dialog/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-dialog/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-dialog/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-dialog/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-dialog/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-dialog/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-dialog/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-menu/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-menu/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-menu/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-menu/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-menu/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-menu/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-menu/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-menu/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-menu/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-menu/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-option/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-option/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-option/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-option/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-option/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-option/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-option/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-option/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-option/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-option/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-option/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-scroll-list/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-scroll-list/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-scroll-list/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-scroll-list/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-scroll-list/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-scroll-list/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-scroll-list/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-scroll-list/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-scroll-list/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-scroll-list/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-scroll-list/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-shades/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-shades/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-shades/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-shades/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-shades/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-shades/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-shades/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-shades/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-shades/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-shades/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-shades/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-shape/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-shape/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-shape/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-shape/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-shape/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-shape/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-shape/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-shape/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-shape/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-shape/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-shape/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab-2-hard/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab-2-hard/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-tab-2-hard/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab-2-hard/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-tab-2-hard/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab-2-hard/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab-2-hard/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab-2-hard/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-tab-2-hard/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab-2-hard/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-tab-2-hard/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab-2/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab-2/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-tab-2/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab-2/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-tab-2/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab-2/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab-2/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab-2/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-tab-2/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab-2/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-tab-2/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-tab/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-tab/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-tab/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-tab/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-test/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-test/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-test/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-test/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-test/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-test/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-test/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-test/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-test/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-test/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-test/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-widget/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-widget/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-widget/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-widget/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-widget/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-widget/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-widget/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-widget/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-widget/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/click-widget/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/click-widget/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/count-shape/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/count-shape/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/count-shape/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/count-shape/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/count-shape/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/count-shape/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/count-shape/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/count-shape/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/count-shape/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/count-shape/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/count-shape/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-forward-nl-turk/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-forward-nl-turk/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/email-inbox-forward-nl-turk/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-forward-nl-turk/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-forward-nl-turk/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-forward-nl-turk/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-forward-nl/action.txt: -------------------------------------------------------------------------------- 1 | the next instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-forward-nl/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/email-inbox-forward-nl/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-forward-nl/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/email-inbox-forward-nl/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-forward-nl/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-forward-nl/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-forward-nl/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/email-inbox-forward-nl/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-forward-nl/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-nl-turk/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-nl-turk/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/email-inbox-nl-turk/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-nl-turk/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/email-inbox-nl-turk/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-nl-turk/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-nl-turk/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-nl-turk/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/email-inbox-nl-turk/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox-nl-turk/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/email-inbox/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/email-inbox/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/email-inbox/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/email-inbox/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/enter-date/action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/enter-date/action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/enter-date/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/enter-date/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/enter-date/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/enter-date/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/enter-date/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/enter-date/initialize_plan.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/enter-date/initialize_plan.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/enter-date/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/enter-date/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/enter-date/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/enter-date/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/enter-time/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/enter-time/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/enter-time/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/enter-time/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/enter-time/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/enter-time/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/enter-time/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/enter-time/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/enter-time/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/enter-time/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/example.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/few-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/few-shot.json -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/focus-text/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/focus-text/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/focus-text/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/focus-text/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/focus-text/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/focus-text/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/focus-text/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/focus-text/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/focus-text/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/focus-text/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/grid-coordinate/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/grid-coordinate/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/grid-coordinate/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/grid-coordinate/check_finish.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/grid-coordinate/check_finish.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/grid-coordinate/done.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/grid-coordinate/done.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/grid-coordinate/edit_plan.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/grid-coordinate/edit_plan.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/grid-coordinate/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/grid-coordinate/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/grid-coordinate/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/grid-coordinate/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/grid-coordinate/new_plan.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/grid-coordinate/new_plan.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/grid-coordinate/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/grid-coordinate/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/grid-coordinate/rci_answer.txt: -------------------------------------------------------------------------------- 1 | Answer Yes or No. -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/grid-coordinate/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/identify-shape/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/identify-shape/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/identify-shape/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/identify-shape/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/identify-shape/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/identify-shape/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/identify-shape/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/identify-shape/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/identify-shape/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/identify-shape/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/login-user-popup/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/login-user-popup/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/login-user-popup/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/login-user-popup/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/login-user-popup/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/login-user-popup/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/login-user-popup/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/login-user-popup/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/login-user-popup/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/login-user-popup/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/multi-layouts/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/multi-layouts/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/multi-layouts/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/multi-layouts/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/multi-layouts/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/multi-layouts/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/multi-layouts/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/multi-layouts/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/multi-layouts/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/multi-layouts/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/navigate-tree/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/navigate-tree/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/navigate-tree/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/navigate-tree/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/navigate-tree/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/navigate-tree/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/navigate-tree/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/navigate-tree/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/navigate-tree/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/navigate-tree/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/search-engine/action.txt: -------------------------------------------------------------------------------- 1 | the next instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/search-engine/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/search-engine/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/search-engine/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/search-engine/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/search-engine/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/search-engine/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/search-engine/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/search-engine/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/search-engine/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/search-engine/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/simple-algebra/action.txt: -------------------------------------------------------------------------------- 1 | the next instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/simple-algebra/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/simple-algebra/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/simple-algebra/check_finish.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/simple-algebra/check_finish.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/simple-algebra/done.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/simple-algebra/done.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/simple-algebra/edit_plan.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/simple-algebra/edit_plan.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/simple-algebra/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/simple-algebra/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/simple-algebra/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/simple-algebra/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/simple-algebra/new_plan.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/simple-algebra/new_plan.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/simple-algebra/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/simple-algebra/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/simple-algebra/rci_answer.txt: -------------------------------------------------------------------------------- 1 | Answer Yes or No. -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/simple-algebra/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media-all/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media-all/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/social-media-all/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media-all/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/social-media-all/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media-all/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media-all/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media-all/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/social-media-all/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media-all/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/social-media-all/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media-some/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media-some/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/social-media-some/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media-some/example.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media-some/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media-some/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media-some/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/social-media-some/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media-some/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/social-media/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/social-media/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/social-media/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/social-media/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/social-media/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/terminal/action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/terminal/action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/terminal/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/terminal/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/terminal/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/terminal/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/terminal/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/terminal/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/terminal/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/terminal/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/terminal/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/terminal/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/tic-tac-toe/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/tic-tac-toe/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/tic-tac-toe/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/tic-tac-toe/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/tic-tac-toe/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/tic-tac-toe/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/tic-tac-toe/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/tic-tac-toe/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/tic-tac-toe/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/tic-tac-toe/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/update_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/use-autocomplete/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/use-autocomplete/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/use-autocomplete/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/use-autocomplete/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/use-autocomplete/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/use-autocomplete/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/use-autocomplete/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/use-autocomplete/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/use-autocomplete/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/use-autocomplete/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/use-spinner/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/use-spinner/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/use-spinner/base.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/use-spinner/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/use-spinner/example.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/use-spinner/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/use-spinner/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/use-spinner/rci_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/prompt/use-spinner/rci_action.txt -------------------------------------------------------------------------------- /eval_heldout/miniwob++/prompt/use-spinner/update_action.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/miniwob++/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/miniwob++/requirements.txt -------------------------------------------------------------------------------- /eval_heldout/rewoo/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/.gitignore -------------------------------------------------------------------------------- /eval_heldout/rewoo/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/LICENSE -------------------------------------------------------------------------------- /eval_heldout/rewoo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/README.md -------------------------------------------------------------------------------- /eval_heldout/rewoo/algos/PWS.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/algos/PWS.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/algos/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/rewoo/algos/notool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/algos/notool.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/algos/react.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/algos/react.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/alpaca/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/rewoo/alpaca/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/alpaca/lora.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/alpaca/templates/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/alpaca/templates/README.md -------------------------------------------------------------------------------- /eval_heldout/rewoo/alpaca/templates/alpaca.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/alpaca/templates/alpaca.json -------------------------------------------------------------------------------- /eval_heldout/rewoo/alpaca/templates/alpaca_legacy.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/alpaca/templates/alpaca_legacy.json -------------------------------------------------------------------------------- /eval_heldout/rewoo/alpaca/templates/alpaca_short.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/alpaca/templates/alpaca_short.json -------------------------------------------------------------------------------- /eval_heldout/rewoo/alpaca/templates/vigogne.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/alpaca/templates/vigogne.json -------------------------------------------------------------------------------- /eval_heldout/rewoo/alpaca/utils/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/alpaca/utils/README.md -------------------------------------------------------------------------------- /eval_heldout/rewoo/alpaca/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/rewoo/alpaca/utils/callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/alpaca/utils/callbacks.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/alpaca/utils/prompter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/alpaca/utils/prompter.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/eval-gpt-3.5-turbo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/eval-gpt-3.5-turbo.sh -------------------------------------------------------------------------------- /eval_heldout/rewoo/eval-gpt-4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/eval-gpt-4.sh -------------------------------------------------------------------------------- /eval_heldout/rewoo/eval-tgi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/eval-tgi.sh -------------------------------------------------------------------------------- /eval_heldout/rewoo/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/metrics.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/nodes/LLMNode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/nodes/LLMNode.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/nodes/Node.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/nodes/Node.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/nodes/NodeCofig.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/nodes/NodeCofig.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/nodes/Planner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/nodes/Planner.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/nodes/Solver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/nodes/Solver.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/nodes/Worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/nodes/Worker.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/nodes/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/rewoo/prompts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/rewoo/prompts/fewshots.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/prompts/fewshots.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/prompts/planner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/prompts/planner.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/prompts/solver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/prompts/solver.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/prompts/wiki_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/prompts/wiki_prompt.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/requirements.txt -------------------------------------------------------------------------------- /eval_heldout/rewoo/run_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/run_eval.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/utils/CustomDocstoreExplorer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/utils/CustomDocstoreExplorer.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/utils/DataLoader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/utils/DataLoader.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/utils/Evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/utils/Evaluator.py -------------------------------------------------------------------------------- /eval_heldout/rewoo/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/rewoo/utils/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/rewoo/utils/util.py -------------------------------------------------------------------------------- /eval_heldout/science-world/.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .DS_Store 3 | logs/ 4 | *.pyc 5 | .vscode/ 6 | __pycache__/ -------------------------------------------------------------------------------- /eval_heldout/science-world/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/README.md -------------------------------------------------------------------------------- /eval_heldout/science-world/data_utils/READMD.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/data_utils/READMD.md -------------------------------------------------------------------------------- /eval_heldout/science-world/data_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/science-world/data_utils/data_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/data_utils/data_convert.py -------------------------------------------------------------------------------- /eval_heldout/science-world/data_utils/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/data_utils/data_utils.py -------------------------------------------------------------------------------- /eval_heldout/science-world/data_utils/demos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/data_utils/demos.json -------------------------------------------------------------------------------- /eval_heldout/science-world/data_utils/goldpaths-all.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/data_utils/goldpaths-all.zip -------------------------------------------------------------------------------- /eval_heldout/science-world/eval-gpt-3.5.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/eval-gpt-3.5.sh -------------------------------------------------------------------------------- /eval_heldout/science-world/eval-gpt-4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/eval-gpt-4.sh -------------------------------------------------------------------------------- /eval_heldout/science-world/eval-tgi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/eval-tgi.sh -------------------------------------------------------------------------------- /eval_heldout/science-world/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/eval.py -------------------------------------------------------------------------------- /eval_heldout/science-world/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/eval_utils.py -------------------------------------------------------------------------------- /eval_heldout/science-world/fast_agent/deepspeed_reqs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/fast_agent/deepspeed_reqs.txt -------------------------------------------------------------------------------- /eval_heldout/science-world/fast_agent/ds_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/fast_agent/ds_train.py -------------------------------------------------------------------------------- /eval_heldout/science-world/fast_agent/ds_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/fast_agent/ds_train.sh -------------------------------------------------------------------------------- /eval_heldout/science-world/fast_agent/zero_2_bf16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/fast_agent/zero_2_bf16.json -------------------------------------------------------------------------------- /eval_heldout/science-world/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/metrics.py -------------------------------------------------------------------------------- /eval_heldout/science-world/prompts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/prompts/README.md -------------------------------------------------------------------------------- /eval_heldout/science-world/prompts/convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/prompts/convert.py -------------------------------------------------------------------------------- /eval_heldout/science-world/prompts/prompt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/prompts/prompt.json -------------------------------------------------------------------------------- /eval_heldout/science-world/prompts/prompt_orig.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/prompts/prompt_orig.json -------------------------------------------------------------------------------- /eval_heldout/science-world/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/requirements.txt -------------------------------------------------------------------------------- /eval_heldout/science-world/slow_agent/local_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/slow_agent/local_llm.py -------------------------------------------------------------------------------- /eval_heldout/science-world/slow_agent/run_gradio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/slow_agent/run_gradio.py -------------------------------------------------------------------------------- /eval_heldout/science-world/slow_agent/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/science-world/slow_agent/utils.py -------------------------------------------------------------------------------- /eval_heldout/webarena/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/.gitignore -------------------------------------------------------------------------------- /eval_heldout/webarena/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/LICENSE -------------------------------------------------------------------------------- /eval_heldout/webarena/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/README.md -------------------------------------------------------------------------------- /eval_heldout/webarena/agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/agent/__init__.py -------------------------------------------------------------------------------- /eval_heldout/webarena/agent/agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/agent/agent.py -------------------------------------------------------------------------------- /eval_heldout/webarena/agent/prompts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/agent/prompts/README.md -------------------------------------------------------------------------------- /eval_heldout/webarena/agent/prompts/__init__.py: -------------------------------------------------------------------------------- 1 | from .prompt_constructor import * 2 | -------------------------------------------------------------------------------- /eval_heldout/webarena/agent/prompts/prompt_constructor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/agent/prompts/prompt_constructor.py -------------------------------------------------------------------------------- /eval_heldout/webarena/agent/prompts/raw/p_cot_id_actree_2s.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/agent/prompts/raw/p_cot_id_actree_2s.py -------------------------------------------------------------------------------- /eval_heldout/webarena/agent/prompts/raw/p_direct_id_actree_2s.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/agent/prompts/raw/p_direct_id_actree_2s.py -------------------------------------------------------------------------------- /eval_heldout/webarena/agent/prompts/to_json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/agent/prompts/to_json.py -------------------------------------------------------------------------------- /eval_heldout/webarena/browser_env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/browser_env/__init__.py -------------------------------------------------------------------------------- /eval_heldout/webarena/browser_env/actions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/browser_env/actions.py -------------------------------------------------------------------------------- /eval_heldout/webarena/browser_env/async_envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/browser_env/async_envs.py -------------------------------------------------------------------------------- /eval_heldout/webarena/browser_env/auto_login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/browser_env/auto_login.py -------------------------------------------------------------------------------- /eval_heldout/webarena/browser_env/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/browser_env/constants.py -------------------------------------------------------------------------------- /eval_heldout/webarena/browser_env/env_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/browser_env/env_config.py -------------------------------------------------------------------------------- /eval_heldout/webarena/browser_env/envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/browser_env/envs.py -------------------------------------------------------------------------------- /eval_heldout/webarena/browser_env/helper_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/browser_env/helper_functions.py -------------------------------------------------------------------------------- /eval_heldout/webarena/browser_env/processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/browser_env/processors.py -------------------------------------------------------------------------------- /eval_heldout/webarena/browser_env/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_heldout/webarena/browser_env/trajectory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/browser_env/trajectory.py -------------------------------------------------------------------------------- /eval_heldout/webarena/browser_env/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/browser_env/utils.py -------------------------------------------------------------------------------- /eval_heldout/webarena/config_files/examples/1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/config_files/examples/1.json -------------------------------------------------------------------------------- /eval_heldout/webarena/config_files/examples/2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/config_files/examples/2.json -------------------------------------------------------------------------------- /eval_heldout/webarena/config_files/examples/3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/config_files/examples/3.json -------------------------------------------------------------------------------- /eval_heldout/webarena/config_files/examples/4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/config_files/examples/4.json -------------------------------------------------------------------------------- /eval_heldout/webarena/config_files/test.raw.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/config_files/test.raw.json -------------------------------------------------------------------------------- /eval_heldout/webarena/environment_docker/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/environment_docker/README.md -------------------------------------------------------------------------------- /eval_heldout/webarena/environment_docker/webarena-homepage/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/environment_docker/webarena-homepage/app.py -------------------------------------------------------------------------------- /eval_heldout/webarena/eval-gpt-3.5-turbo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/eval-gpt-3.5-turbo.sh -------------------------------------------------------------------------------- /eval_heldout/webarena/eval-gpt-4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/eval-gpt-4.sh -------------------------------------------------------------------------------- /eval_heldout/webarena/eval-tgi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/eval-tgi.sh -------------------------------------------------------------------------------- /eval_heldout/webarena/evaluation_harness/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/evaluation_harness/__init__.py -------------------------------------------------------------------------------- /eval_heldout/webarena/evaluation_harness/evaluators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/evaluation_harness/evaluators.py -------------------------------------------------------------------------------- /eval_heldout/webarena/evaluation_harness/helper_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/evaluation_harness/helper_functions.py -------------------------------------------------------------------------------- /eval_heldout/webarena/llms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/llms/__init__.py -------------------------------------------------------------------------------- /eval_heldout/webarena/llms/lm_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/llms/lm_config.py -------------------------------------------------------------------------------- /eval_heldout/webarena/llms/providers/openai_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/llms/providers/openai_utils.py -------------------------------------------------------------------------------- /eval_heldout/webarena/llms/tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/llms/tokenizers.py -------------------------------------------------------------------------------- /eval_heldout/webarena/prepare.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/prepare.sh -------------------------------------------------------------------------------- /eval_heldout/webarena/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/run.py -------------------------------------------------------------------------------- /eval_heldout/webarena/setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/setup.cfg -------------------------------------------------------------------------------- /eval_heldout/webarena/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentTuning/HEAD/eval_heldout/webarena/setup.py --------------------------------------------------------------------------------