├── .github └── ISSUE_TEMPLATE │ ├── bug-report---assistance-request.md │ └── feature-request.md ├── .gitignore ├── LICENSE ├── README.md ├── assets ├── agentbench.png ├── architecture.png ├── cover.jpg ├── intro.png ├── leaderboard.png ├── logo.png └── statistics.png ├── configs ├── agents │ ├── api_agents.yaml │ ├── fs_agent.yaml │ ├── openai-chat.yaml │ └── openai-text.yaml ├── assignments │ ├── default.yaml │ ├── definition.yaml │ └── test_avalon.yaml ├── start_task.yaml └── tasks │ ├── alfworld.yaml │ ├── avalon.yaml │ ├── card_game.yaml │ ├── dbbench.yaml │ ├── kg.yaml │ ├── ltp.yaml │ ├── mind2web.yaml │ ├── os.yaml │ ├── task_assembly.yaml │ └── webshop.yaml ├── data ├── alfworld │ ├── dev.json │ └── standard.json ├── avalon │ └── dev.json ├── dbbench │ ├── dev.jsonl │ └── standard.jsonl ├── knowledgegraph │ ├── dev.json │ └── std.json ├── lateralthinkingpuzzle │ ├── dev.xlsx │ └── standard.xlsx ├── lateralthinkingpuzzle_zh │ ├── dev.xlsx │ └── standard.xlsx ├── mind2web │ └── prompt │ │ ├── llm_prompt.json │ │ └── llm_prompt_cot.json └── os_interaction │ ├── data │ ├── 1 │ │ └── stock.json │ ├── 2 │ │ └── environment.json │ ├── 3 │ │ └── ac.json │ ├── 4 │ │ ├── N11.json │ │ ├── N225.json │ │ ├── N37.json │ │ ├── N4.json │ │ ├── N41.json │ │ ├── Q09.json │ │ ├── Q19.json │ │ ├── Q30.json │ │ ├── Q47.json │ │ └── Q49.json │ ├── 5 │ │ └── new.json │ ├── 6 │ │ └── new.json │ ├── 7 │ │ └── bootstrap.json │ ├── 6-backup.json │ └── dev.json │ ├── res │ └── dockerfiles │ │ ├── default │ │ ├── packages │ │ └── ubuntu │ └── scripts │ ├── 1 │ ├── check │ │ ├── 1.sh │ │ ├── containing.py │ │ ├── in.py │ │ ├── integer-match.py │ │ ├── size-match.py │ │ └── string-match.py │ ├── example │ │ └── 1.sh │ └── init │ │ ├── gen_words.sh │ │ ├── install_nettools.sh │ │ ├── nested_folders.sh │ │ └── stock-log.sh │ ├── 2 │ ├── check │ │ ├── 1.sh │ │ ├── containing.py │ │ ├── in.py │ │ ├── integer-match.py │ │ ├── size-match.py │ │ └── string-match.py │ ├── example │ │ └── 1.sh │ 
└── init │ │ ├── gen_words.sh │ │ ├── install_nettools.sh │ │ ├── nested_folders.sh │ │ └── stock-log.sh │ ├── 3 │ ├── check │ │ ├── 1.sh │ │ ├── containing.py │ │ ├── in.py │ │ ├── integer-match.py │ │ ├── size-match.py │ │ └── string-match.py │ ├── example │ │ └── 1.sh │ └── init │ │ ├── gen_words.sh │ │ ├── install_nettools.sh │ │ ├── nested_folders.sh │ │ └── stock-log.sh │ ├── 4 │ ├── check │ │ ├── 1.sh │ │ ├── containing.py │ │ ├── in.py │ │ ├── integer-match.py │ │ ├── size-match.py │ │ └── string-match.py │ ├── example │ │ └── 1.sh │ └── init │ │ ├── gen_words.sh │ │ ├── install_nettools.sh │ │ ├── nested_folders.sh │ │ └── stock-log.sh │ ├── 5 │ ├── check │ │ ├── containing.py │ │ ├── in.py │ │ ├── integer-match.py │ │ ├── size-match.py │ │ └── string-match.py │ ├── checking │ │ ├── 0.sh │ │ ├── 1.sh │ │ └── 2.sh │ ├── example │ │ ├── 0.sh │ │ ├── 1.sh │ │ └── 2.sh │ ├── file.json │ ├── init │ │ └── 1.sh │ ├── new.json │ └── prompt.md │ ├── 6 │ ├── collected.json │ ├── collecting.py │ ├── new.json │ ├── permission.json │ └── stackoverflow │ │ └── spider.py │ ├── 7 │ ├── check │ │ ├── 1.sh │ │ ├── containing.py │ │ ├── in.py │ │ ├── integer-match.py │ │ ├── size-match.py │ │ └── string-match.py │ ├── example │ │ └── 1.sh │ └── init │ │ ├── gen_words.sh │ │ ├── install_nettools.sh │ │ ├── nested_folders.sh │ │ └── stock-log.sh │ └── dev │ ├── check │ ├── 0.sh │ ├── containing.py │ ├── in.py │ ├── integer-match.py │ ├── size-match.py │ └── string-match.py │ ├── example │ └── 0.sh │ └── init │ └── stock-log.sh ├── docs ├── Config_cn.md ├── Config_en.md ├── Entrance_cn.md ├── Entrance_en.md ├── Extension_cn.md ├── Extension_en.md ├── Introduction_cn.md └── Introduction_en.md ├── requirements.txt └── src ├── __init__.py ├── analysis.py ├── assigner.py ├── client ├── __init__.py ├── agent.py ├── agent_test.py ├── agents │ ├── __init__.py │ ├── claude_agent.py │ ├── fastchat_client.py │ ├── http_agent.py │ └── test_agent.py └── task.py ├── configs.py ├── server 
├── __init__.py ├── task.py ├── task_controller.py ├── task_worker.py └── tasks │ ├── __init__.py │ ├── alfworld │ ├── __init__.py │ ├── configs │ │ └── base_config.yaml │ ├── environment.py │ ├── prompts │ │ ├── alfworld_multiturn.json │ │ ├── alfworld_multiturn_cot.json │ │ ├── alfworld_multiturn_new.json │ │ ├── alfworld_multiturn_plan_first.json │ │ ├── alfworld_multiturn_pure.json │ │ ├── alfworld_multiturn_react.json │ │ └── alfworld_singleturn.json │ ├── task.py │ └── utils.py │ ├── avalon │ ├── README.md │ ├── __init__.py │ ├── agents │ │ ├── agent.py │ │ ├── baseline_agents.py │ │ └── llm_with_discussion.py │ ├── avalon_exception.py │ ├── engine.py │ ├── prompts.py │ ├── task.py │ ├── task_scoring.py │ ├── utils.py │ └── wrapper.py │ ├── card_game │ ├── AI │ │ ├── AI_En.py │ │ ├── __init__.py │ │ ├── action1.py │ │ ├── action2.py │ │ ├── basline1.py │ │ ├── basline2.py │ │ ├── client.py │ │ ├── main.py │ │ ├── prompt │ │ │ ├── __init__.py │ │ │ ├── cn.py │ │ │ ├── cn_en.py │ │ │ └── en.py │ │ └── sdk │ │ │ ├── CMakeLists.txt │ │ │ ├── ai_client.cpython-310-x86_64-linux-gnu.so │ │ │ ├── ai_client.cpython-38-x86_64-linux-gnu.so │ │ │ ├── ai_client.hpp │ │ │ ├── ai_client.pyi │ │ │ ├── build │ │ │ ├── CMakeCache.txt │ │ │ ├── CMakeFiles │ │ │ │ ├── 3.25.0 │ │ │ │ │ ├── CMakeCXXCompiler.cmake │ │ │ │ │ ├── CMakeDetermineCompilerABI_CXX.bin │ │ │ │ │ ├── CMakeSystem.cmake │ │ │ │ │ └── CompilerIdCXX │ │ │ │ │ │ ├── CMakeCXXCompilerId.cpp │ │ │ │ │ │ └── a.out │ │ │ │ ├── CMakeDirectoryInformation.cmake │ │ │ │ ├── CMakeOutput.log │ │ │ │ ├── Makefile.cmake │ │ │ │ ├── Makefile2 │ │ │ │ ├── TargetDirectories.txt │ │ │ │ ├── ai_client.dir │ │ │ │ │ ├── DependInfo.cmake │ │ │ │ │ ├── build.make │ │ │ │ │ ├── cmake_clean.cmake │ │ │ │ │ ├── compiler_depend.make │ │ │ │ │ ├── compiler_depend.ts │ │ │ │ │ ├── depend.make │ │ │ │ │ ├── flags.make │ │ │ │ │ ├── jsoncpp │ │ │ │ │ │ ├── jsoncpp.cpp.o │ │ │ │ │ │ └── jsoncpp.cpp.o.d │ │ │ │ │ ├── link.txt │ │ │ │ │ ├── 
progress.make │ │ │ │ │ ├── py_ai_sdk.cpp.o │ │ │ │ │ └── py_ai_sdk.cpp.o.d │ │ │ │ ├── cmake.check_cache │ │ │ │ └── progress.marks │ │ │ ├── Makefile │ │ │ ├── bin │ │ │ │ └── ai_client.cpython-310-x86_64-linux-gnu.so │ │ │ └── cmake_install.cmake │ │ │ ├── jsoncpp │ │ │ ├── json │ │ │ │ ├── json-forwards.h │ │ │ │ └── json.h │ │ │ └── jsoncpp.cpp │ │ │ ├── py_ai_sdk.cpp │ │ │ ├── py_json_cast.hpp │ │ │ └── pyd │ │ │ ├── ai_client.cp38-win_amd64.pyd │ │ │ ├── ai_client.cpython-310-x86_64-linux-gnu.so │ │ │ ├── ai_client.cpython-36m-x86_64-linux-gnu.so │ │ │ ├── ai_client.cpython-38-darwin.so │ │ │ └── ai_client.cpython-38-x86_64-linux-gnu.so │ ├── __init__.py │ ├── judger │ │ ├── __init__.py │ │ ├── cal_metric.py │ │ ├── judger.py │ │ ├── rserver.py │ │ └── run_all.py │ ├── logic │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── bin │ │ │ └── main │ │ └── src │ │ │ ├── aqua_sdk.cpp │ │ │ ├── fish.cpp │ │ │ ├── fish.h │ │ │ ├── fishset.cpp │ │ │ ├── fishset.h │ │ │ ├── game.cpp │ │ │ ├── game.h │ │ │ ├── jsoncpp │ │ │ ├── json │ │ │ │ ├── json-forwards.h │ │ │ │ └── json.h │ │ │ └── jsoncpp.cpp │ │ │ ├── main │ │ │ ├── main.cpp │ │ │ ├── player.cpp │ │ │ ├── player.h │ │ │ └── timer.h │ ├── server.py │ ├── task.py │ └── utils.py │ ├── dbbench │ ├── Interaction.py │ ├── __init__.py │ └── requirements.txt │ ├── knowledgegraph │ ├── __init__.py │ ├── api.py │ ├── ontology │ │ ├── fb_roles │ │ ├── fb_types │ │ ├── reverse_properties │ │ └── vocab.json │ ├── requirements.txt │ ├── task.py │ └── utils │ │ ├── logic_form_util.py │ │ ├── semparse_util.py │ │ └── sparql_executer.py │ ├── ltp │ ├── __init__.py │ └── task.py │ ├── mind2web │ ├── __init__.py │ ├── data_utils │ │ ├── __init__.py │ │ └── dom_utils.py │ ├── dataloader.py │ └── task.py │ ├── os_interaction │ ├── __init__.py │ ├── images.py │ ├── requirements.txt │ └── task.py │ └── webshop │ ├── .github │ ├── ISSUE_TEMPLATE.md │ ├── PULL_REQUEST_TEMPLATE.md │ └── workflows │ │ └── pytest.yml │ ├── .gitignore │ ├── 
LICENSE.md │ ├── README.md │ ├── __init__.py │ ├── assets │ ├── diagram.gif │ ├── model_ckpts.png │ └── transfer-logic.png │ ├── baseline_models │ ├── .gitignore │ ├── README.md │ ├── agent.py │ ├── env.py │ ├── generate_search.py │ ├── logger.py │ ├── models │ │ ├── bert.py │ │ ├── modules.py │ │ └── rnn.py │ ├── requirements.txt │ ├── test.py │ ├── train_choice_il.py │ ├── train_rl.py │ └── train_search_il.py │ ├── conftest.py │ ├── requirements.txt │ ├── run_dev.sh │ ├── run_envs │ ├── run_web_agent_site_env.py │ └── run_web_agent_text_env.py │ ├── run_prod.sh │ ├── run_web_agent_site_env.sh │ ├── run_web_agent_text_env.sh │ ├── search_engine │ ├── convert_product_file_format.py │ ├── lucene_searcher.py │ └── run_indexing.sh │ ├── setup.sh │ ├── tests │ ├── transfer │ │ ├── mocks │ │ │ ├── mock_parse_item_page_amz │ │ │ ├── mock_parse_item_page_ebay │ │ │ ├── mock_parse_item_page_ws │ │ │ ├── mock_parse_item_page_ws_desc │ │ │ ├── mock_parse_item_page_ws_feat │ │ │ ├── mock_parse_results_amz │ │ │ ├── mock_parse_results_ebay │ │ │ └── mock_parse_results_ws │ │ └── test_predict_help.py │ └── web-agent-site │ │ ├── engine │ │ ├── test_goal.py │ │ └── test_normalize.py │ │ └── test_utils.py │ ├── transfer │ ├── README.md │ ├── __init__.py │ ├── app.py │ ├── predict_help.py │ └── webshop_lite.py │ └── web_agent_site │ ├── __init__.py │ ├── app.py │ ├── attributes │ ├── annotate.py │ └── generate_attrs.py │ ├── engine │ ├── __init__.py │ ├── engine.py │ ├── goal.py │ └── normalize.py │ ├── envs │ ├── __init__.py │ ├── web_agent_site_env.py │ └── web_agent_text_env.py │ ├── models │ ├── __init__.py │ └── models.py │ ├── static │ ├── images │ │ └── no-image-available.png │ └── style.css │ ├── templates │ ├── attributes_page.html │ ├── description_page.html │ ├── done_page.html │ ├── features_page.html │ ├── item_page.html │ ├── results_page.html │ ├── review_page.html │ └── search_page.html │ └── utils.py ├── start_task.py ├── typings ├── __init__.py ├── config.py ├── 
exception.py ├── general.py ├── output.py ├── request.py └── status.py └── utils ├── __init__.py ├── max_flow.py ├── others.py └── rules.py /.github/ISSUE_TEMPLATE/bug-report---assistance-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report & Assistance Request 3 | about: Create a report to help us improve 4 | title: "[Bug/Assistance] " 5 | labels: bug, help wanted 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Screenshots or Terminal Copy&Paste** 21 | If applicable, add screenshots to help explain your problem. 22 | 23 | **Desktop (please complete the following information):** 24 | - OS: [e.g. Ubuntu 22.04] 25 | - Python: [e.g. 3.9] 26 | 27 | **Additional context** 28 | Add any other context about the problem here. 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: Suggest an idea for this project 4 | title: "[Feature] " 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the feature request here. 
18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | %* 3 | .idea 4 | .vscode 5 | src/tasks/humaneval_x/env/vendor 6 | logs 7 | outputs 8 | data/full 9 | results 10 | config.sh 11 | download 12 | .DS_Store 13 | # local* 14 | *.ipynb 15 | .cache 16 | src/server/tasks/card_game/result 17 | .dockerfile 18 | .dockerfile-cache 19 | analysis -------------------------------------------------------------------------------- /assets/agentbench.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/assets/agentbench.png -------------------------------------------------------------------------------- /assets/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/assets/architecture.png -------------------------------------------------------------------------------- /assets/cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/assets/cover.jpg -------------------------------------------------------------------------------- /assets/intro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/assets/intro.png -------------------------------------------------------------------------------- /assets/leaderboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/assets/leaderboard.png 
-------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/assets/logo.png -------------------------------------------------------------------------------- /assets/statistics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/assets/statistics.png -------------------------------------------------------------------------------- /configs/agents/api_agents.yaml: -------------------------------------------------------------------------------- 1 | gpt-3.5-turbo-0613: 2 | import: "./openai-chat.yaml" 3 | parameters: 4 | name: "gpt-3.5-turbo-0613" 5 | body: 6 | model: "gpt-3.5-turbo-0613" 7 | max_tokens: 512 8 | 9 | text-davinci-003: 10 | import: "./openai-text.yaml" 11 | parameters: 12 | name: "text-davinci-003" 13 | body: 14 | model: "text-davinci-003" 15 | max_tokens: 512 16 | 17 | text-davinci-002: 18 | import: "./openai-text.yaml" 19 | parameters: 20 | name: "text-davinci-002" 21 | body: 22 | model: "text-davinci-002" 23 | max_tokens: 512 24 | -------------------------------------------------------------------------------- /configs/agents/fs_agent.yaml: -------------------------------------------------------------------------------- 1 | default: 2 | module: "src.client.agents.FastChatAgent" 3 | parameters: 4 | name: "FastChat" 5 | controller_address: "http://localhost:55555" 6 | max_new_tokens: 512 7 | temperature: 0 8 | 9 | vicuna-33b: 10 | parameters: 11 | model_name: "vicuna-33b-v1.3" 12 | 13 | wizard-30b: 14 | parameters: 15 | model_name: "WizardLM-30B-V1.0-merged" 16 | 17 | vicuna-13b: 18 | parameters: 19 | model_name: "vicuna-13b-v1.5" 20 | 21 | vicuna-7b: 22 | parameters: 23 | model_name: "vicuna-7b-v1.5" 24 | 
-------------------------------------------------------------------------------- /configs/agents/openai-chat.yaml: -------------------------------------------------------------------------------- 1 | module: src.client.agents.HTTPAgent 2 | parameters: 3 | url: https://api.openai.com/v1/chat/completions 4 | headers: 5 | Content-Type: application/json 6 | Authorization: Bearer <% PUT-YOUR-OPENAI-KEY-HERE %> 7 | body: 8 | temperature: 0 9 | prompter: 10 | name: role_content_dict 11 | args: 12 | agent_role: assistant 13 | return_format: "{response[choices][0][message][content]}" 14 | -------------------------------------------------------------------------------- /configs/agents/openai-text.yaml: -------------------------------------------------------------------------------- 1 | module: src.client.agents.HTTPAgent 2 | parameters: 3 | name: <% NAME %> 4 | url: https://api.openai.com/v1/completions 5 | headers: 6 | Content-Type: application/json 7 | Authorization: Bearer <% PUT-YOUR-OPENAI-KEY-HERE %> 8 | body: 9 | model: <% NAME %> 10 | temperature: 0 11 | prompter: 12 | name: prompt_string 13 | return_format: "{response[choices][0][text]}" 14 | 15 | -------------------------------------------------------------------------------- /configs/assignments/default.yaml: -------------------------------------------------------------------------------- 1 | import: definition.yaml 2 | 3 | concurrency: 4 | task: 5 | dbbench-std: 5 6 | os-std: 5 7 | agent: 8 | gpt-3.5-turbo-0613: 5 9 | 10 | assignments: # List[Assignment] | Assignment 11 | - agent: # "task": List[str] | str , "agent": List[str] | str 12 | - gpt-3.5-turbo-0613 13 | task: 14 | - dbbench-std 15 | - os-std 16 | 17 | output: "outputs/{TIMESTAMP}" 18 | -------------------------------------------------------------------------------- /configs/assignments/definition.yaml: -------------------------------------------------------------------------------- 1 | definition: 2 | task: 3 | overwrite: 4 | module: 
src.client.TaskClient 5 | parameters: 6 | controller_address: "http://localhost:5000/api" 7 | import: ../tasks/task_assembly.yaml 8 | agent: 9 | import: 10 | - ../agents/api_agents.yaml 11 | - ../agents/fs_agent.yaml -------------------------------------------------------------------------------- /configs/assignments/test_avalon.yaml: -------------------------------------------------------------------------------- 1 | import: definition.yaml 2 | 3 | concurrency: 4 | task: 5 | avalon-dev-single: 3 6 | agent: 7 | gpt-3.5-turbo-0613: 3 8 | 9 | assignments: # List[Assignment] | Assignment 10 | - agent: # "task": List[str] | str , "agent": List[str] | str 11 | - gpt-3.5-turbo-0613 12 | task: 13 | - avalon-dev-single 14 | 15 | output: "outputs/{TIMESTAMP}" 16 | -------------------------------------------------------------------------------- /configs/start_task.yaml: -------------------------------------------------------------------------------- 1 | definition: 2 | import: tasks/task_assembly.yaml 3 | 4 | start: 5 | dbbench-std: 5 6 | os-std: 5 7 | -------------------------------------------------------------------------------- /configs/tasks/alfworld.yaml: -------------------------------------------------------------------------------- 1 | default: 2 | module: src.server.tasks.alfworld.ALFWorld 3 | docker: 4 | image: longinyu/agentbench-alfworld 5 | command: umask 0; [ -f /root/.setup.sh ] && bash /root/.setup.sh; 6 | parameters: 7 | name: alfworld-std 8 | data_path: "/AgentBench/data/alfworld" 9 | config_path: "src/server/tasks/alfworld/configs/base_config.yaml" 10 | prompts_path: "src/server/tasks/alfworld/prompts/alfworld_multiturn_plan_first.json" 11 | split: "standard" 12 | max_step: 35 13 | 14 | alfworld-dev: 15 | parameters: 16 | name: alfworld-dev 17 | split: "dev" 18 | 19 | alfworld-std: 20 | parameters: 21 | name: alfworld-std 22 | split: "standard" 23 | -------------------------------------------------------------------------------- 
/configs/tasks/avalon.yaml: -------------------------------------------------------------------------------- 1 | default: 2 | module: "src.server.tasks.avalon.AvalonBench" 3 | parameters: 4 | concurrency: 5 5 | num_players: 5 6 | discussion: False 7 | 8 | avalon-dev-naive: 9 | parameters: 10 | name: "AvalonBench-dev-naive" 11 | data_file: "data/avalon/dev.json" 12 | agent_list: ["naive", "naive", "naive", "naive", "naive"] 13 | 14 | avalon-dev-single: 15 | parameters: 16 | name: "AvalonBench-dev-single" 17 | data_file: "data/avalon/dev.json" 18 | agent_list: ["llm", "naive", "naive", "naive", "naive"] -------------------------------------------------------------------------------- /configs/tasks/card_game.yaml: -------------------------------------------------------------------------------- 1 | default: 2 | module: src.server.tasks.card_game.CardGame 3 | docker: 4 | image: longinyu/agentbench-card_game 5 | parameters: 6 | port: 12347 7 | workers: 10 8 | 9 | cg-dev: 10 | parameters: 11 | name: "CardGame-dev" 12 | test_time: 3 13 | 14 | cg-std: 15 | parameters: 16 | name: "CardGame-std" 17 | test_time: 5 18 | -------------------------------------------------------------------------------- /configs/tasks/dbbench.yaml: -------------------------------------------------------------------------------- 1 | default: 2 | module: src.server.tasks.dbbench.DBBench 3 | parameters: 4 | concurrency: 1 5 | max_round: 15 6 | 7 | dbbench-dev: 8 | parameters: 9 | name: dbbench-dev 10 | data_file: "data/dbbench/dev.jsonl" 11 | 12 | dbbench-std: 13 | parameters: 14 | name: dbbench-std 15 | data_file: "data/dbbench/standard.jsonl" 16 | -------------------------------------------------------------------------------- /configs/tasks/kg.yaml: -------------------------------------------------------------------------------- 1 | default: 2 | module: "src.server.tasks.knowledgegraph.KnowledgeGraph" 3 | parameters: 4 | round: 15 5 | sparql_url: "http://164.107.116.56:3093/sparql" 6 | 7 | kg-dev: 
8 | parameters: 9 | name: "KnowledgeGraph-dev" 10 | data_file: "data/knowledgegraph/dev.json" 11 | 12 | kg-std: 13 | parameters: 14 | name: "KnowledgeGraph-std" 15 | data_file: "data/knowledgegraph/std.json" 16 | -------------------------------------------------------------------------------- /configs/tasks/ltp.yaml: -------------------------------------------------------------------------------- 1 | default: 2 | module: src.server.tasks.ltp.LateralThinkingPuzzle 3 | docker: 4 | image: longinyu/agentbench-ltp 5 | parameters: 6 | round: 25 7 | eval_yaml: "configs/agents/api_agents.yaml" 8 | 9 | ltp-dev: 10 | parameters: 11 | name: ltp-dev 12 | filepath: "data/lateralthinkingpuzzle/dev.xlsx" 13 | 14 | ltp-std: 15 | parameters: 16 | name: ltp-std 17 | filepath: "data/lateralthinkingpuzzle/standard.xlsx" -------------------------------------------------------------------------------- /configs/tasks/mind2web.yaml: -------------------------------------------------------------------------------- 1 | default: 2 | module: src.server.tasks.mind2web.Mind2Web 3 | docker: 4 | image: longinyu/agentbench-mind2web 5 | parameters: 6 | concurrency: 10 7 | data: 8 | data_path: "." 
9 | cache_path: "./data/mind2web/.cache/data" 10 | score_file: /root/work/data/scores_all_data.pkl 11 | train: 12 | neg_ratio: 0.2 13 | num_candidates: 5 14 | max_context_len: 512 15 | model: 16 | mode: "multichoice" 17 | name: flan-t5-base 18 | model_name_or_path: "google/flan-t5-base" 19 | max_seq_length: 2048 20 | eval: 21 | topk: 10 22 | seed: 123 23 | llm_prompt: data/mind2web/prompt/llm_prompt_cot.json 24 | 25 | m2w-dev: 26 | parameters: 27 | name: "Mind2Web-dev" 28 | data: 29 | test_split_files: 30 | test_domain: /root/work/data/data_dev/*.json 31 | 32 | m2w-std: 33 | parameters: 34 | name: "Mind2Web-std" 35 | data: 36 | test_split_files: 37 | test_domain: /root/work/data/data_std/*.json 38 | -------------------------------------------------------------------------------- /configs/tasks/os.yaml: -------------------------------------------------------------------------------- 1 | os-dev: 2 | module: "src.server.tasks.os_interaction.OSInteraction" 3 | parameters: 4 | name: "os-dev" 5 | concurrency: 24 6 | round_limit: 8 7 | 8 | docker_config: 9 | localhost: local-os 10 | directory: data/os_interaction/res/dockerfiles 11 | 12 | scripts: 13 | directory: data/os_interaction/res/scripts 14 | 15 | data_config: 16 | files: 17 | - problem_file: data/os_interaction/data/dev.json 18 | script_dir: data/os_interaction/scripts/dev/ 19 | index_prefix: "dev-001-" 20 | 21 | bk: [ ] 22 | ignore: [ ] 23 | 24 | os-std: 25 | module: "src.server.tasks.os_interaction.OSInteraction" 26 | parameters: 27 | name: "os-std" 28 | concurrency: 24 29 | round_limit: 8 30 | 31 | docker_config: 32 | localhost: local-os 33 | directory: data/os_interaction/res/dockerfiles 34 | 35 | scripts: 36 | directory: data/os_interaction/res/scripts 37 | 38 | data_config: 39 | files: 40 | - problem_file: data/os_interaction/data/1/*.json 41 | script_dir: data/os_interaction/scripts/1/ 42 | index_prefix: "std-001-" 43 | - problem_file: data/os_interaction/data/2/*.json 44 | script_dir: 
data/os_interaction/scripts/2/ 45 | index_prefix: "std-002-" 46 | - problem_file: data/os_interaction/data/3/*.json 47 | script_dir: data/os_interaction/scripts/3/ 48 | index_prefix: "std-003-" 49 | - problem_file: data/os_interaction/data/4/*.json 50 | script_dir: data/os_interaction/scripts/4/ 51 | index_prefix: "std-004-" 52 | - problem_file: data/os_interaction/data/5/*.json 53 | script_dir: data/os_interaction/scripts/5/ 54 | index_prefix: "std-005-" 55 | - problem_file: data/os_interaction/data/6/*.json 56 | script_dir: data/os_interaction/scripts/6/ 57 | index_prefix: "std-006-" 58 | - problem_file: data/os_interaction/data/7/*.json 59 | script_dir: data/os_interaction/scripts/7/ 60 | index_prefix: "std-007-" 61 | 62 | bk: [ ] 63 | ignore: [ ] 64 | -------------------------------------------------------------------------------- /configs/tasks/task_assembly.yaml: -------------------------------------------------------------------------------- 1 | default: 2 | docker: 3 | command: umask 0; [ -f /root/.setup.sh ] && bash /root/.setup.sh; 4 | 5 | import: 6 | - webshop.yaml 7 | - dbbench.yaml 8 | - mind2web.yaml 9 | - card_game.yaml 10 | - kg.yaml 11 | - os.yaml 12 | - ltp.yaml 13 | - alfworld.yaml 14 | - avalon.yaml -------------------------------------------------------------------------------- /configs/tasks/webshop.yaml: -------------------------------------------------------------------------------- 1 | default: 2 | module: src.server.tasks.webshop_docker.WebShop 3 | parameters: 4 | concurrency: 1 5 | docker: 6 | image: longinyu/agentbench-webshop 7 | command: ln -s /root/webshop /root/workspace/src/server/tasks/webshop_docker;cp /root/workspace/src/server/tasks/webshop/__init__.py /root/webshop/__init__.py; 8 | 9 | webshop-dev: 10 | parameters: 11 | name: webshop-dev 12 | start: 200 13 | end: 280 14 | 15 | webshop-std: 16 | parameters: 17 | name: webshop-std 18 | start: 0 19 | end: 200 20 | 
-------------------------------------------------------------------------------- /data/alfworld/dev.json: -------------------------------------------------------------------------------- 1 | { 2 | "pick_and_place": [ 3 | "json_2.1.1/valid_unseen/pick_and_place_simple-SoapBottle-None-Toilet-424/trial_T20190907_004404_604165/game.tw-pddl", 4 | "json_2.1.1/valid_unseen/pick_and_place_simple-Pencil-None-Shelf-308/trial_T20190908_122154_042763/game.tw-pddl", 5 | "json_2.1.1/valid_unseen/pick_and_place_simple-SaltShaker-None-Cabinet-10/trial_T20190906_191445_723170/game.tw-pddl", 6 | "json_2.1.1/valid_unseen/pick_and_place_simple-Mug-None-Desk-308/trial_T20190909_210238_431966/game.tw-pddl" 7 | ], 8 | "pick_clean_then_place": [ 9 | "json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-Cloth-None-Cabinet-424/trial_T20190908_022436_073995/game.tw-pddl", 10 | "json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-SoapBar-None-Cabinet-424/trial_T20190908_215019_162873/game.tw-pddl", 11 | "json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-Pan-None-CounterTop-10/trial_T20190908_032543_712058/game.tw-pddl", 12 | "json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-SoapBar-None-CounterTop-424/trial_T20190907_074106_050405/game.tw-pddl" 13 | ], 14 | "pick_heat_then_place": [ 15 | "json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Cup-None-Cabinet-10/trial_T20190907_083346_800823/game.tw-pddl", 16 | "json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Cup-None-Cabinet-10/trial_T20190907_083507_594820/game.tw-pddl", 17 | "json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Tomato-None-GarbageCan-10/trial_T20190908_225453_272533/game.tw-pddl" 18 | ], 19 | "pick_cool_then_place": [ 20 | "json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Potato-None-Microwave-10/trial_T20190907_033157_424297/game.tw-pddl", 21 | "json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Pan-None-CounterTop-10/trial_T20190908_114622_738670/game.tw-pddl", 22 | 
"json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Bread-None-CounterTop-10/trial_T20190908_091747_866951/game.tw-pddl" 23 | ], 24 | "look_at_obj": [ 25 | "json_2.1.1/valid_unseen/look_at_obj_in_light-Mug-None-DeskLamp-308/trial_T20190908_161733_213242/game.tw-pddl", 26 | "json_2.1.1/valid_unseen/look_at_obj_in_light-Bowl-None-DeskLamp-308/trial_T20190907_133935_066606/game.tw-pddl", 27 | "json_2.1.1/valid_unseen/look_at_obj_in_light-Pencil-None-DeskLamp-308/trial_T20190908_220656_510400/game.tw-pddl" 28 | ], 29 | "pick_two_obj": [ 30 | "json_2.1.1/valid_unseen/pick_two_obj_and_place-Pillow-None-Sofa-219/trial_T20190907_163327_486300/game.tw-pddl", 31 | "json_2.1.1/valid_unseen/pick_two_obj_and_place-SoapBar-None-GarbageCan-424/trial_T20190909_064053_839817/game.tw-pddl", 32 | "json_2.1.1/valid_unseen/pick_two_obj_and_place-CD-None-Safe-308/trial_T20190907_051013_060265/game.tw-pddl" 33 | ] 34 | } -------------------------------------------------------------------------------- /data/avalon/dev.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "num_players": 5, 4 | "quest_leader": 0, 5 | "role_names": ["Servant", "Merlin", "Servant", "Assassin", "Minion"] 6 | }, 7 | { 8 | "num_players": 5, 9 | "quest_leader": 0, 10 | "role_names": ["Servant", "Assassin", "Servant", "Merlin", "Minion"] 11 | }, 12 | { 13 | "num_players": 5, 14 | "quest_leader": 0, 15 | "role_names": ["Servant", "Assassin", "Merlin", "Servant", "Minion"] 16 | }, 17 | { 18 | "num_players": 5, 19 | "quest_leader": 0, 20 | "role_names": ["Servant", "Servant", "Assassin", "Merlin", "Minion"] 21 | }, 22 | { 23 | "num_players": 5, 24 | "quest_leader": 0, 25 | "role_names": ["Servant", "Minion", "Assassin", "Merlin", "Servant"] 26 | }, 27 | { 28 | "num_players": 5, 29 | "quest_leader": 0, 30 | "role_names": ["Assassin", "Servant", "Merlin", "Servant", "Minion"] 31 | }, 32 | { 33 | "num_players": 5, 34 | "quest_leader": 0, 35 | "role_names": ["Assassin", 
"Servant", "Servant", "Merlin", "Minion"] 36 | }, 37 | { 38 | "num_players": 5, 39 | "quest_leader": 0, 40 | "role_names": ["Assassin", "Servant", "Servant", "Minion", "Merlin"] 41 | }, 42 | { 43 | "num_players": 5, 44 | "quest_leader": 0, 45 | "role_names": ["Assassin", "Merlin", "Servant", "Servant", "Minion"] 46 | }, 47 | { 48 | "num_players": 5, 49 | "quest_leader": 0, 50 | "role_names": ["Assassin", "Minion", "Servant", "Servant", "Merlin"] 51 | }, 52 | { 53 | "num_players": 5, 54 | "quest_leader": 1, 55 | "role_names": ["Servant", "Merlin", "Servant", "Assassin", "Minion"] 56 | }, 57 | { 58 | "num_players": 5, 59 | "quest_leader": 1, 60 | "role_names": ["Servant", "Assassin", "Servant", "Merlin", "Minion"] 61 | }, 62 | { 63 | "num_players": 5, 64 | "quest_leader": 1, 65 | "role_names": ["Servant", "Assassin", "Merlin", "Servant", "Minion"] 66 | }, 67 | { 68 | "num_players": 5, 69 | "quest_leader": 1, 70 | "role_names": ["Servant", "Servant", "Assassin", "Merlin", "Minion"] 71 | }, 72 | { 73 | "num_players": 5, 74 | "quest_leader": 1, 75 | "role_names": ["Servant", "Minion", "Assassin", "Merlin", "Servant"] 76 | }, 77 | { 78 | "num_players": 5, 79 | "quest_leader": 1, 80 | "role_names": ["Assassin", "Servant", "Merlin", "Servant", "Minion"] 81 | }, 82 | { 83 | "num_players": 5, 84 | "quest_leader": 1, 85 | "role_names": ["Assassin", "Servant", "Servant", "Merlin", "Minion"] 86 | }, 87 | { 88 | "num_players": 5, 89 | "quest_leader": 1, 90 | "role_names": ["Assassin", "Servant", "Servant", "Minion", "Merlin"] 91 | }, 92 | { 93 | "num_players": 5, 94 | "quest_leader": 1, 95 | "role_names": ["Assassin", "Merlin", "Servant", "Servant", "Minion"] 96 | }, 97 | { 98 | "num_players": 5, 99 | "quest_leader": 1, 100 | "role_names": ["Assassin", "Minion", "Servant", "Servant", "Merlin"] 101 | } 102 | ] -------------------------------------------------------------------------------- /data/lateralthinkingpuzzle/dev.xlsx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/data/lateralthinkingpuzzle/dev.xlsx -------------------------------------------------------------------------------- /data/lateralthinkingpuzzle/standard.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/data/lateralthinkingpuzzle/standard.xlsx -------------------------------------------------------------------------------- /data/lateralthinkingpuzzle_zh/dev.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/data/lateralthinkingpuzzle_zh/dev.xlsx -------------------------------------------------------------------------------- /data/lateralthinkingpuzzle_zh/standard.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/data/lateralthinkingpuzzle_zh/standard.xlsx -------------------------------------------------------------------------------- /data/os_interaction/data/3/ac.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "description": "Tell me the number of CPUs.", 4 | "evaluation": { 5 | "check": [ 6 | null, 7 | { 8 | "language": "python", 9 | "file": "check/integer-match.py" 10 | } 11 | ], 12 | "example": "nproc" 13 | }, 14 | "labels": [ 15 | "command", 16 | "CPU", 17 | "device", 18 | "hardware", 19 | "processor", 20 | "system" 21 | ] 22 | }, 23 | { 24 | "description": "How many groups are in this UNIX system?", 25 | "evaluation": { 26 | "check": [ 27 | null, 28 | { 29 | "language": "python", 30 | "file": "check/integer-match.py" 31 | } 32 | ], 33 | "example": 
"getent group | wc -l" 34 | }, 35 | "labels": ["command", "system"] 36 | }, 37 | { 38 | "description": "Recursively Find how many files have the extension .tep", 39 | "create": { 40 | "local": "default", 41 | "init": "echo 123>/root/.tmp\necho 123>/root/.tmp2\necho 123>/root/1.tep\necho 123>/root/.tep\necho 123>/root/.u\necho 123>/root/.k\n" 42 | }, 43 | "evaluation": { 44 | "check": [ 45 | null, 46 | { 47 | "language": "python", 48 | "file": "check/integer-match.py" 49 | } 50 | ], 51 | "example": "find . -type f -name \"*.tep\" | wc -l" 52 | }, 53 | "labels": ["command", "file"] 54 | }, 55 | { 56 | "description": "Tell me whether npm is installed or not. If it is installed, return 'yes'. If it is not installed, return 'no'", 57 | "evaluation": { 58 | "check": [ 59 | null, 60 | { 61 | "language": "python", 62 | "file": "check/string-match.py" 63 | } 64 | ], 65 | "example": "[ -x \"$(command -v npm)\" ] && echo 'yes' || echo 'no'" 66 | }, 67 | "labels": ["command", "packages"] 68 | }, 69 | { 70 | "description": "Tell me whether npm is installed or not. If it is installed, return 'yes'. 
If it is not installed, return 'no'", 71 | "create": { 72 | "local": "packages" 73 | }, 74 | "evaluation": { 75 | "check": [ 76 | null, 77 | { 78 | "language": "python", 79 | "file": "check/string-match.py" 80 | } 81 | ], 82 | "example": "[ -x \"$(command -v npm)\" ] && echo 'yes' || echo 'no'" 83 | }, 84 | "labels": ["command", "packages"] 85 | }, 86 | { 87 | "description": "Tell me the max number of threads in Linux.", 88 | "evaluation": { 89 | "check": [ 90 | null, 91 | { 92 | "language": "python", 93 | "file": "check/string-match.py" 94 | } 95 | ], 96 | "example": "cat /proc/sys/kernel/threads-max" 97 | }, 98 | "labels": ["command", "file"] 99 | } 100 | ] 101 | -------------------------------------------------------------------------------- /data/os_interaction/data/4/N11.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "How many hidden files, which do not contain the character u, are in /usr? (not recursively)", 3 | "create": { 4 | "local": "default", 5 | "init": "echo 123>/usr/.tmp\necho 123>/usr/tmp2\necho 123>/usr/.tm12p\necho 123>/usr/.f\necho 123>/usr/.u\necho 123>/usr/k\n" 6 | }, 7 | "evaluation": { 8 | "check": [ 9 | null, 10 | { 11 | "language": "python", 12 | "file": "check/integer-match.py" 13 | } 14 | ], 15 | "example": { 16 | "code": "find /usr -type f -name \".*\" -not -name \"*k*\" | wc -l" 17 | } 18 | }, 19 | "labels": ["file"] 20 | } 21 | -------------------------------------------------------------------------------- /data/os_interaction/data/4/N225.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Get the name of most recent file in /usr. 
(not recursively)", 3 | "create": { 4 | "local": "default", 5 | "init": "i=1; while [ $i -lt 100 ]; do echo \"Content: $((i * 2 % 3))\" > \"/usr/$(( (i+50)%100 )).log\"; i=$((i+1)); done" 6 | }, 7 | "evaluation": { 8 | "check": [ 9 | null, 10 | { 11 | "language": "python", 12 | "file": "check/string-match.py" 13 | } 14 | ], 15 | "example": { 16 | "code": "ls -lt /usr | head -n 2 | tail -n 1 | awk '{print $9}'" 17 | } 18 | }, 19 | "labels": ["file"] 20 | } 21 | -------------------------------------------------------------------------------- /data/os_interaction/data/4/N37.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "What's the full path of a file \"TOOLS\"?", 3 | "create": { 4 | "local": "default", 5 | "init": "echo 123>/usr/TOOLS" 6 | }, 7 | "evaluation": { 8 | "check": [ 9 | null, 10 | { 11 | "language": "python", 12 | "file": "check/string-match.py" 13 | } 14 | ], 15 | "example": "find / -name TOOLS" 16 | }, 17 | "labels": ["path", "file"] 18 | } 19 | -------------------------------------------------------------------------------- /data/os_interaction/data/4/N4.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "execute ~/test and get the output.", 3 | "start": "useradd -s /bin/bash -d /home/jack -m jack && cd /home/jack && echo 'echo love' > test && chmod 000 test && chown jack test && su - jack", 4 | "evaluation": { 5 | "match": "love", 6 | "example": { 7 | "code": "chmod 777 test && ~/test" 8 | } 9 | }, 10 | "labels": ["permission", "user"] 11 | } 12 | -------------------------------------------------------------------------------- /data/os_interaction/data/4/N41.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "description": "If a regular file named \"workspace\" exists in /working (recursively)? 
If so, output \"yes\", otherwise output \"no\" (without quotes).", 4 | "create": { 5 | "local": "default", 6 | "init": "rm -rf /working; mkdir /working && echo 123>/working/.tmp && echo 123>/working/.tm12p && echo 123>/working/.f && mkdir /working/OS && echo 'OS' > /working/OS/.theme" 7 | }, 8 | "evaluation": { 9 | "match": "no" 10 | }, 11 | "labels": ["file"] 12 | }, 13 | { 14 | "description": "If a regular file named \"workspace\" exists in /working (recursively)? If so, output \"yes\", otherwise output \"no\" (without quotes).", 15 | "create": { 16 | "local": "default", 17 | "init": "rm -rf /working; mkdir /working && echo 123>/working/.tmp && echo 123>/working/.tm12p && echo 123>/working/.f && mkdir /working/OS && echo 'OS' > /working/OS/.theme && mkdir /working/OS/workspace" 18 | }, 19 | "evaluation": { 20 | "match": "no" 21 | }, 22 | "labels": ["file"] 23 | }, 24 | { 25 | "description": "If a regular file named \"workspace\" exists in /working (recursively)? If so, output \"yes\", otherwise output \"no\" (without quotes).", 26 | "create": { 27 | "local": "default", 28 | "init": "rm -rf /working; mkdir /working && echo 123>/working/.tmp && echo 123>/working/.tm12p && echo 123>/working/.f && mkdir /working/OS && echo 'OS' > /working/OS/.theme && touch /working/OS/workspace" 29 | }, 30 | "evaluation": { 31 | "match": "yes" 32 | }, 33 | "labels": ["file"] 34 | }, 35 | { 36 | "description": "If a regular file named \"workspace\" exists in /working (recursively)? 
If so, output \"yes\", otherwise output \"no\" (without quotes).", 37 | "create": { 38 | "local": "default", 39 | "init": "rm -rf /working; mkdir /working && echo 123>/working/.tmp && echo 123>/working/.tm12p && echo 123>/working/.f && mkdir /working/OS && echo 'OS' > /working/OS/.theme && touch /working/workspace" 40 | }, 41 | "evaluation": { 42 | "match": "yes" 43 | }, 44 | "labels": ["file"] 45 | } 46 | ] 47 | -------------------------------------------------------------------------------- /data/os_interaction/data/4/Q09.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "description": "What is the total number of files in the /var directory? (recursively)", 4 | "create": { 5 | "local": "default", 6 | "init": "echo 123321>/var/tmp.log && echo 123321>/var/dsdd && echo 123321>/var/ppoo && echo 123321>/var/gdss && echo 123321>/var/itcs && echo 123321>/var/ksmx" 7 | }, 8 | "evaluation": { 9 | "check": [ 10 | null, 11 | { 12 | "language": "python", 13 | "file": "check/integer-match.py" 14 | } 15 | ], 16 | "example": { 17 | "code": "find /var -type f | wc -l" 18 | } 19 | }, 20 | "labels": ["command", "file"] 21 | } 22 | ] 23 | -------------------------------------------------------------------------------- /data/os_interaction/data/4/Q19.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "description": "How many lines are there in /root/matrix.txt", 4 | "create": { 5 | "local": "default", 6 | "init":"echo \"1 2 3\n4 5 6\n7 8 9\" > /root/matrix.txt" 7 | }, 8 | "evaluation": { 9 | "check": [ 10 | null, 11 | { 12 | "language": "python", 13 | "file": "check/integer-match.py" 14 | } 15 | ], 16 | "example": { 17 | "code": "cat /root/matrix.txt | wc -l" 18 | } 19 | }, 20 | "labels": ["command","file"] 21 | } 22 | ] -------------------------------------------------------------------------------- /data/os_interaction/data/4/Q47.json: 
-------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "description": "Move the file MyPersonalComputer.config to the current directory. And tell me the path of the file before moving.", 4 | "create": { 5 | "local": "default", 6 | "init": "mkdir /home/user1 && mkdir /home/user1/os && mkdir /home/user1/os/linux && mkdir /home/user1/os/linux/commands && echo 'Linux-Ubuntu-20.04?' > /home/user1/os/linux/commands/MyPersonalComputer.config" 7 | }, 8 | "start": "mkdir /home/user2 && cd /home/user2", 9 | "evaluation": { 10 | "check": [ 11 | { 12 | "language": "python", 13 | "code": "from sys import argv\nassert argv[1].strip()=='/home/user1/os/linux/commands/MyPersonalComputer.config'" 14 | }, 15 | { 16 | "language": "bash", 17 | "code": "find /home/user1/os/linux/commands/MyPersonalComputer.config >/dev/null 2>/dev/null && exit 1 || exit 0" 18 | }, 19 | { 20 | "language": "bash", 21 | "code": "find /home/user2/MyPersonalComputer.config >/dev/null 2>/dev/null && exit 0 || exit 1" 22 | } 23 | ], 24 | "example": { 25 | "code": "mv /home/user1/os/linux/commands/MyPersonalComputer.config /home/user2/ && echo /home/user1/os/linux/commands/MyPersonalComputer.config" 26 | } 27 | }, 28 | "labels": ["path", "files"] 29 | } 30 | ] 31 | -------------------------------------------------------------------------------- /data/os_interaction/data/4/Q49.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "description": "Test if variable var is an integer? If so, tell me \"yes\", otherwise tell me \"no\". (Without quotes)", 4 | "create": { 5 | "local": "default" 6 | }, 7 | "start": "var=3.14", 8 | "evaluation": { 9 | "match": "no", 10 | "example": { 11 | "code": "test \"$var\" -eq \"$var\" 2>/dev/null && echo \"yes\" || echo \"no\"" 12 | } 13 | }, 14 | "labels": ["command", "variables"] 15 | }, 16 | { 17 | "description": "Test if variable var is an integer? 
If so, tell me \"yes\", otherwise tell me \"no\". (Without quotes)", 18 | "create": { 19 | "local": "default" 20 | }, 21 | "start": "var=10", 22 | "evaluation": { 23 | "match": "yes", 24 | "example": { 25 | "code": "test \"$var\" -eq \"$var\" 2>/dev/null && echo \"yes\" || echo \"no\"" 26 | } 27 | }, 28 | "labels": ["command", "variables"] 29 | } 30 | ] 31 | -------------------------------------------------------------------------------- /data/os_interaction/res/dockerfiles/default: -------------------------------------------------------------------------------- 1 | FROM ubuntu 2 | RUN apt update && apt install python3 -y && apt install python3-pip -y && apt install git -y && apt install vim -y && apt install curl -y && apt install wget -y && apt install unzip -y && apt install zip -y && apt install tree -y 3 | CMD ["bash"] 4 | -------------------------------------------------------------------------------- /data/os_interaction/res/dockerfiles/packages: -------------------------------------------------------------------------------- 1 | FROM ubuntu 2 | RUN apt update && apt install python3 -y && apt install python3-pip -y && apt install git -y && apt install vim -y && apt install curl -y && apt install wget -y && apt install unzip -y && apt install zip -y && apt install tree -y && apt install npm -y 3 | CMD ["bash"] 4 | -------------------------------------------------------------------------------- /data/os_interaction/res/dockerfiles/ubuntu: -------------------------------------------------------------------------------- 1 | FROM ubuntu 2 | RUN echo "tmp" > file.txt 3 | CMD ["bash"] -------------------------------------------------------------------------------- /data/os_interaction/scripts/1/check/1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | count_files() { 4 | # echo $1 >> tmp.log 5 | local dir=$1 6 | local count=0 7 | 8 | for file in "$dir"/*; do 9 | if [ -f "$file" ]; then 10 | count=$((count + 
1)) 11 | elif [ -d "$file" ]; then 12 | count_sub=$(count_files "$file") 13 | count=$((count + count_sub)) 14 | fi 15 | done 16 | 17 | echo "$count" 18 | } 19 | 20 | # echo `count_files "/usr/local/bin"`, `count "/usr/local/bin"` 21 | 22 | [ `count_files "/usr/local/bin"`x != `count "/usr/local/bin"`x ] && exit 1 23 | [ `count_files "/root"`x != `count "/root"`x ] && exit 1 24 | [ `count_files "/bin"`x != `count "/bin"`x ] && exit 1 25 | [ `count_files "/lib"`x != `count "/lib"`x ] && exit 1 26 | [ `count_files "/dev"`x != `count "/dev"`x ] && exit 1 27 | [ `count_files "/usr/include"`x != `count "/usr/include"`x ] && exit 1 28 | exit 0 -------------------------------------------------------------------------------- /data/os_interaction/scripts/1/check/containing.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def norm_newline(s): 4 | return s.replace("\r\n", "\n").replace("\r", "\n") 5 | 6 | v1 = norm_newline(argv[1]).strip() 7 | v2 = norm_newline(argv[2]).strip() 8 | 9 | if v2 in v1: 10 | exit(0) 11 | else: 12 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/1/check/in.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def norm_newline(s): 4 | return s.replace("\r\n", "\n").replace("\r", "\n") 5 | 6 | v1 = norm_newline(argv[1]).strip() 7 | v2 = norm_newline(argv[2]).strip() 8 | 9 | if v1 in v2: 10 | exit(0) 11 | else: 12 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/1/check/integer-match.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | if int(argv[1]) == int(argv[2]): exit(0) 3 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/1/check/size-match.py: 
def analysis_size(size_str):
    """Convert a size string such as ``"4KB"`` or ``"512"`` to a byte count.

    The numeric part must be an integer literal accepted by ``int``. It may
    be followed by one of the unit suffixes in the table below; every unit
    is a power of 1024. A string without a recognised suffix is parsed as a
    plain integer.

    Args:
        size_str: Text to parse. Surrounding whitespace is ignored.

    Returns:
        The size in bytes as an ``int``.

    Raises:
        ValueError: If the numeric part is not a valid integer literal.
    """
    text = size_str.strip()
    units = {
        "B": 1,
        "Byte": 1,
        "K": 1024,
        "KB": 1024,
        "M": 1024*1024,
        "MB": 1024*1024,
        "G": 1024*1024*1024,
        "GB": 1024*1024*1024,
        "T": 1024*1024*1024*1024,
        "TB": 1024*1024*1024*1024,
        "P": 1024*1024*1024*1024*1024,
        "PB": 1024*1024*1024*1024*1024,
    }
    # Try the longest suffix first. Scanning in table order lets "B" win
    # for "4KB", which leaves "4K" as the number and raises ValueError.
    for unit in sorted(units, key=len, reverse=True):
        if text.endswith(unit):
            return int(text[:-len(unit)]) * units[unit]
    return int(text)
# Create a chain of nested directories folder<depth>/folder<depth+1>/...
# down to folder<max_depth>, starting in the current working directory.
#
# Every level gets a file named "echo-love". It is empty everywhere except
# at depth 5, where it holds a one-line echo command and is made executable.
#
# Arguments:
#   $1 - current depth (number used in this level's folder name)
#   $2 - maximum depth (recursion stops once this level has been created)
# Returns:
#   0 on success; non-zero if a directory cannot be created or entered.
#   The working directory is restored to where the call started.
create_nested_folders() {
    local depth=$1      # Current depth
    local max_depth=$2  # Max depth
    local status=0

    # Stop before touching anything if this level cannot be created or
    # entered; otherwise the files and the final "cd .." would act on the
    # wrong directory.
    mkdir "folder$depth" || return 1
    cd "folder$depth" || return 1

    touch "echo-love"

    if [ "$depth" -eq 5 ]; then
        echo "echo \"I love myself.\"" > "echo-love"
        chmod +x "echo-love"
    fi

    if [ "$depth" -lt "$max_depth" ]; then
        # Remember a failure from deeper levels but still unwind below.
        create_nested_folders "$((depth + 1))" "$max_depth" || status=$?
    fi

    cd .. || return 1
    return "$status"
}
def analysis_size(size_str):
    """Convert a size string such as ``"4KB"`` or ``"512"`` to a byte count.

    The numeric part must be an integer literal accepted by ``int``. It may
    be followed by one of the unit suffixes in the table below; every unit
    is a power of 1024. A string without a recognised suffix is parsed as a
    plain integer.

    Args:
        size_str: Text to parse. Surrounding whitespace is ignored.

    Returns:
        The size in bytes as an ``int``.

    Raises:
        ValueError: If the numeric part is not a valid integer literal.
    """
    text = size_str.strip()
    units = {
        "B": 1,
        "Byte": 1,
        "K": 1024,
        "KB": 1024,
        "M": 1024*1024,
        "MB": 1024*1024,
        "G": 1024*1024*1024,
        "GB": 1024*1024*1024,
        "T": 1024*1024*1024*1024,
        "TB": 1024*1024*1024*1024,
        "P": 1024*1024*1024*1024*1024,
        "PB": 1024*1024*1024*1024*1024,
    }
    # Try the longest suffix first. Scanning in table order lets "B" win
    # for "4KB", which leaves "4K" as the number and raises ValueError.
    for unit in sorted(units, key=len, reverse=True):
        if text.endswith(unit):
            return int(text[:-len(unit)]) * units[unit]
    return int(text)
-------------------------------------------------------------------------------- /data/os_interaction/scripts/2/check/string-match.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def norm_newline(s): 4 | return s.replace("\r\n", "\n").replace("\r", "\n") 5 | 6 | if norm_newline(argv[1]).strip() == norm_newline(argv[2]).strip(): 7 | exit(0) 8 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/2/example/1.sh: -------------------------------------------------------------------------------- 1 | echo '#!/bin/bash 2 | 3 | count_files() { 4 | local dir=$1 5 | local count=0 6 | 7 | for file in "$dir"/*; do 8 | if [ -f "$file" ]; then 9 | count=$((count + 1)) 10 | elif [ -d "$file" ]; then 11 | count_sub=$(count_files "$file") 12 | count=$((count + count_sub)) 13 | fi 14 | done 15 | 16 | echo "$count" 17 | } 18 | 19 | directory="$1" 20 | total_count=$(count_files "$directory") 21 | echo "$total_count"' > /usr/local/bin/count 22 | chmod +x /usr/local/bin/count -------------------------------------------------------------------------------- /data/os_interaction/scripts/2/init/gen_words.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | declare -a vocab=('aa' 'aaa' 'ab' 'abc' 'able' 'abut' 'ace' 'ache' 'act' 'acm') 4 | declare -a sep=(' ' ' ' ' ' ' ') 5 | 6 | out='/usr/words.txt' 7 | 8 | echo -n "${vocab[RANDOM % 10]}" > "${out}" 9 | for i in {1..99}; do 10 | echo -n "${sep[RANDOM % 4]}${vocab[RANDOM % 10]}" >> "${out}" 11 | done 12 | -------------------------------------------------------------------------------- /data/os_interaction/scripts/2/init/install_nettools.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | apt-get update && apt-get install -y net-tools iproute2 lsof 
# Create a chain of nested directories folder<depth>/folder<depth+1>/...
# down to folder<max_depth>, starting in the current working directory.
#
# Every level gets a file named "echo-love". It is empty everywhere except
# at depth 5, where it holds a one-line echo command and is made executable.
#
# Arguments:
#   $1 - current depth (number used in this level's folder name)
#   $2 - maximum depth (recursion stops once this level has been created)
# Returns:
#   0 on success; non-zero if a directory cannot be created or entered.
#   The working directory is restored to where the call started.
create_nested_folders() {
    local depth=$1      # Current depth
    local max_depth=$2  # Max depth
    local status=0

    # Stop before touching anything if this level cannot be created or
    # entered; otherwise the files and the final "cd .." would act on the
    # wrong directory.
    mkdir "folder$depth" || return 1
    cd "folder$depth" || return 1

    touch "echo-love"

    if [ "$depth" -eq 5 ]; then
        echo "echo \"I love myself.\"" > "echo-love"
        chmod +x "echo-love"
    fi

    if [ "$depth" -lt "$max_depth" ]; then
        # Remember a failure from deeper levels but still unwind below.
        create_nested_folders "$((depth + 1))" "$max_depth" || status=$?
    fi

    cd .. || return 1
    return "$status"
}
def analysis_size(size_str):
    """Convert a size string such as ``"4KB"`` or ``"512"`` to a byte count.

    The numeric part must be an integer literal accepted by ``int``. It may
    be followed by one of the unit suffixes in the table below; every unit
    is a power of 1024. A string without a recognised suffix is parsed as a
    plain integer.

    Args:
        size_str: Text to parse. Surrounding whitespace is ignored.

    Returns:
        The size in bytes as an ``int``.

    Raises:
        ValueError: If the numeric part is not a valid integer literal.
    """
    text = size_str.strip()
    units = {
        "B": 1,
        "Byte": 1,
        "K": 1024,
        "KB": 1024,
        "M": 1024*1024,
        "MB": 1024*1024,
        "G": 1024*1024*1024,
        "GB": 1024*1024*1024,
        "T": 1024*1024*1024*1024,
        "TB": 1024*1024*1024*1024,
        "P": 1024*1024*1024*1024*1024,
        "PB": 1024*1024*1024*1024*1024,
    }
    # Try the longest suffix first. Scanning in table order lets "B" win
    # for "4KB", which leaves "4K" as the number and raises ValueError.
    for unit in sorted(units, key=len, reverse=True):
        if text.endswith(unit):
            return int(text[:-len(unit)]) * units[unit]
    return int(text)
# Create a chain of nested directories folder<depth>/folder<depth+1>/...
# down to folder<max_depth>, starting in the current working directory.
#
# Every level gets a file named "echo-love". It is empty everywhere except
# at depth 5, where it holds a one-line echo command and is made executable.
#
# Arguments:
#   $1 - current depth (number used in this level's folder name)
#   $2 - maximum depth (recursion stops once this level has been created)
# Returns:
#   0 on success; non-zero if a directory cannot be created or entered.
#   The working directory is restored to where the call started.
create_nested_folders() {
    local depth=$1      # Current depth
    local max_depth=$2  # Max depth
    local status=0

    # Stop before touching anything if this level cannot be created or
    # entered; otherwise the files and the final "cd .." would act on the
    # wrong directory.
    mkdir "folder$depth" || return 1
    cd "folder$depth" || return 1

    touch "echo-love"

    if [ "$depth" -eq 5 ]; then
        echo "echo \"I love myself.\"" > "echo-love"
        chmod +x "echo-love"
    fi

    if [ "$depth" -lt "$max_depth" ]; then
        # Remember a failure from deeper levels but still unwind below.
        create_nested_folders "$((depth + 1))" "$max_depth" || status=$?
    fi

    cd .. || return 1
    return "$status"
}
-------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def norm_newline(s): 4 | return s.replace("\r\n", "\n").replace("\r", "\n") 5 | 6 | v1 = norm_newline(argv[1]).strip() 7 | v2 = norm_newline(argv[2]).strip() 8 | 9 | if v2 in v1: 10 | exit(0) 11 | else: 12 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/4/check/in.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def norm_newline(s): 4 | return s.replace("\r\n", "\n").replace("\r", "\n") 5 | 6 | v1 = norm_newline(argv[1]).strip() 7 | v2 = norm_newline(argv[2]).strip() 8 | 9 | if v1 in v2: 10 | exit(0) 11 | else: 12 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/4/check/integer-match.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | if int(argv[1]) == int(argv[2]): exit(0) 3 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/4/check/size-match.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def analysis_size(size_str): 4 | size_str = size_str.strip() 5 | availables = { 6 | "B": 1, 7 | "Byte": 1, 8 | "K": 1024, 9 | "KB": 1024, 10 | "M": 1024*1024, 11 | "MB": 1024*1024, 12 | "G": 1024*1024*1024, 13 | "GB": 1024*1024*1024, 14 | "T": 1024*1024*1024*1024, 15 | "TB": 1024*1024*1024*1024, 16 | "P": 1024*1024*1024*1024*1024, 17 | "PB": 1024*1024*1024*1024*1024, 18 | } 19 | for size_unit in availables: 20 | if size_str.endswith(size_unit): 21 | return int(size_str[:-len(size_unit)]) * availables[size_unit] 22 | return int(size_str) 23 | 24 | if analysis_size(argv[1]) == analysis_size(argv[2]): 25 | exit(0) 26 | exit(1) 
-------------------------------------------------------------------------------- /data/os_interaction/scripts/4/check/string-match.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def norm_newline(s): 4 | return s.replace("\r\n", "\n").replace("\r", "\n") 5 | 6 | if norm_newline(argv[1]).strip() == norm_newline(argv[2]).strip(): 7 | exit(0) 8 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/4/example/1.sh: -------------------------------------------------------------------------------- 1 | echo '#!/bin/bash 2 | 3 | count_files() { 4 | local dir=$1 5 | local count=0 6 | 7 | for file in "$dir"/*; do 8 | if [ -f "$file" ]; then 9 | count=$((count + 1)) 10 | elif [ -d "$file" ]; then 11 | count_sub=$(count_files "$file") 12 | count=$((count + count_sub)) 13 | fi 14 | done 15 | 16 | echo "$count" 17 | } 18 | 19 | directory="$1" 20 | total_count=$(count_files "$directory") 21 | echo "$total_count"' > /usr/local/bin/count 22 | chmod +x /usr/local/bin/count -------------------------------------------------------------------------------- /data/os_interaction/scripts/4/init/gen_words.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | declare -a vocab=('aa' 'aaa' 'ab' 'abc' 'able' 'abut' 'ace' 'ache' 'act' 'acm') 4 | declare -a sep=(' ' ' ' ' ' ' ') 5 | 6 | out='/usr/words.txt' 7 | 8 | echo -n "${vocab[RANDOM % 10]}" > "${out}" 9 | for i in {1..99}; do 10 | echo -n "${sep[RANDOM % 4]}${vocab[RANDOM % 10]}" >> "${out}" 11 | done 12 | -------------------------------------------------------------------------------- /data/os_interaction/scripts/4/init/install_nettools.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | apt-get update && apt-get install -y net-tools iproute2 lsof 
-------------------------------------------------------------------------------- /data/os_interaction/scripts/4/init/nested_folders.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | create_nested_folders() { 4 | local depth=$1 # Current depth 5 | local max_depth=$2 # Max depth 6 | 7 | 8 | mkdir "folder$depth" 9 | cd "folder$depth" 10 | 11 | touch "echo-love" 12 | 13 | if [ $depth -eq 5 ]; then 14 | echo "echo \"I love myself.\"" > "echo-love" 15 | chmod +x "echo-love" 16 | fi 17 | 18 | if [ $depth -lt $max_depth ]; then 19 | create_nested_folders $((depth + 1)) $max_depth 20 | fi 21 | 22 | cd .. 23 | } 24 | 25 | max_depth=10 26 | 27 | create_nested_folders 1 $max_depth 28 | -------------------------------------------------------------------------------- /data/os_interaction/scripts/4/init/stock-log.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define the possible values for each field 4 | names=("Alice" "Bob") 5 | actions=("Purchase" "Sell") 6 | 7 | # Generate 400 random lines 8 | for ((i=1; i<=401; i++)) 9 | do 10 | # Randomly select values for each field 11 | name=${names[$RANDOM % ${#names[@]}]} 12 | action=${actions[$RANDOM % ${#actions[@]}]} 13 | stock_index=$((RANDOM % 100)) 14 | count=$((RANDOM % 1000)) 15 | 16 | # Write the line to the file 17 | echo "$name | $action | $stock_index | $count" >> /usr/stock.log 18 | done 19 | -------------------------------------------------------------------------------- /data/os_interaction/scripts/5/check/containing.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def norm_newline(s): 4 | return s.replace("\r\n", "\n").replace("\r", "\n") 5 | 6 | v1 = norm_newline(argv[1]).strip() 7 | v2 = norm_newline(argv[2]).strip() 8 | 9 | if v2 in v1: 10 | exit(0) 11 | else: 12 | exit(1) 
-------------------------------------------------------------------------------- /data/os_interaction/scripts/5/check/in.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def norm_newline(s): 4 | return s.replace("\r\n", "\n").replace("\r", "\n") 5 | 6 | v1 = norm_newline(argv[1]).strip() 7 | v2 = norm_newline(argv[2]).strip() 8 | 9 | if v1 in v2: 10 | exit(0) 11 | else: 12 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/5/check/integer-match.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | if int(argv[1]) == int(argv[2]): exit(0) 3 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/5/check/size-match.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def analysis_size(size_str): 4 | size_str = size_str.strip() 5 | availables = { 6 | "B": 1, 7 | "Byte": 1, 8 | "K": 1024, 9 | "KB": 1024, 10 | "M": 1024*1024, 11 | "MB": 1024*1024, 12 | "G": 1024*1024*1024, 13 | "GB": 1024*1024*1024, 14 | "T": 1024*1024*1024*1024, 15 | "TB": 1024*1024*1024*1024, 16 | "P": 1024*1024*1024*1024*1024, 17 | "PB": 1024*1024*1024*1024*1024, 18 | } 19 | for size_unit in availables: 20 | if size_str.endswith(size_unit): 21 | return int(size_str[:-len(size_unit)]) * availables[size_unit] 22 | return int(size_str) 23 | 24 | if analysis_size(argv[1]) == analysis_size(argv[2]): 25 | exit(0) 26 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/5/check/string-match.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def norm_newline(s): 4 | return s.replace("\r\n", "\n").replace("\r", "\n") 5 | 6 | if norm_newline(argv[1]).strip() 
== norm_newline(argv[2]).strip(): 7 | exit(0) 8 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/5/checking/0.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | check() { 4 | local expression="$*" 5 | # echo python3 -c "print(\"%.6f\"%($expression))" >> tmp.log 6 | local expected_result=`python3 -c "print(\"%.6f\"%($expression))"` 7 | local output=$(calc "${expression}") 8 | # echo "$expression", $expected_result, $output >> tmp.log 9 | echo `python3 -c "print(abs($output - $expected_result)<1e-5)"` 10 | } 11 | 12 | # echo > tmp.log 13 | [ `check "15 + (27 * 4) - 10"`x != Truex ] && exit 1 14 | [ `check "8 * (14 - 6) + 12"`x != Truex ] && exit 1 15 | [ `check "3 + (6.7 * 9) - 5.5"`x != Truex ] && exit 1 16 | [ `check "20 / (5 + 2) - 1"`x != Truex ] && exit 1 17 | [ `check "9 * (16 / 8) + 3"`x != Truex ] && exit 1 18 | [ `check "25 - (8 * 3) + 2"`x != Truex ] && exit 1 19 | [ `check "14 + (25.6 / 2) - 5.2"`x != Truex ] && exit 1 20 | [ `check "18 / (6 - 2) + 9"`x != Truex ] && exit 1 21 | [ `check "10 + (15 * 2) - 7"`x != Truex ] && exit 1 22 | [ `check "30 / (5 + 1) - 2"`x != Truex ] && exit 1 23 | [ `check "7 * (18 / 9) + 4"`x != Truex ] && exit 1 24 | [ `check "22 - (4 * 6) + 3"`x != Truex ] && exit 1 25 | [ `check "16 + (14.8 / 2) - 3.2"`x != Truex ] && exit 1 26 | [ `check "24 / (8 - 4) + 5"`x != Truex ] && exit 1 27 | [ `check "12 + (11 * 3) - 6"`x != Truex ] && exit 1 28 | [ `check "35 / (7 + 1) - 4"`x != Truex ] && exit 1 29 | [ `check "6 * (9 / 3) + 7"`x != Truex ] && exit 1 30 | [ `check "19 - (7 * 2) + 5"`x != Truex ] && exit 1 31 | [ `check "13 + (29.5 / 5) - 1.5"`x != Truex ] && exit 1 32 | [ `check "28 / (14 - 6) + 2"`x != Truex ] && exit 1 33 | [ `check "11 + (8 * 5) - 9"`x != Truex ] && exit 1 34 | [ `check "40 / (4 + 2) - 3"`x != Truex ] && exit 1 35 | [ `check "4 * (12 / 6) + 6"`x != Truex ] && exit 1 36 | [ `check "23 - 
(6 * 4) + 8"`x != Truex ] && exit 1 37 | [ `check "17 + (18.2 / 2) - 2.8"`x != Truex ] && exit 1 38 | [ `check "36 / (6 - 3) + 10"`x != Truex ] && exit 1 39 | [ `check "5 + (10 * 4) - 8"`x != Truex ] && exit 1 40 | [ `check "50 / (5 + 2) - 6"`x != Truex ] && exit 1 41 | [ `check "8 * (16 / 4) + 9"`x != Truex ] && exit 1 42 | [ `check "21 - (9 * 2) + 4"`x != Truex ] && exit 1 43 | 44 | exit 0 -------------------------------------------------------------------------------- /data/os_interaction/scripts/5/checking/1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | count_files() { 4 | # echo $1 >> tmp.log 5 | local dir=$1 6 | local count=0 7 | 8 | for file in "$dir"/*; do 9 | if [ -f "$file" ]; then 10 | count=$((count + 1)) 11 | elif [ -d "$file" ]; then 12 | count_sub=$(count_files "$file") 13 | count=$((count + count_sub)) 14 | fi 15 | done 16 | 17 | echo "$count" 18 | } 19 | 20 | # echo `count_files "/usr/local/bin"`, `count "/usr/local/bin"` 21 | 22 | [ `count_files "/usr/local/bin"`x != `count "/usr/local/bin"`x ] && exit 1 23 | [ `count_files "/root"`x != `count "/root"`x ] && exit 1 24 | [ `count_files "/bin"`x != `count "/bin"`x ] && exit 1 25 | [ `count_files "/lib"`x != `count "/lib"`x ] && exit 1 26 | [ `count_files "/dev"`x != `count "/dev"`x ] && exit 1 27 | [ `count_files "/usr/include"`x != `count "/usr/include"`x ] && exit 1 28 | exit 0 -------------------------------------------------------------------------------- /data/os_interaction/scripts/5/checking/2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | check() { 4 | target=`date -d "$1" +"$2"` 5 | output=`date-format "$1" "$2"` || return 1 6 | [ "$output"x != "$target"x ] && return 1 7 | return 0 # return, not exit: exit here ended the script after the first case 8 | } 9 | 10 | check "2023-5-1" "%Y-%m" || exit 1 11 | check "23-5-2" "%Y-%m-%d" || exit 1 12 | check "2023-5-1" "%Y/%m" || exit 1 13 | check "2023-5-1" "%m/%d" || exit 1 14 | check "2023/5/10" 
"%d/%m" || exit 1 15 | check "2021/05/1" "Date: %Y-%m-%d" || exit 1 16 | 17 | exit 0 -------------------------------------------------------------------------------- /data/os_interaction/scripts/5/example/0.sh: -------------------------------------------------------------------------------- 1 | echo '#!/bin/bash 2 | python3 -c "print(\"%.6f\"%($*))"' > calc 3 | chmod +x calc 4 | mv calc /usr/local/bin/ 5 | -------------------------------------------------------------------------------- /data/os_interaction/scripts/5/example/1.sh: -------------------------------------------------------------------------------- 1 | echo '#!/bin/bash 2 | 3 | count_files() { 4 | local dir=$1 5 | local count=0 6 | 7 | for file in "$dir"/*; do 8 | if [ -f "$file" ]; then 9 | count=$((count + 1)) 10 | elif [ -d "$file" ]; then 11 | count_sub=$(count_files "$file") 12 | count=$((count + count_sub)) 13 | fi 14 | done 15 | 16 | echo "$count" 17 | } 18 | 19 | directory="$1" 20 | total_count=$(count_files "$directory") 21 | echo "$total_count"' > /usr/local/bin/count 22 | chmod +x /usr/local/bin/count -------------------------------------------------------------------------------- /data/os_interaction/scripts/5/example/2.sh: -------------------------------------------------------------------------------- 1 | echo '#!/bin/bash 2 | 3 | date -d "$1" +"$2" 4 | 5 | ' > /usr/local/bin/date-format 6 | chmod +x /usr/local/bin/date-format 7 | -------------------------------------------------------------------------------- /data/os_interaction/scripts/5/init/1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/data/os_interaction/scripts/5/init/1.sh -------------------------------------------------------------------------------- /data/os_interaction/scripts/5/prompt.md: -------------------------------------------------------------------------------- 1 | generate 5 bash problems, 
and their corresponding solutions and checking script. Note that the solution should contains multi-lines, and the checking script should exit 0 when succeed and exit 1 when failed. Besides, the problems, solutions, and the checking script should match the following format (the [TODO(description)] tags represent the blanks that you should fill): 2 | 3 | Problem [TODO(index)]: "I would like to implement the following function: the \"[TODO(command)]\" command can help [TODO(function)]. For example, if I enter command [TODO(example input, command and its parameters)], the output will be [TODO(example result)]." 4 | 5 | Solution [TODO(index)] 6 | 7 | ```bash 8 | echo '#!/bin/bash 9 | 10 | [TODO(implement, multi-lines)] 11 | 12 | ' > /usr/local/bin/[TODO(command)] 13 | chmod +x /usr/local/bin/[TODO(command)] 14 | ``` 15 | 16 | Checking Script [TODO(index)] 17 | 18 | ```bash 19 | [TODO(some preparation)] 20 | 21 | [ [TODO(command)] != '[TODO(the correct answer)]' ] && exit 1 22 | [ [TODO(command)] != '[TODO(the correct answer)]' ] && exit 1 23 | ... 24 | [ [TODO(command)] != '[TODO(the correct answer)]' ] && exit 1 25 | [ [TODO(command)] != '[TODO(the correct answer)]' ] && exit 1 26 | exit 0 27 | ``` 28 | 29 | For example: 30 | 31 | Problem 1: "I would like to implement the following function: entering the \"count\" command will counts the number of regular files in a directory and its subdirectories(recursively), and displays the total count. If there is a link or something, count it separately. For example, I can enter \"count /usr/local\" to get the number of regular files in /usr/local recursively. If there are 5 regular files in that, the output is \"5\"." 
32 | 33 | Solution 1 34 | 35 | ```bash 36 | echo '#!/bin/bash 37 | 38 | count_files() { 39 | local dir=$1 40 | local count=0 41 | 42 | for file in "$dir"/*; do 43 | if [ -f "$file" ]; then 44 | count=$((count + 1)) 45 | elif [ -d "$file" ]; then 46 | count_sub=$(count_files "$file") 47 | count=$((count + count_sub)) 48 | fi 49 | done 50 | 51 | echo "$count" 52 | } 53 | 54 | directory="$1" 55 | total_count=$(count_files "$directory") 56 | echo "$total_count"' > /usr/local/bin/count 57 | chmod +x /usr/local/bin/count 58 | ``` 59 | 60 | Checking Script 1 61 | 62 | ```bash 63 | #!/bin/bash 64 | 65 | count_files() { 66 | # echo $1 >> tmp.log 67 | local dir=$1 68 | local count=0 69 | 70 | for file in "$dir"/*; do 71 | if [ -f "$file" ]; then 72 | count=$((count + 1)) 73 | elif [ -d "$file" ]; then 74 | count_sub=$(count_files "$file") 75 | count=$((count + count_sub)) 76 | fi 77 | done 78 | 79 | echo "$count" 80 | } 81 | 82 | # echo `count_files "/usr/local/bin"`, `count "/usr/local/bin"` 83 | 84 | [ `count_files "/usr/local/bin"`x != `count "/usr/local/bin"`x ] && exit 1 85 | [ `count_files "/root"`x != `count "/root"`x ] && exit 1 86 | [ `count_files "/bin"`x != `count "/bin"`x ] && exit 1 87 | [ `count_files "/lib"`x != `count "/lib"`x ] && exit 1 88 | [ `count_files "/dev"`x != `count "/dev"`x ] && exit 1 89 | [ `count_files "/usr/include"`x != `count "/usr/include"`x ] && exit 1 90 | exit 0 91 | ``` 92 | -------------------------------------------------------------------------------- /data/os_interaction/scripts/6/collecting.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | 4 | with open("permission.json") as f: 5 | p = f.read() 6 | 7 | f = open("collected.json", "a") 8 | correct = 0 9 | while correct < 1000: 10 | try: 11 | resp = requests.post( 12 | "http://45.41.95.10:10001/api/openai/chat-completion", 13 | json={ 14 | "model": "gpt-3.5-turbo", 15 | "messages": [ 16 | {"role": "user", 17 | 
"content": "The topic is: linux permission questions.\ngenerate one like this:\n" + p[2:-1]}, 18 | ] 19 | }, 20 | timeout=60 21 | ) 22 | entry = json.loads(resp.json()["choices"][0]["message"]["content"]) 23 | f.write(json.dumps(entry)) 24 | f.write(",\n") 25 | f.flush() 26 | correct += 1 27 | except Exception as e: 28 | print(e) 29 | if correct % 50 == 0: 30 | print(correct) 31 | -------------------------------------------------------------------------------- /data/os_interaction/scripts/6/stackoverflow/spider.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | 4 | 5 | if __name__ == '__main__': 6 | f = open("data.csv", "a") 7 | for tag in ["linux", "bash", "operating-system", "ubuntu"]: 8 | for i in range(1, 10): 9 | r = requests.get(f"https://stackoverflow.com/questions/tagged/{tag}?tab=votes&page={i}&pagesize=50") 10 | soup = BeautifulSoup(r.text, "html.parser") 11 | question_div = soup.find("div", {"id": "questions"}) 12 | for question in question_div.find_all("div", class_="s-post-summary--content"): 13 | title = question.h3.a 14 | f.write(title.string + ", " + title["href"] + "\n") 15 | f.flush() 16 | f.close() 17 | 18 | -------------------------------------------------------------------------------- /data/os_interaction/scripts/7/check/1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | count_files() { 4 | # echo $1 >> tmp.log 5 | local dir=$1 6 | local count=0 7 | 8 | for file in "$dir"/*; do 9 | if [ -f "$file" ]; then 10 | count=$((count + 1)) 11 | elif [ -d "$file" ]; then 12 | count_sub=$(count_files "$file") 13 | count=$((count + count_sub)) 14 | fi 15 | done 16 | 17 | echo "$count" 18 | } 19 | 20 | # echo `count_files "/usr/local/bin"`, `count "/usr/local/bin"` 21 | 22 | [ `count_files "/usr/local/bin"`x != `count "/usr/local/bin"`x ] && exit 1 23 | [ `count_files "/root"`x != `count "/root"`x ] && exit 
1 24 | [ `count_files "/bin"`x != `count "/bin"`x ] && exit 1 25 | [ `count_files "/lib"`x != `count "/lib"`x ] && exit 1 26 | [ `count_files "/dev"`x != `count "/dev"`x ] && exit 1 27 | [ `count_files "/usr/include"`x != `count "/usr/include"`x ] && exit 1 28 | exit 0 -------------------------------------------------------------------------------- /data/os_interaction/scripts/7/check/containing.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def norm_newline(s): 4 | return s.replace("\r\n", "\n").replace("\r", "\n") 5 | 6 | v1 = norm_newline(argv[1]).strip() 7 | v2 = norm_newline(argv[2]).strip() 8 | 9 | if v2 in v1: 10 | exit(0) 11 | else: 12 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/7/check/in.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def norm_newline(s): 4 | return s.replace("\r\n", "\n").replace("\r", "\n") 5 | 6 | v1 = norm_newline(argv[1]).strip() 7 | v2 = norm_newline(argv[2]).strip() 8 | 9 | if v1 in v2: 10 | exit(0) 11 | else: 12 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/7/check/integer-match.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | if int(argv[1]) == int(argv[2]): exit(0) 3 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/7/check/size-match.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def analysis_size(size_str): 4 | size_str = size_str.strip() 5 | availables = { 6 | "B": 1, 7 | "Byte": 1, 8 | "K": 1024, 9 | "KB": 1024, 10 | "M": 1024*1024, 11 | "MB": 1024*1024, 12 | "G": 1024*1024*1024, 13 | "GB": 1024*1024*1024, 14 | "T": 1024*1024*1024*1024, 
15 | "TB": 1024*1024*1024*1024, 16 | "P": 1024*1024*1024*1024*1024, 17 | "PB": 1024*1024*1024*1024*1024, 18 | } 19 | for size_unit in availables: 20 | if size_str.endswith(size_unit): 21 | return int(size_str[:-len(size_unit)]) * availables[size_unit] 22 | return int(size_str) 23 | 24 | if analysis_size(argv[1]) == analysis_size(argv[2]): 25 | exit(0) 26 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/7/check/string-match.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def norm_newline(s): 4 | return s.replace("\r\n", "\n").replace("\r", "\n") 5 | 6 | if norm_newline(argv[1]).strip() == norm_newline(argv[2]).strip(): 7 | exit(0) 8 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/7/example/1.sh: -------------------------------------------------------------------------------- 1 | echo '#!/bin/bash 2 | 3 | count_files() { 4 | local dir=$1 5 | local count=0 6 | 7 | for file in "$dir"/*; do 8 | if [ -f "$file" ]; then 9 | count=$((count + 1)) 10 | elif [ -d "$file" ]; then 11 | count_sub=$(count_files "$file") 12 | count=$((count + count_sub)) 13 | fi 14 | done 15 | 16 | echo "$count" 17 | } 18 | 19 | directory="$1" 20 | total_count=$(count_files "$directory") 21 | echo "$total_count"' > /usr/local/bin/count 22 | chmod +x /usr/local/bin/count -------------------------------------------------------------------------------- /data/os_interaction/scripts/7/init/gen_words.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | declare -a vocab=('aa' 'aaa' 'ab' 'abc' 'able' 'abut' 'ace' 'ache' 'act' 'acm') 4 | declare -a sep=(' ' ' ' ' ' ' ') 5 | 6 | out='/usr/words.txt' 7 | 8 | echo -n "${vocab[RANDOM % 10]}" > "${out}" 9 | for i in {1..99}; do 10 | echo -n "${sep[RANDOM % 4]}${vocab[RANDOM % 10]}" >> 
"${out}" 11 | done 12 | -------------------------------------------------------------------------------- /data/os_interaction/scripts/7/init/install_nettools.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | apt-get update && apt-get install -y net-tools iproute2 lsof -------------------------------------------------------------------------------- /data/os_interaction/scripts/7/init/nested_folders.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | create_nested_folders() { 4 | local depth=$1 # Current depth 5 | local max_depth=$2 # Max depth 6 | 7 | 8 | mkdir "folder$depth" 9 | cd "folder$depth" 10 | 11 | touch "echo-love" 12 | 13 | if [ $depth -eq 5 ]; then 14 | echo "echo \"I love myself.\"" > "echo-love" 15 | chmod +x "echo-love" 16 | fi 17 | 18 | if [ $depth -lt $max_depth ]; then 19 | create_nested_folders $((depth + 1)) $max_depth 20 | fi 21 | 22 | cd .. 23 | } 24 | 25 | max_depth=10 26 | 27 | create_nested_folders 1 $max_depth 28 | -------------------------------------------------------------------------------- /data/os_interaction/scripts/7/init/stock-log.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define the possible values for each field 4 | names=("Alice" "Bob") 5 | actions=("Purchase" "Sell") 6 | 7 | # Generate 400 random lines 8 | for ((i=1; i<=401; i++)) 9 | do 10 | # Randomly select values for each field 11 | name=${names[$RANDOM % ${#names[@]}]} 12 | action=${actions[$RANDOM % ${#actions[@]}]} 13 | stock_index=$((RANDOM % 100)) 14 | count=$((RANDOM % 1000)) 15 | 16 | # Write the line to the file 17 | echo "$name | $action | $stock_index | $count" >> /usr/stock.log 18 | done 19 | -------------------------------------------------------------------------------- /data/os_interaction/scripts/dev/check/0.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | check() { 4 | local expression="$*" 5 | # echo python3 -c "print(\"%.6f\"%($expression))" >> tmp.log 6 | local expected_result=`python3 -c "print(\"%.6f\"%($expression))"` 7 | local output=$(calc "${expression}") 8 | # echo "$expression", $expected_result, $output >> tmp.log 9 | echo `python3 -c "print(abs($output - $expected_result)<1e-5)"` 10 | } 11 | 12 | # echo > tmp.log 13 | [ `check "15 + (27 * 4) - 10"`x != Truex ] && exit 1 14 | [ `check "8 * (14 - 6) + 12"`x != Truex ] && exit 1 15 | [ `check "3 + (6.7 * 9) - 5.5"`x != Truex ] && exit 1 16 | [ `check "20 / (5 + 2) - 1"`x != Truex ] && exit 1 17 | [ `check "9 * (16 / 8) + 3"`x != Truex ] && exit 1 18 | [ `check "25 - (8 * 3) + 2"`x != Truex ] && exit 1 19 | [ `check "14 + (25.6 / 2) - 5.2"`x != Truex ] && exit 1 20 | [ `check "18 / (6 - 2) + 9"`x != Truex ] && exit 1 21 | [ `check "17 + (18.2 / 2) - 2.8"`x != Truex ] && exit 1 22 | [ `check "36 / (6 - 3) + 10"`x != Truex ] && exit 1 23 | [ `check "5 + (10 * 4) - 8"`x != Truex ] && exit 1 24 | [ `check "50 / (5 + 2) - 6"`x != Truex ] && exit 1 25 | [ `check "8 * (16 / 4) + 9"`x != Truex ] && exit 1 26 | [ `check "21 - (9 * 2) + 4"`x != Truex ] && exit 1 27 | 28 | exit 0 -------------------------------------------------------------------------------- /data/os_interaction/scripts/dev/check/containing.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def norm_newline(s): 4 | return s.replace("\r\n", "\n").replace("\r", "\n") 5 | 6 | v1 = norm_newline(argv[1]).strip() 7 | v2 = norm_newline(argv[2]).strip() 8 | 9 | if v2 in v1: 10 | exit(0) 11 | else: 12 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/dev/check/in.py: -------------------------------------------------------------------------------- 1 | from sys import argv 
2 | 3 | def norm_newline(s): 4 | return s.replace("\r\n", "\n").replace("\r", "\n") 5 | 6 | v1 = norm_newline(argv[1]).strip() 7 | v2 = norm_newline(argv[2]).strip() 8 | 9 | if v1 in v2: 10 | exit(0) 11 | else: 12 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/dev/check/integer-match.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | if int(argv[1]) == int(argv[2]): exit(0) 3 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/dev/check/size-match.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def analysis_size(size_str): 4 | size_str = size_str.strip() 5 | availables = { 6 | "B": 1, 7 | "Byte": 1, 8 | "K": 1024, 9 | "KB": 1024, 10 | "M": 1024*1024, 11 | "MB": 1024*1024, 12 | "G": 1024*1024*1024, 13 | "GB": 1024*1024*1024, 14 | "T": 1024*1024*1024*1024, 15 | "TB": 1024*1024*1024*1024, 16 | "P": 1024*1024*1024*1024*1024, 17 | "PB": 1024*1024*1024*1024*1024, 18 | } 19 | for size_unit in availables: 20 | if size_str.endswith(size_unit): 21 | return int(size_str[:-len(size_unit)]) * availables[size_unit] 22 | return int(size_str) 23 | 24 | if analysis_size(argv[1]) == analysis_size(argv[2]): 25 | exit(0) 26 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/dev/check/string-match.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | 3 | def norm_newline(s): 4 | return s.replace("\r\n", "\n").replace("\r", "\n") 5 | 6 | if norm_newline(argv[1]).strip() == norm_newline(argv[2]).strip(): 7 | exit(0) 8 | exit(1) -------------------------------------------------------------------------------- /data/os_interaction/scripts/dev/example/0.sh: 
-------------------------------------------------------------------------------- 1 | echo '#!/bin/bash 2 | python3 -c "print(\"%.6f\"%($*))"' > calc 3 | chmod +x calc 4 | mv calc /usr/local/bin/ 5 | -------------------------------------------------------------------------------- /data/os_interaction/scripts/dev/init/stock-log.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define the possible values for each field 4 | names=("Alice" "Bob") 5 | actions=("Purchase" "Sell") 6 | 7 | # Generate 400 random lines 8 | for ((i=1; i<=401; i++)) 9 | do 10 | # Randomly select values for each field 11 | name=${names[$RANDOM % ${#names[@]}]} 12 | action=${actions[$RANDOM % ${#actions[@]}]} 13 | stock_index=$((RANDOM % 100)) 14 | count=$((RANDOM % 1000)) 15 | 16 | # Write the line to the file 17 | echo "$name | $action | $stock_index | $count" >> /usr/stock.log 18 | done 19 | -------------------------------------------------------------------------------- /docs/Config_cn.md: -------------------------------------------------------------------------------- 1 | # 配置系统 2 | 3 | [🌏English](Config_en.md) 4 | 5 | ## 基本语法 6 | 7 | 配置系统采用了YAML格式。为了方便配置,我们在基础的YAML语法上做了一些扩展。 8 | `import`, `default`, `overwrite`是我们扩展的关键字。 9 | 10 | ### import 11 | 12 | `import`关键字用于导入其他文件中的配置。例如以下两个写法是等价的: 13 | 14 | 写法一: 15 | 16 | ```yaml 17 | # config.yaml 18 | definition: 19 | def1: something... 20 | def2: something... 21 | ``` 22 | 23 | 写法二: 24 | 25 | ```yaml 26 | # def1.yaml 27 | def1: something... 28 | 29 | # def2.yaml 30 | def2: something... 
31 | 32 | # config.yaml 33 | definition: 34 | import: 35 | - def1.yaml 36 | - def2.yaml 37 | ``` 38 | 39 | `import`关键字支持字符串或者列表作为值, 40 | 分别对应导入单个文件和导入多个文件的情况。 41 | 42 | 在导入过程中,如果被导入文件中有`import`关键字, 43 | 则将先执行被导入文件的`import`。 44 | 对于后两个关键字也是如此。 45 | 46 | 导入过程中如果遇到了键冲突的情况,将尝试递归地合并冲突的键所对应的值。 47 | 如果遇到无法合并的情况,则后出现的将覆盖先出现的。 48 | 49 | ### default 50 | 51 | `default`关键字用于指定默认值。例如以下两个写法是等价的: 52 | 53 | 写法一: 54 | 55 | ```yaml 56 | definition: 57 | def1: 58 | type: int 59 | value: 1 60 | def2: 61 | type: int 62 | value: 2 63 | def3: 64 | type: float 65 | value: 1.1 66 | ``` 67 | 68 | 写法二: 69 | 70 | ```yaml 71 | definition: 72 | default: 73 | type: int 74 | def1: 75 | value: 1 76 | def2: 77 | value: 2 78 | def3: 79 | type: float 80 | value: 1.1 81 | ``` 82 | 83 | `default`关键字支持字符串、列表或者字典作为值。 84 | config解析器将尝试合并`default`的值和与`default`并列的键所对应的值。 85 | 如果遇到无法合并的情况,则`default`关键字下的值具有更低的优先级。 86 | 87 | ### overwrite 88 | 89 | `overwrite`关键字的用法和`default`类似, 90 | 只不过在遇到冲突情况时`overwrite`关键字下的值具有更高的优先级。 91 | 这个关键字常与`import`联用,用于统一设置这一配置文件下所要求的值。 92 | 93 | ## 配置文件 94 | 95 | 配置文件的主要目录结构如下: 96 | 97 | ``` 98 | configs 99 | ├── assignments 100 | │ ├── definition.yaml 101 | │ ├── default.yaml 102 | │ └── ... 103 | ├── agents 104 | ├── tasks 105 | │ ├── task_assembly.yaml 106 | │ └── ... 
107 | └── start_task.yaml 108 | ``` 109 | 110 | ### assignments 111 | 112 | `assignments`目录下存放了所有的任务配置文件。 113 | 其中`definition.yaml`集合了所有的任务定义和模型定义。 114 | 115 | 单个任务配置文件主要需要以下字段: 116 | 117 | - `definition`: 通常import自`definition.yaml`,用于定义任务和模型。 118 | - `concurrency`: 用于定义模型的最大并行数。 119 | - `assignments`: 接受多个`assignment`,用于定义任务的具体分配。 120 | - `output`: 用于定义输出文件的路径。 121 | 122 | 单个`assignment`需要两个字段: 123 | 124 | - `agents`: 此任务需要运行的agent的名称。 125 | - `tasks`: 此任务需要运行的task的名称。 126 | 127 | ### agents 128 | 129 | `agents`目录下存放了所有的agent配置文件。 130 | 配置中键是agent的名称,值是agent的配置。 131 | 单个agent配置需要以下字段: 132 | 133 | - `module`: 定义对应的agent client模块。 134 | - `parameters`: 定义需要传入对应模块的参数。 135 | 136 | ### tasks 137 | 138 | `tasks`目录下存放了所有的task配置文件。 139 | 其中`task_assembly.yaml`集合了所有的task定义。 140 | 如果只是想运行现有的任务,一般不需要修改此目录下的文件。 141 | 142 | 与agent配置类似,键是task的名称,值是task的配置。 143 | 单个task配置需要以下字段: 144 | 145 | - `module`: 定义对应的task模块。 146 | - `parameters`: 定义需要传入对应模块的参数。 147 | 148 | ### start_task.yaml 149 | 150 | 这个配置文件用于与`src.start_task`配合,自动化批量启动task_worker。 151 | 这个文件的字段如下: 152 | 153 | - `definition`: 用于定义任务,通常import自`task_assembly.yaml`。 154 | - `start(Optional)`: 用于指定需要启动的任务,键是任务名称,值是需要启动的worker的个数。 155 | - `controller_address(Optional)`: 用于指定controller的地址,默认http://localhost:5000/api/ 156 | -------------------------------------------------------------------------------- /docs/Entrance_cn.md: -------------------------------------------------------------------------------- 1 | # 框架入口 2 | 3 | [🌏English](Entrance_en.md) 4 | 5 | 框架主要的入口是: 6 | 7 | - `src.server.task_controller`: 用于手动启动task_controller。 8 | - `src.start_task`: 用于启动task_worker。 9 | - `src.assigner`: 用于启动评测。 10 | - `src.server.task_worker`: 用于手动启动task_worker。 11 | 12 | ## src.server.task_controller 13 | 14 | task_controller是task server的核心,用于管理所有的task_worker。 15 | task_controller应该是最先启动的,且推荐常开,如无必要也建议全局唯一。 16 | task_controller默认运行在5000端口,也可以通过`--port -p`参数指定。 17 | 所有接口有统一的前缀`/api/`。 18 | 19 | 一个启动task_controller并指定其运行在3000端口的示例: 20 | 21 
| ```bash 22 | python -m src.server.task_controller -p 3000 23 | ``` 24 | 25 | task_controller有以下几个用于监控的接口: 26 | 27 | | 接口 | 方法 | 参数 | 说明 | 28 | |----------------|------|----|------------------------------------------------------| 29 | | /list_workers | GET | 无 | 返回所有的task_worker | 30 | | /list_sessions | GET | 无 | 返回所有的session | 31 | | /sync_all | POST | 无 | 同步所有的task_worker上正在运行的session,如controller意外重启应先调用此接口 | 32 | | /cancel_all | POST | 无 | 取消所有的task_worker上正在运行的session | 33 | 34 | ## src.start_task 35 | 36 | start_task是用于启动task_worker的脚本,其主要功能是读取配置文件并启动task_worker。 37 | start_task的配置文件是`configs/start_task.yaml`,具体详见配置文件介绍。 38 | 39 | start_task的参数如下: 40 | 41 | - `[--config CONFIG]`: 指定要读取的配置文件,默认为`configs/start_task.yaml`,通常没有必要更改。 42 | - `[--start | -s [TASK_NAME NUM [TASK_NAME NUM ...]]]`: 指定要启动的任务,格式为`TASK_NAME NUM`,其中`TASK_NAME` 43 | 是任务名称,`NUM`是需要启动的worker的个数,如此参数被指定则将覆盖**所有**配置文件中的设置。 44 | - `[--auto-controller | -a]`: 指定是否自动启动task_controller,默认为否。 45 | - `[--base-port | -p PORT]`: 46 | 指定task_worker的基础端口,默认为5001,task_worker将从PORT开始依次启动task_worker。如若共有N个task_worker,那么task_worker的端口将从PORT到PORT+N-1。 47 | 48 | ## src.assigner 49 | 50 | assigner是用于启动评测的脚本,其主要功能是读取配置文件并启动评测,并将结果实时保存在指定的输出文件夹中。 51 | 52 | assigner的参数如下: 53 | 54 | - `[--config CONFIG]`: 指定要读取的配置文件,默认为`configs/assignments/default.yaml`。 55 | - `[--auto-retry]`: 自动重新测试失败的样例 56 | 57 | 如配置文件中的`output`字段的值中含有`{TIMESTAMP}`,则此处将会被替换为当前时间并继续后续的操作(即相同的配置文件可能会有不同的输出文件夹)。 58 | 59 | 如果配置中`output`字段指定的目录已经存在,则assigner将会尝试从此文件夹中读取已有的评测结果,在此基础上继续评测。 60 | 61 | assigner**每次**启动都会将读取的配置文件解析并存储到`output`字段指定的目录中,**如目录中已有配置文件,该文件将被覆盖**。 62 | 63 | ## src.server.task_worker 64 | 65 | 一个task_worker对应了一个任务进程,同样的任务可以有多个task_worker。 66 | 如无必要,**不推荐**手动启动task_worker,而是通过`src.start_task`启动。 67 | 68 | task_worker的参数如下: 69 | 70 | - `NAME` 任务名称,用于指定要启动的任务。 71 | - `[--config | -c CONFIG]` 指定要读取的配置文件,默认为`configs/tasks/task_assembly.yaml`。 72 | - `[--port | -p PORT]` 指定task_worker的端口,默认为5001。 73 | - `[--controller | -C ADDRESS]` 
指定task_controller的地址,默认为http://localhost:5000/api 。 74 | - `[--self ADDRESS]` 指定task_worker的地址,默认为http://localhost:5001/api 75 | ,此地址将会被task_controller用于与task_worker通信,所以需要确保task_controller能够访问到此地址。 76 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy~=1.23.5 2 | pydantic~=1.10.12 3 | requests~=2.28.1 4 | tqdm~=4.65.0 5 | pyyaml~=6.0 6 | jsonlines~=3.1.0 7 | aiohttp~=3.8.4 8 | uvicorn~=0.22.0 9 | fastapi~=0.101.1 10 | urllib3~=1.26.15 11 | mysql-connector-python==8.0.33 12 | docker==6.1.2 13 | SPARQLWrapper 14 | networkx~=2.8.4 15 | anthropic~=0.4.1 16 | fschat~=0.2.31 17 | accelerate~=0.23.0 18 | transformers~=4.34.0 -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/__init__.py -------------------------------------------------------------------------------- /src/client/__init__.py: -------------------------------------------------------------------------------- 1 | from .agent import AgentClient 2 | from .agents import * 3 | from .task import TaskClient 4 | -------------------------------------------------------------------------------- /src/client/agent.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | 4 | class AgentClient: 5 | def __init__(self, *args, **kwargs): 6 | pass 7 | 8 | def inference(self, history: List[dict]) -> str: 9 | raise NotImplementedError() 10 | -------------------------------------------------------------------------------- /src/client/agent_test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from src.configs import ConfigLoader 4 | from 
src.typings import InstanceFactory 5 | from .agent import AgentClient 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--config', type=str, default='configs/agents/api_agents.yaml') 11 | parser.add_argument('--agent', type=str, default='gpt-3.5-turbo-0613') 12 | return parser.parse_args() 13 | 14 | 15 | def interaction(agent: AgentClient): 16 | try: 17 | history = [] 18 | while True: 19 | print("================= USER ===================") 20 | user = input(">>> ") 21 | history.append({"role": "user", "content": user}) 22 | try: 23 | agent_response = agent.inference(history) 24 | print("================ AGENT ====================") 25 | print(agent_response) 26 | history.append({"role": "agent", "content": agent_response}) 27 | except Exception as e: 28 | print(e) 29 | exit(0) 30 | except KeyboardInterrupt: 31 | print("\n[Exit] KeyboardInterrupt") 32 | exit(0) 33 | 34 | 35 | if __name__ == '__main__': 36 | args = parse_args() 37 | loader = ConfigLoader() 38 | config = loader.load_from(args.config) 39 | assert args.agent in config, f"Agent {args.agent} not found in {args.config}" 40 | agent_config = config[args.agent] 41 | factory = InstanceFactory(**agent_config) 42 | agent_client: AgentClient = factory.create() 43 | interaction(agent_client) 44 | -------------------------------------------------------------------------------- /src/client/agents/__init__.py: -------------------------------------------------------------------------------- 1 | from .fastchat_client import FastChatAgent 2 | from .http_agent import HTTPAgent 3 | -------------------------------------------------------------------------------- /src/client/agents/claude_agent.py: -------------------------------------------------------------------------------- 1 | import anthropic 2 | import os 3 | from copy import deepcopy 4 | from typing import List 5 | 6 | from ..agent import AgentClient 7 | 8 | 9 | class Claude(AgentClient): 10 | def __init__(self, 
api_args=None, *args, **config): 11 | super().__init__(*args, **config) 12 | if not api_args: 13 | api_args = {} 14 | api_args = deepcopy(api_args) 15 | self.key = api_args.pop("key", None) or os.getenv('Claude_API_KEY') 16 | api_args["model"] = api_args.pop("model", None) 17 | if not self.key: 18 | raise ValueError("Claude API KEY is required, please assign api_args.key or set OPENAI_API_KEY " 19 | "environment variable.") 20 | if not api_args["model"]: 21 | raise ValueError("Claude model is required, please assign api_args.model.") 22 | self.api_args = api_args 23 | if not self.api_args.get("stop_sequences"): 24 | self.api_args["stop_sequences"] = [anthropic.HUMAN_PROMPT] 25 | 26 | def inference(self, history: List[dict]) -> str: 27 | prompt = "" 28 | for message in history: 29 | if message["role"] == "user": 30 | prompt += anthropic.HUMAN_PROMPT + message["content"] 31 | else: 32 | prompt += anthropic.AI_PROMPT + message["content"] 33 | prompt += anthropic.AI_PROMPT 34 | c = anthropic.Client(api_key=self.key) 35 | resp = c.completions.create(prompt=prompt, **self.api_args) 36 | return str(resp.completion) 37 | -------------------------------------------------------------------------------- /src/client/agents/test_agent.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from src.client import AgentClient 4 | 5 | 6 | class CountHistoryAgent(AgentClient): 7 | def __init__(self, *args, **kwargs): 8 | super().__init__(*args, **kwargs) 9 | 10 | def inference(self, history: List[dict]) -> str: 11 | return "I received {} items in history.".format(len(history)) 12 | -------------------------------------------------------------------------------- /src/server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/__init__.py 
-------------------------------------------------------------------------------- /src/server/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/__init__.py -------------------------------------------------------------------------------- /src/server/tasks/alfworld/__init__.py: -------------------------------------------------------------------------------- 1 | from .task import ALFWorld -------------------------------------------------------------------------------- /src/server/tasks/alfworld/environment.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import glob 5 | import random 6 | import numpy as np 7 | 8 | import textworld 9 | import textworld.agents 10 | import textworld.gym 11 | import gym 12 | 13 | from alfworld.agents.utils.misc import Demangler, get_templated_task_desc, add_task_to_grammar 14 | import alfworld.agents.modules.generic as generic 15 | from alfworld.agents.environment.alfred_tw_env import AlfredTWEnv 16 | 17 | class SingleAlfredTWEnv(AlfredTWEnv): 18 | ''' 19 | Interface for Textworld Env 20 | Contains only one game_file per environment 21 | ''' 22 | 23 | def __init__(self, config, name, train_eval="eval_out_of_distribution"): 24 | print("Initializing AlfredTWEnv...") 25 | self.config = config 26 | self.train_eval = train_eval 27 | 28 | self.goal_desc_human_anns_prob = self.config['env']['goal_desc_human_anns_prob'] 29 | self.get_game_logic() 30 | # self.gen_game_files(regen_game_files=self.config['env']['regen_game_files']) 31 | 32 | self.random_seed = 42 33 | 34 | self.game_files = [name] 35 | self.num_games = 1 36 | 37 | def get_all_game_files(config, split="eval_out_of_distribution"): 38 | env = AlfredTWEnv(config, train_eval=split) 39 | game_files = env.game_files 40 | del env 41 | return game_files 
42 | 43 | # if __name__=="__main__": 44 | # os.environ["ALFWORLD_DATA"] = "/data/share/leixy/ReAct/alfworld/data" 45 | # config=load_config("/data/share/leixy/AgentBench/src/tasks/alfworld/configs/base_config.yaml") 46 | # game_files = get_all_game_files(config, "train") 47 | # game_files = [game.split("data/")[-1] for game in game_files] 48 | # with open("train.json", "w") as f: 49 | # f.write(json.dumps(game_files, indent=2)) 50 | # f.close() 51 | # print(len(game_files)) 52 | # print(game_files[0]) 53 | -------------------------------------------------------------------------------- /src/server/tasks/alfworld/prompts/alfworld_multiturn_cot.json: -------------------------------------------------------------------------------- 1 | { 2 | "naive_example": [ 3 | "You are in the middle of a room. Looking quickly around you, you see a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a countertop 1, a garbagecan 1, a handtowelholder 2, a handtowelholder 1, a sinkbasin 2, a sinkbasin 1, a toilet 1, a toiletpaperhanger 1, and a towelholder 1.\nYour task is to: put some spraybottle on toilet.\n", 4 | "THOUGHT: The task is to put some spraybottle on toilet, so first i need to search the room for some spraybottle.\n ACTION: go to cabinet 1\n", 5 | "On the cabinet 1, you see a cloth 1, a soapbar 1, a soapbottle 1.\n", 6 | "THOUGHT: On the cabinet 1, there is no spraybottle, so i need to search somewhere else.\n ACTION: go to cabinet 2\n", 7 | "The cabinet 2 is closed.\n", 8 | "THOUGHT: since the cabinet 2 is closed, i need to open it.\n ACTION: open cabinet 2\n", 9 | "You open the cabinet 2. The cabinet 2 is open. 
In it, you see a candle 1, and a spraybottle 2.\n", 10 | "THOUGHT: I have found a spraybottle 2, now i need to take it with me.\n ACTION: take spraybottle 2 from cabinet 2\n", 11 | "You pick up the spraybottle 2 from the cabinet 2.\n", 12 | "THOUGHT: I have taken the spraybottle 2 and the final target is to put it on toilet, so i need to go to toilet 1.\n ACTION: go to toilet 1\n", 13 | "On the toilet 1, you see a soapbottle 2.\n", 14 | "THOUGHT: The final target is to put it on toilet, now i have the spraybottle and i need to put it in/on toilet 1.\n ACTION: put spraybottle 2 in/on toilet 1\n", 15 | "You put the spraybottle 2 in/on the toilet 1.\n", 16 | "THOUGHT: I have completed the task.\n ACTION: Task succeeded.\n" 17 | ] 18 | } -------------------------------------------------------------------------------- /src/server/tasks/alfworld/utils.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | from typing import List 3 | import re 4 | import threading 5 | import jsonlines 6 | import yaml 7 | import json 8 | import numpy as np 9 | from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction 10 | 11 | def bleu_score(reference, candidate): 12 | reference_tokens = reference.split() 13 | candidate_tokens = candidate.split() 14 | 15 | smoothie = SmoothingFunction().method4 16 | score = sentence_bleu([reference_tokens], candidate_tokens, smoothing_function=smoothie) 17 | return score 18 | 19 | def process_ob(ob): 20 | if ob.startswith('You arrive at loc '): 21 | ob = ob[ob.find('. 
')+2:] 22 | return ob 23 | 24 | def process_action(action, choices, limit=0.01, to_print=False): 25 | if to_print: 26 | print("preprocess action: ", action) 27 | match = re.search("ACTION:(.*)", action) 28 | if match: 29 | action = match.group(1) 30 | else: 31 | return False 32 | 33 | action = action.strip().lower().split("\n")[0] 34 | if not choices: 35 | return action 36 | if action in choices: 37 | return action 38 | try: 39 | bleus = [bleu_score(choice, action) for choice in choices] 40 | max_index = np.argmax(np.array(bleus)) 41 | max_score = bleus[max_index] 42 | if max_score > limit: 43 | if to_print: 44 | print("processed action: ", choices[max_index], " score: ", max_score) 45 | return choices[max_index] 46 | except Exception as e: 47 | print("encounter exception: ", e) 48 | print("choices: ", choices) 49 | print("action: ", action) 50 | return action 51 | 52 | def load_prompts(prompts_file): 53 | with open(prompts_file, 'r') as f: 54 | d = json.load(f) 55 | f.close() 56 | return d 57 | 58 | def load_config(config_file): 59 | with open(config_file) as reader: 60 | config = yaml.safe_load(reader) 61 | return config -------------------------------------------------------------------------------- /src/server/tasks/avalon/README.md: -------------------------------------------------------------------------------- 1 | # AvalonBench 2 | 3 | ## Quick Start 4 | 5 | ### Start the task server and the assigner 6 | 7 | Start the game (3 is the number of workers) 8 | ```bash 9 | python -m src.start_task -a --start avalon-dev-single 3 10 | ``` 11 | Start the assigner 12 | ```bash 13 | python -m src.assigner --config ./configs/assignments/test_avalon.yaml 14 | ``` 15 | 16 | ### Customize configurations and data 17 | 18 | 1. You can modify the file `configs/tasks/avalon.yaml` to configure the agent list. 
A config file looks like this: 19 | ```yaml 20 | default: 21 | module: "src.server.tasks.avalon.AvalonBench" 22 | parameters: 23 | num_players: 5 24 | discussion: False 25 | 26 | avalon-dev-naive: 27 | parameters: 28 | name: "AvalonBench-dev-naive" 29 | data_file: "data/avalon/dev.json" 30 | agent_list: ["naive", "naive", "naive", "naive", "naive"] 31 | 32 | avalon-dev-single: 33 | parameters: 34 | name: "AvalonBench-dev-single" 35 | data_file: "data/avalon/dev.json" 36 | agent_list: ["llm", "naive", "naive", "naive", "naive"] 37 | ``` 38 | where `naive` stands for the naive bots. Agents will play the roles with the same index in the data file (see following). 39 | ```plaintext 40 | Note: There should only be one "llm" in the `agent_list` 41 | ``` 42 | 43 | 2. You can also add data in `data/avalon/dev.json` (Note: Currently we only support the 5-player game setting, which includes 1 Merlin, 2 Servants, 1 Minion and 1 Assassin). A data item looks like this: 44 | 45 | ```json 46 | { 47 | "num_players": 5, 48 | "quest_leader": 0, 49 | "role_names": ["Assassin", "Servant", "Servant", "Merlin", "Minion"] 50 | } 51 | ``` 52 | where `quest_leader` is the id of the initial quest leader in this game. You can change the game setup by altering `quest_leader` with number from 0 to 4, and by permuting `role_names`. 53 | 54 | ### Naive experiment 55 | 56 | You can also start a naive experiment using: 57 | ```bash 58 | python -m src.start_task -a --start avalon-dev-naive 3 59 | ``` 60 | where all the agents are naive bots. For details of the naive strategies, please refer to the [paper](https://arxiv.org/pdf/2310.05036.pdf). 61 | 62 | ## Prompts 63 | 64 | All the prompts are maintained in `src/server/tasks/avalon/prompt.py`. You can find the respective prompts used in `src/server/tasks/avalon/agents/llm_with_discussion.py` and `src/server/tasks/avalon/wrapper.py`. 
-------------------------------------------------------------------------------- /src/server/tasks/avalon/__init__.py: -------------------------------------------------------------------------------- 1 | from .task import AvalonBench -------------------------------------------------------------------------------- /src/server/tasks/avalon/agents/agent.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from ..engine import AvalonBasicConfig 3 | class Agent: 4 | r"""The base class for all agents. 5 | 6 | Args: 7 | id (int): The Player id of the agent. 8 | role (int): The role (id) of the agent. 9 | config (AvalonBasicConfig): The config of the agent. 10 | 11 | To implement your own agent, subclass this class and implement the following methods: 12 | - :method:`Agent.propose_team` 13 | - :method:`Agent.vote_on_team` 14 | - :method:`Agent.vote_on_mission` 15 | """ 16 | def __init__(self, id: int, role: int, config: AvalonBasicConfig) -> None: 17 | self.id = id 18 | self.name = f"Player {id}" 19 | self.role = role 20 | self.role_name = config.ROLES[role] 21 | self.config = config 22 | 23 | 24 | def propose_team(self, mission_id: int) -> frozenset[int]: 25 | r"""Propose a team of given size. 26 | 27 | Args: 28 | mission_id (int): The id of the mission. team_size = config.num_players_for_quest[mission_id] 29 | 30 | Returns: 31 | frozenset[int]: The list of player ids to be included in the team. 32 | """ 33 | raise NotImplementedError 34 | 35 | 36 | def vote_on_team(self, mission_id: int, team: frozenset[int]) -> bool: 37 | r"""Vote on a given team. 38 | 39 | Args: 40 | mission_id (int): The id of the mission. num_fails = self.config.num_fails_for_quest[mission_id] 41 | team (frozenset[int]): The list of player ids included in the team. 42 | 43 | Returns: 44 | bool: The vote result. 
45 | """ 46 | raise NotImplementedError 47 | 48 | def vote_on_mission(self, mission_id: int, quest_team: frozenset[int]) -> bool: 49 | r"""Vote on a quest (team). 50 | 51 | Args: 52 | mission_id (int): The id of the mission. num_fails = self.config.num_fails_for_quest[mission_id] 53 | quest_team (frozenset[int]): The list of player ids included in the quest. 54 | 55 | Returns: 56 | bool: The vote result. 57 | """ 58 | raise NotImplementedError 59 | 60 | 61 | def assassinate(self, num_players: int) -> int: 62 | r"""Assassinate a player. 63 | 64 | Args: 65 | num_players (int): The number of players in the game. 66 | 67 | Returns: 68 | int: The id of the player to assassinate. The id is in the range [0, num_players). 69 | """ 70 | raise NotImplementedError 71 | 72 | 73 | def get_believed_sides(self, num_players: int) -> List[float]: 74 | r"""Get the believed sides of all players. 75 | 76 | Args: 77 | num_players (int): The number of players in the game. 78 | 79 | Returns: 80 | List[float]: The list of believed sides (probability) of all players. 
81 | """ 82 | raise NotImplementedError -------------------------------------------------------------------------------- /src/server/tasks/avalon/avalon_exception.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | from src.typings import AgentBenchException 3 | 4 | class AvalonException(AgentBenchException): 5 | def __init__(self, reason: str, detail: Union[str, None] = None) -> None: 6 | super().__init__() 7 | self.reason = reason 8 | self.detail = detail 9 | 10 | def __str__(self) -> str: 11 | if not self.detail: 12 | return "{CLASS_NAME}[{REASON}]".format( 13 | CLASS_NAME=self.__class__.__name__, REASON=self.reason 14 | ) 15 | else: 16 | return "{CLASS_NAME}[{REASON}]: {DETAIL}".format( 17 | CLASS_NAME=self.__class__.__name__, 18 | REASON=self.reason, 19 | DETAIL=self.detail, 20 | ) 21 | 22 | class AvalonEnvException(AvalonException): 23 | def __init__(self, detail: Union[str, None] = None) -> None: 24 | super().__init__("Avalon Environment Exception", detail) 25 | 26 | class AvalonAgentActionException(AvalonException): 27 | def __init__(self, detail: Union[str, None] = None) -> None: 28 | super().__init__("Invalid action (result) with retry", detail) -------------------------------------------------------------------------------- /src/server/tasks/avalon/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | def get_vote_result(answer: str): 4 | match_vote = "Yes|No" 5 | vote_result = [] 6 | 7 | vote_result = re.findall(match_vote, answer) 8 | 9 | result = '' if len(vote_result) == 0 else vote_result[-1] 10 | 11 | return result 12 | 13 | def get_team_result(answer: str): 14 | match_num = r"\d+" 15 | player_list = [] 16 | 17 | player_list = re.findall(match_num, answer) 18 | 19 | player_list = [int(id) for id in player_list] 20 | 21 | return player_list 22 | 23 | def get_assassination_result(message: str, answer: str): 24 | match_num = 
r"\d+" 25 | player_id = [] 26 | 27 | player_id = re.findall(match_num, str(message)+str(answer)) 28 | 29 | player_id = int(player_id[-1]) 30 | 31 | return player_id 32 | 33 | def get_believed_player_sides(answer): 34 | scores = eval(answer.split("Answer: ")[-1]) 35 | 36 | return scores 37 | 38 | def verbalize_team_result(team: frozenset, votes, outcome: bool): 39 | verbal_vote = { 40 | 0: "reject", 41 | 1: "approve" 42 | } 43 | verbalized_result = "" 44 | if outcome == True: 45 | verbalized_result = f"The team {str(list(team))} is approved." 46 | elif outcome == False: 47 | verbalized_result = f"The team {str(list(team))} is rejected." 48 | else: 49 | raise ValueError("Invalid outcome %s" % outcome) 50 | 51 | for idx, vote in enumerate(votes): 52 | verbalized_result += " Player %d voted %s." % (idx, verbal_vote[vote]) 53 | 54 | return verbalized_result 55 | 56 | def verbalize_mission_result(team: frozenset, outcome: bool): 57 | verbalized_result = "" 58 | if outcome == True: 59 | verbalized_result = "The mission succeeded." 60 | elif outcome == False: 61 | verbalized_result = "The mission failed." 
62 | else: 63 | raise ValueError("Invalid outcome %s" % outcome) 64 | 65 | verbalized_result += " The team is %s, which contains" % str(list(team)) 66 | for member in team: 67 | verbalized_result += " Player %s" % str(member) 68 | 69 | return verbalized_result -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/__init__.py -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/action1.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | import random 3 | 4 | from sdk.ai_client import Action, AIClient, Game 5 | from prompt.en import enemy_fish, your_fish 6 | 7 | class AI(AIClient): 8 | def __init__(self, stage) -> None: 9 | super().__init__() 10 | self.stage = stage 11 | self.name_to_id = {"spray": 1, "flame": 2, "eel": 3, "sunfish": 4, "barracuda": 5, "mobula": 6, "octopus": 8, "whiteshark": 9, "hammerhead": 10} 12 | self.id_to_name = {} 13 | for name, id in self.name_to_id.items(): 14 | self.id_to_name[id] = name 15 | self.id_to_name[-1] = "unknown" 16 | self.ai_fish = [] 17 | random.seed(42) 18 | 19 | def Pick(self, game: Game) -> List[int]: 20 | pick_list = [] 21 | self.pos_to_name = enemy_fish 22 | 23 | for i in range(4): 24 | pick_list.append(self.name_to_id[self.pos_to_name[i]]) 25 | self.ai_fish.append(self.name_to_id[your_fish[i]]) 26 | 27 | #random.shuffle(pick_list) 28 | return pick_list 29 | 30 | def Assert(self, game: Game) -> Tuple[int, int]: 31 | if self.stage == 1: 32 | return (-1, -1) 33 | else: 34 | return (self.get_enemy_living_fishes()[0], random.choice(self.ai_fish)) 35 | 36 | def Act(self, game: Game) -> Action: 37 | action = Action(game) 38 | 
my_pos: int = self.get_my_living_fishes()[0] 39 | action.set_action_fish(my_pos) 40 | return self.auto_valid_action(my_pos, action) 41 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/basline1.py: -------------------------------------------------------------------------------- 1 | from action1 import AI 2 | import sys 3 | 4 | if __name__ == "__main__": 5 | stage = int(sys.argv[1]) 6 | 7 | myAI = AI(stage) 8 | myAI.run() 9 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/basline2.py: -------------------------------------------------------------------------------- 1 | from action2 import AI 2 | import sys 3 | 4 | if __name__ == "__main__": 5 | stage = int(sys.argv[1]) 6 | 7 | myAI = AI(stage) 8 | myAI.run() 9 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/client.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import threading 3 | import json 4 | import time 5 | 6 | class Client: 7 | def __init__(self, port, host='localhost'): 8 | print(333) 9 | self.host = host 10 | self.port = port 11 | self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 12 | self.socket.connect((self.host, self.port)) 13 | self.stop_flag = threading.Event() 14 | 15 | def quit(self): 16 | self.send_message("") 17 | self.stop() 18 | 19 | def llm_call(self, history, prompt, system): 20 | message = [] 21 | 22 | if system: 23 | history.append((system, "Okay, I will play the game with you according to the rules.")) 24 | 25 | for ix, chat in enumerate(history): 26 | message.append({ 27 | "role": "user", 28 | "content": chat[0] 29 | }) 30 | message.append({ 31 | "role": "agent", 32 | "content": chat[1] 33 | }) 34 | 35 | message.append({ 36 | "role": "user", 37 | "content": prompt 38 | }) 39 | #with open("client.txt", "a") as f: 40 | # 
f.write(json.dumps(message) + "\n") 41 | self.send_message(json.dumps(message)) 42 | output = self.receive_messages() 43 | 44 | #with open("client.txt", "a") as f: 45 | # f.write(json.dumps(output) + "\n######################\n") 46 | return output 47 | 48 | def receive_messages(self): 49 | while not self.stop_flag.is_set(): 50 | data = self.socket.recv(1000000).decode() 51 | if data: 52 | return data 53 | 54 | def send_message(self, message): 55 | self.socket.sendall(message.encode()) 56 | 57 | def stop(self): 58 | self.stop_flag.set() 59 | self.socket.close() -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/main.py: -------------------------------------------------------------------------------- 1 | print(777) 2 | import sys 3 | from client import Client 4 | 5 | print(666) 6 | if __name__ == "__main__": 7 | print(555) 8 | language = sys.argv[1] 9 | stage = int(sys.argv[2]) 10 | order = int(sys.argv[3]) 11 | save_dir = sys.argv[4] 12 | port = int(sys.argv[5]) 13 | client = Client(port=port) 14 | if language == 'en': 15 | from AI_En import Agent 16 | myAI = Agent(client, stage, order, save_dir) 17 | else: 18 | from AI_Cn import Agent 19 | myAI = Agent(client, stage, order, save_dir) 20 | 21 | myAI.run() 22 | client.quit() -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/prompt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/prompt/__init__.py -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/prompt/cn.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | action_format = { 4 | "选择的鱼": "选择我方仍活着的一条鱼", 5 | "动作": "选择主动技能或普通攻击", 6 | "目标位置": "目标位置,必须在 [0, 3] 范围内" 7 | } 
8 | 9 | guess_format = { 10 | "猜测种类": "选择你想要猜测的种类", 11 | "目标位置": "目标位置,必须在 [0, 3] 范围内" 12 | } 13 | 14 | fish_description = { 15 | "射水鱼": { 16 | "被动技能": r"反弹:队友被直接攻击后,若其生命值少于30%,对来源造成60的伤害", 17 | "主动技能": r"范围伤害:对敌方所有目标发动攻击,每个造成35%攻击力的伤害" 18 | }, 19 | "喷火鱼": { 20 | "被动技能": r"反弹:队友被直接攻击后,若其生命值少于30%,对来源造成60的伤害", 21 | "主动技能": r"内斗:对一名队友发动攻击造成75伤害,并使自身攻击力永久提升140" 22 | }, 23 | "电鳗": { 24 | "被动技能": r"伤害转移:自身受到敌方直接攻击而受到伤害时,如果还有队友存活,会让队友帮忙承受伤害(自身承受70%,其余队友平摊30%);每累计受到的实际伤害值达到200,自身攻击值永久提升40", 25 | "主动技能": r"范围伤害:对敌方所有目标发动攻击,每个造成35%攻击力的伤害" 26 | }, 27 | "翻车鱼": { 28 | "被动技能": r"伤害转移:自身受到敌方直接攻击而受到伤害时,如果还有队友存活,会让队友帮忙承受伤害(自身承受70%,其余队友平摊30%);每累计受到的实际伤害值达到200,自身攻击值永久提升40", 29 | "主动技能": r"内斗:对一名队友发动攻击造成75伤害,并使自身攻击力永久提升140" 30 | } 31 | } 32 | 33 | ACTION_PROMPT = """这是一个双人对战游戏,每队拥有四条宠物鱼。 34 | 每条鱼都有400初始生命值、200攻击力,以及自己的主动技能和被动技能。 35 | 每个回合,你可以选择一条活着的鱼,并且使用主动技能,或用普通攻击敌方鱼。 36 | 鱼的被动技能都会自动触发,只要被动技能条件满足。 37 | %s 38 | 获胜的条件是在比赛结束时有更多的鱼活着。 39 | 40 | 以下是四种宠物鱼: 41 | %s 42 | 43 | 与我一起进行对战。在每一轮中,你都应该输出你的思考过程,并用以下JSON格式返回你的动作: 44 | %s 45 | 46 | 注意!你必须在每一轮中返回你的动作。否则,你将被直接判负。""" 47 | 48 | GUESS_RULE = """你拥有的鱼的初始信息是隐藏的,敌方可以在每回合内猜测你的一条鱼的种类,如果敌方猜对了,你的那条鱼的种类将被揭示,并且你的所有鱼会受到50的伤害""" 49 | 50 | GUESS_PROMPT = """这是一个双人对战游戏,每队拥有四条宠物鱼。 51 | 每条鱼都有初始生命值、攻击力,以及自己的主动技能和被动技能。 52 | 所有的鱼的初始信息是隐藏的,你可以在每回合内猜测敌方的一条鱼的种类,如果你猜对了,敌方那条鱼的种类将被揭示,并且敌方的所有鱼会受到50的伤害。 53 | 获胜的条件是在比赛结束时有更多的鱼活着。 54 | 55 | 以下是四种宠物鱼: 56 | %s 57 | 58 | 与我一起进行对战。在每一轮中,你都应该输出你的思考过程,并用以下JSON格式返回你的动作: 59 | %s 60 | 61 | 注意!你必须在每一轮中返回你的动作。否则,你将被直接判负。""" 62 | 63 | ACTION_DESCRIPTION = { 64 | 1: ACTION_PROMPT % ('', fish_description, action_format), 65 | 2: ACTION_PROMPT % (GUESS_RULE, fish_description, action_format) 66 | } 67 | 68 | GUESS_DESCRIPTION = { 69 | 2: GUESS_PROMPT % (fish_description, guess_format), 70 | } -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/prompt/cn_en.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 
action_format = { 4 | "pick_fish": "pick an alive fish", 5 | "action": "choose from [normal, active]", 6 | "target_position": "target's position, you must choose from [0,3]" 7 | } 8 | 9 | guess_format = { 10 | "guess_type": "the enemy's fish type you may guess", 11 | "target_position": "guess target's position, you must choose from [0,3]" 12 | } 13 | 14 | fish_description = { 15 | "射水鱼": { 16 | "被动技能": r"反弹:队友被直接攻击后,若其生命值少于30%,对来源造成60的伤害", 17 | "主动技能": r"范围伤害:对敌方所有目标发动攻击,每个造成35%攻击力的伤害" 18 | }, 19 | "喷火鱼": { 20 | "被动技能": r"反弹:队友被直接攻击后,若其生命值少于30%,对来源造成60的伤害", 21 | "主动技能": r"内斗:对一名队友发动攻击造成100伤害,并使自身攻击力永久提升140" 22 | }, 23 | "电鳗": { 24 | "被动技能": r"伤害转移:自身受到敌方直接攻击而受到伤害时,如果还有队友存活,会让队友帮忙承受伤害(自身承受70%,其余队友平摊30%);每累计受到的实际伤害值达到200,自身攻击值永久提升40", 25 | "主动技能": r"范围伤害:对敌方所有目标发动攻击,每个造成35%攻击力的伤害" 26 | }, 27 | "翻车鱼": { 28 | "被动技能": r"伤害转移:自身受到敌方直接攻击而受到伤害时,如果还有队友存活,会让队友帮忙承受伤害(自身承受70%,其余队友平摊30%);每累计受到的实际伤害值达到200,自身攻击值永久提升40", 29 | "主动技能": r"内斗:对一名队友发动攻击造成100伤害,并使自身攻击力永久提升140" 30 | } 31 | } 32 | 33 | ACTION_PROMPT = """这是一个双人对战游戏,每队拥有四条宠物鱼。 34 | 每条鱼都有400初始生命值、200攻击力,以及自己的主动技能和被动技能。 35 | 每个回合,你可以选择一条活着的鱼,并且使用主动技能,或用普通攻击敌方鱼。 36 | 鱼的被动技能都会自动触发,只要被动技能条件满足。 37 | %s 38 | 获胜的条件是在比赛结束时有更多的鱼活着。 39 | 40 | 以下是四种宠物鱼: 41 | %s 42 | 43 | 与我一起进行对战。在每一轮中,你都应该输出你的思考过程,并用以下JSON格式返回你的动作: 44 | %s 45 | 46 | 注意!你必须在每一轮中返回你的动作。否则,你将被直接判负。""" 47 | 48 | GUESS_RULE = """你拥有的鱼的初始信息是隐藏的,敌方可以在每回合内猜测你的一条鱼的种类,如果敌方猜对了,你的那条鱼的种类将被揭示,并且你的所有鱼会受到50的伤害""" 49 | 50 | GUESS_PROMPT = """这是一个双人对战游戏,每队拥有四条宠物鱼。 51 | 每条鱼都有初始生命值、攻击力,以及自己的主动技能和被动技能。 52 | 所有的鱼的初始信息是隐藏的,你可以在每回合内猜测敌方的一条鱼的种类,如果你猜对了,敌方那条鱼的种类将被揭示,并且敌方的所有鱼会受到50的伤害。 53 | 获胜的条件是在比赛结束时有更多的鱼活着。 54 | 55 | 以下是四种宠物鱼: 56 | %s 57 | 58 | 与我一起进行对战。在每一轮中,你都应该输出你的思考过程,并用以下JSON格式返回你的动作: 59 | %s 60 | 61 | 注意!你必须在每一轮中返回你的动作。否则,你将被直接判负。""" 62 | 63 | ACTION_DESCRIPTION = { 64 | 1: ACTION_PROMPT % ('', fish_description, action_format), 65 | 2: ACTION_PROMPT % (GUESS_RULE, fish_description, action_format) 66 | } 67 | 68 | GUESS_DESCRIPTION = { 69 | 2: GUESS_PROMPT % (fish_description, 
guess_format), 70 | } -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/CMakeLists.txt: --------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.15)

project(py_ai_sdk CXX)

# Default to an optimized build, but let -DCMAKE_BUILD_TYPE=... override it.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

# else()/endif() take no arguments: the legacy form requires a verbatim repeat
# of the if() condition, which "GCC or Clang" was not.
if(MSVC)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /utf-8 /std:c++17")
else()
  set(CMAKE_CXX_STANDARD 17)
endif()
set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)

# set(CMAKE_PREFIX_PATH "")
# find_package(Python EXACT COMPONENTS Interpreter Development REQUIRED)

# Developer-specific pybind11 location, used only as a fallback so that
# -Dpybind11_DIR=... on the command line takes precedence.
if(NOT DEFINED pybind11_DIR)
  set(pybind11_DIR
      "/home/piano/mambaforge/lib/python3.10/site-packages/pybind11/share/cmake/pybind11")
endif()
find_package(pybind11 REQUIRED)

# Build the Python extension module from the SDK source plus the bundled jsoncpp.
file(GLOB_RECURSE jsoncpp "jsoncpp/*.cpp")
pybind11_add_module(ai_client py_ai_sdk.cpp ${jsoncpp})
-------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/ai_client.cpython-310-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/ai_client.cpython-310-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/ai_client.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/ai_client.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/3.25.0/CMakeDetermineCompilerABI_CXX.bin: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/3.25.0/CMakeDetermineCompilerABI_CXX.bin -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/3.25.0/CMakeSystem.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_HOST_SYSTEM "Linux-5.10.16.3-microsoft-standard-WSL2") 2 | set(CMAKE_HOST_SYSTEM_NAME "Linux") 3 | set(CMAKE_HOST_SYSTEM_VERSION "5.10.16.3-microsoft-standard-WSL2") 4 | set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64") 5 | 6 | 7 | 8 | set(CMAKE_SYSTEM "Linux-5.10.16.3-microsoft-standard-WSL2") 9 | set(CMAKE_SYSTEM_NAME "Linux") 10 | set(CMAKE_SYSTEM_VERSION "5.10.16.3-microsoft-standard-WSL2") 11 | set(CMAKE_SYSTEM_PROCESSOR "x86_64") 12 | 13 | set(CMAKE_CROSSCOMPILING "FALSE") 14 | 15 | set(CMAKE_SYSTEM_LOADED 1) 16 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/3.25.0/CompilerIdCXX/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/3.25.0/CompilerIdCXX/a.out -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.25 3 | 4 | # Relative path conversion top directories. 
5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/build") 7 | 8 | # Force unix paths in dependencies. 9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/TargetDirectories.txt: -------------------------------------------------------------------------------- 1 | /mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/build/CMakeFiles/ai_client.dir 2 | /mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/build/CMakeFiles/edit_cache.dir 3 | /mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/build/CMakeFiles/rebuild_cache.dir 4 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 
3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/jsoncpp/jsoncpp.cpp" "CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o" "gcc" "CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o.d" 12 | "/mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/py_ai_sdk.cpp" "CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o" "gcc" "CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o.d" 13 | ) 14 | 15 | # Targets to which this target links. 16 | set(CMAKE_TARGET_LINKED_INFO_FILES 17 | ) 18 | 19 | # Fortran module output directory. 20 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 21 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o" 3 | "CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o.d" 4 | "CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o" 5 | "CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o.d" 6 | "bin/ai_client.cpython-310-x86_64-linux-gnu.so" 7 | "bin/ai_client.pdb" 8 | ) 9 | 10 | # Per-language clean rules from dependency scanning. 11 | foreach(lang CXX) 12 | include(CMakeFiles/ai_client.dir/cmake_clean_${lang}.cmake OPTIONAL) 13 | endforeach() 14 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/compiler_depend.make: -------------------------------------------------------------------------------- 1 | # Empty compiler generated dependencies file for ai_client. 2 | # This may be replaced when dependencies are built. 
3 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for ai_client. 3 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for ai_client. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.25 3 | 4 | # compile CXX with /usr/bin/c++ 5 | CXX_DEFINES = -Dai_client_EXPORTS 6 | 7 | CXX_INCLUDES = -isystem /home/piano/mambaforge/lib/python3.10/site-packages/pybind11/include -isystem /mnt/e/conda/envs/ml/include/python3.10 8 | 9 | CXX_FLAGS = -O3 -DNDEBUG -fPIC -fvisibility=hidden -flto -fno-fat-lto-objects -std=gnu++17 10 | 11 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/link.txt: -------------------------------------------------------------------------------- 1 | /usr/bin/c++ -fPIC -O3 -DNDEBUG -flto -shared -o bin/ai_client.cpython-310-x86_64-linux-gnu.so CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o 2 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 1 2 | CMAKE_PROGRESS_2 = 2 3 | CMAKE_PROGRESS_3 = 3 4 | 5 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/cmake.check_cache: -------------------------------------------------------------------------------- 1 | # This file is generated by cmake for dependency checking of the CMakeCache.txt file 2 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 3 2 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/bin/ai_client.cpython-310-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/build/bin/ai_client.cpython-310-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/build/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 
10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "Release") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/usr/bin/objdump") 43 | endif() 44 | 45 | if(CMAKE_INSTALL_COMPONENT) 46 | set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt") 47 | else() 48 | set(CMAKE_INSTALL_MANIFEST "install_manifest.txt") 49 | endif() 50 | 51 | string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT 52 | "${CMAKE_INSTALL_MANIFEST_FILES}") 53 | file(WRITE "/mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/build/${CMAKE_INSTALL_MANIFEST}" 54 | "${CMAKE_INSTALL_MANIFEST_CONTENT}") 55 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/py_json_cast.hpp: --------------------------------------------------------------------------------
#pragma once

#ifndef PY_JSON_CAST_HPP_INCLUDED
#define PY_JSON_CAST_HPP_INCLUDED

// NOTE(review): every `<...>` token in this header (the include target and
// all template arguments) was missing from the text under review. They are
// restored here from how each value is used; confirm against the upstream
// file before relying on them.
#include <pybind11/pybind11.h>

#include <string>

#include "jsoncpp/json/json.h"

namespace pybind11::detail {
// pybind11 type caster that converts between Python objects and Json::Value.
template <>
class type_caster<Json::Value> {
   public:
    PYBIND11_TYPE_CASTER(Json::Value, _("Json"));

    // Python -> C++. Returns false (instead of throwing) when the object
    // contains something handle_to_json() cannot represent.
    bool load(handle src, bool) try {
        value = handle_to_json(src);
        return true;
    } catch (const type_error &) {  // by const reference: no copy, no slicing
        return false;
    }

    // C++ -> Python.
    static handle cast(Json::Value src, return_value_policy, handle) {
        return json_to_handle(src).release();
    }

   private:
    // Recursively convert a Python object to JSON.
    // None -> null; bool/int/float/str -> the matching scalar;
    // tuple/list/set -> array; dict -> object (keys passed through str()).
    // Throws type_error for any other type.
    static Json::Value handle_to_json(const handle &hdl) {
        if (hdl.ptr() == nullptr || hdl.is_none()) return Json::nullValue;
        // bool is tested before int, mirroring json_to_handle() below.
        if (isinstance<bool_>(hdl)) return hdl.cast<bool>();
        if (isinstance<int_>(hdl)) return hdl.cast<Json::LargestInt>();
        if (isinstance<float_>(hdl)) return hdl.cast<double>();
        if (isinstance<str>(hdl)) return hdl.cast<std::string>();
        if (isinstance<tuple>(hdl) || isinstance<list>(hdl) ||
            isinstance<set>(hdl)) {
            Json::Value ret = Json::arrayValue;
            for (const handle &h : hdl) ret.append(handle_to_json(h));
            return ret;
        }
        if (isinstance<dict>(hdl)) {
            Json::Value ret = Json::objectValue;
            for (const handle &key : hdl)
                ret[str(key)] = handle_to_json(hdl[key]);
            return ret;
        }
        throw type_error("Bad cast from Python to C++: " +
                         repr(hdl).cast<std::string>());
    }

    // Recursively convert JSON to a Python object.
    static object json_to_handle(const Json::Value &json) {
        if (json.isNull()) return none();
        if (json.isBool()) return bool_(json.asBool());
        if (json.isIntegral()) return int_(json.asLargestInt());
        if (json.isNumeric()) return float_(json.asDouble());
        // asString() keeps the full length; asCString() would cut the text
        // at the first embedded NUL.
        if (json.isString()) return str(json.asString());
        if (json.isArray()) {
            list ret;
            for (const Json::Value &j : json) ret.append(json_to_handle(j));
            return ret;
        }
        if (json.isObject()) {
            dict ret;
            for (Json::ValueConstIterator iter = json.begin();
                 iter != json.end(); ++iter)
                ret[str(iter.key().asString())] = json_to_handle(*iter);
            return ret;
        }
        throw type_error("Bad cast from C++ to Python: " + json.asString());
    }
};
}  // namespace pybind11::detail

#endif  // PY_JSON_CAST_HPP_INCLUDED
-------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/pyd/ai_client.cp38-win_amd64.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/pyd/ai_client.cp38-win_amd64.pyd -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/pyd/ai_client.cpython-310-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/pyd/ai_client.cpython-310-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/pyd/ai_client.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/pyd/ai_client.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/pyd/ai_client.cpython-38-darwin.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/pyd/ai_client.cpython-38-darwin.so -------------------------------------------------------------------------------- /src/server/tasks/card_game/AI/sdk/pyd/ai_client.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/pyd/ai_client.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /src/server/tasks/card_game/__init__.py: -------------------------------------------------------------------------------- 1 | from .task import CardGame 2 | -------------------------------------------------------------------------------- /src/server/tasks/card_game/judger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/judger/__init__.py -------------------------------------------------------------------------------- /src/server/tasks/card_game/logic/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | cmake-build-debug -------------------------------------------------------------------------------- /src/server/tasks/card_game/logic/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | g++ src/aqua_sdk.cpp src/fish.cpp src/fishset.cpp src/game.cpp src/main.cpp src/player.cpp src/jsoncpp/jsoncpp.cpp -o bin/main -std=c++17 -------------------------------------------------------------------------------- /src/server/tasks/card_game/logic/bin/main: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/logic/bin/main -------------------------------------------------------------------------------- /src/server/tasks/card_game/logic/src/fishset.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "fish.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | 
// A collection of Fish owned by one player.
// Player (player.h) holds two of these: the remaining team and the fish
// currently fighting.
class FishSet
{
private:
    std::vector fishs; // the fish held by this set

public:
    int player_id; // id of the owning player (assigned by Player's constructor)
    int timestamp;
    FishSet* another = nullptr; // NOTE(review): presumably the counterpart set; confirm against fishset.cpp
    FishSet(int flag = 0); // NOTE(review): meaning of `flag` is not visible here; Player passes 1 for the team set
    FishSet(const FishSet& base); // Build from an existing set; the result holds the same fish ids as `base`
    Json::Value to_json() const;
    std::vector get_fishs() const;
    int get_size();
    bool empty();
    void clear();
    void add(Fish* fish);
    void add(int id); // Add the fish with the given id
    void remove(int id); // Remove the fish with the given id
    void to_fight(); // Put every fish into the fighting state
    void to_dead(); // Put every fish into the dead state
    void update_state(); // Move every fish from the fighting state to its latest state (possibly dead)
    // void hp_debuff(double rate);
    void hp_debuff(int dec);
    int count_live_fish();
    int living_fish_count() const; // Number of fish still alive
    int hp_sum() const; // Sum of the fish hp values
    int hp_max() const; // Largest fish hp value
    bool is_all_dead();
    bool count(Fish* tar);
    int update_timestamp();

    std::optional on_damaged(Fish *src, Fish *target, int dmg = -1);
    void set_fishset(FishSet *_ally, FishSet *_hostile);
    void start_turn();
    // void debug_msg(std::string str);
};
// Whole-match state for a two-player game; Game() creates players 0 and 1.
// to_json() serializes winner, players, state, gamestate, cur_turn, over,
// cnt, score and rounds.
class Game {
public:
    // Game phases. The numeric values are not contiguous (1 is unused).
    enum State {
        READY = 0,
        PICK = 2,
        ASSERT = 3,
        ACTION = 4,
        END = 5
    };
    static constexpr int STATE_LIMIT = 128;
    // Returns the display name of a phase ("" for an unknown value).
    static std::string state_info(State state);
    int winner = -1; // id of the winning player
    int last_winner = -1; // id of the player who won the previous round
    std::vector players;
    int state = 1; // current turn counter (the value passed to the Judger)
    int last_round_state = 0;
    int gamestate = READY; // current phase
    int cur_turn = 0; // player whose move it currently is
    // first mover of the current round
    // NOTE(review): no default initializer, and Game() does not assign it.
    int first_mover;
    bool over = false; // whether the game has ended
    int cnt = 0;
    int imiid[2] = {-1, -1};
    int errorai = 0;
    int score = 0; // running score
    int rounds = 0; // number of battle rounds played
    Game();
    Json::Value to_json() const;
};
-------------------------------------------------------------------------------- /src/server/tasks/card_game/logic/src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "aqua_sdk.cpp" 2 | 3 | int main(int argc, char* argv[]){ 4 | AquaWarSDK aw; 5 | aw.start(); 6 | aw.run(); 7 | 8 | return 0; 9 | } -------------------------------------------------------------------------------- /src/server/tasks/card_game/logic/src/player.cpp: -------------------------------------------------------------------------------- 1 | #include "player.h" 2 | 3 | Player::Player(int _id):id(_id){ 4 | my_fish = FishSet(1); 5 | my_fish.player_id = _id; 6 | fight_fish = FishSet(); 7 | fight_fish.player_id = _id; 8 | } 9 | 10 | Player::Player(int _id, FishSet _my_fish):id(_id),my_fish(_my_fish){ 11 | my_fish = FishSet(1); 12 | fight_fish = FishSet(); 13 | } 14 | 15 | Player::Player(const Player &p){ 16 | id = p.id; 17 | type = p.type; 18 | my_fish = p.my_fish; 19 | fight_fish = p.fight_fish; 20 | } 21 | 22 | /* 23 | 按照成员变量定义顺序转成 JSON 24 | 样例: 25 | { 26 | "id": id, 27 | "type": type, 28 | "my_fish": my_fish, 29 | "fight_fish": fight_fish 30 | } 31 | */ 32 | Json::Value Player::to_json() const{ 33 | Json::Value json; 34 | json["id"] = id; 35 | json["type"] = type; 36 | json["my_fish"] = my_fish.to_json(); 37 | json["fight_fish"] = fight_fish.to_json(); 38 | return json; 39 | } 40 | 41 | int Player::get_id(){ 42 | return id; 43 | } 44 | 45 | std::vector Player::get_fishs() const{ 46 | return my_fish.get_fishs(); 47 | } 48 | 49 | std::vector Player::get_fight_fishs() const{ 50 | return fight_fish.get_fishs(); 51 | } 52 | 53 | int Player::get_size(){ 54 | return my_fish.get_size(); 55 | } 56 | 57 | bool Player::empty(){ 58 | return my_fish.empty(); 59 | } 60 | 61 | void Player::clear(){ 62 | my_fish.clear(); 63 | } 64 | 65 | void Player::add(Fish* fish){ 66 | my_fish.add(fish); 67 | } 68 | 69 | void Player::add(int id){ 70 | my_fish.add(id); 71 | } 
// One participant in the game: an id plus two fish sets (the remaining team
// and the fish currently fighting). Most member functions forward to my_fish.
class Player {
private:
    int id; // player id

public:
    int type = 1; // 1 => AI, 2 => playback client
    FishSet my_fish; // every fish still remaining in the team
    FishSet fight_fish; // the fish currently on the field
    Player(int _id);
    // NOTE(review): player.cpp overwrites my_fish with FishSet(1) in this
    // constructor, so the _my_fish argument is discarded; confirm intent.
    Player(int _id, FishSet _my_fish);
    Player(const Player &p);
    Json::Value to_json() const;
    int get_id();
    std::vector get_fishs() const;
    std::vector get_fight_fishs() const;
    int get_size();
    bool empty();
    void clear();
    void add(Fish* fish);
    void add(int id); // Add the fish with the given id to the team
    void remove(int id); // Remove the fish with the given id from the team
    void to_war(int id); // Send the fish with the given id into battle (moves it from my_fish to fight_fish)
};
class Server:
    """Blocking TCP endpoint that relays game prompts to an agent session.

    The game client connects, sends a JSON-encoded chat history, and receives
    the agent's reply as plain text. Results are stored per ``folder`` key in
    ``self.log`` (the exchanged messages) and ``self.status`` (error codes).
    """

    def __init__(self, port, workers, host="localhost"):
        """Bind a listening socket, probing upwards from ``port``.

        Args:
            port: First port to try; ``self.port`` holds the port actually bound.
            workers: Expected number of clients; the backlog is ``workers + 2``.
            host: Interface to bind to.
        """
        self.host = host
        self.port = port
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        # Walk upwards until a port can be bound. Once the port leaves the
        # valid range, bind() raises OverflowError, which ends the search.
        while True:
            try:
                self.socket.bind((self.host, self.port))
                break
            except OSError:
                self.port += 1
        self.socket.listen(workers + 2)
        self.log = {}
        self.status = {}

    async def start(self, folder, session):
        """Serve one client connection until it closes or the agent fails.

        Args:
            folder: Key under which the log and status of this run are stored.
            session: Agent session; ``history`` is overwritten with each
                request and ``action()`` is awaited for the reply.

        Note:
            ``accept()`` and ``recv()`` are blocking calls, so the event loop
            is stalled while this coroutine waits on the socket.
        """
        log_file = []
        client_socket, _ = self.socket.accept()
        try:
            while True:
                data = client_socket.recv(1000000).decode()
                if data == "":
                    # Peer closed the connection.
                    break
                if data.startswith("#[ERROR]"):
                    # The last character of the message is the status digit.
                    self.status[folder] = int(data[-1])
                    continue
                try:
                    session.history = json.loads(data)
                    session.history = [ChatHistoryItem(**item) for item in session.history]
                    log_file.append({"role": "user", "content": data})
                    ret = await session.action()
                    if ret.content is None:
                        # Status 3 marks an agent (LLM) failure.
                        self.status[folder] = 3
                        self.send_message(client_socket, "### LLM ERROR EXIT ###")
                        break
                    ret = ret.content
                    print("\n######\n")
                    print(ret)
                    log_file.append({"role": "agent", "content": ret})
                    self.send_message(client_socket, ret)
                except json.decoder.JSONDecodeError:
                    log_file.append({"role": "agent", "content": ""})
                    # NOTE(review): an empty payload puts no bytes on the wire,
                    # so the peer is not actually notified here.
                    self.send_message(client_socket, "")
        finally:
            # Always release the connection and keep whatever was logged,
            # including on the LLM-error exit and when an exception escapes.
            self.stop(client_socket)
            self.log[folder] = log_file

    def send_message(self, client_socket, message):
        """Send ``message`` (a str) to the client, UTF-8 encoded."""
        client_socket.sendall(message.encode())

    def stop(self, client_socket):
        """Close the client connection."""
        client_socket.close()
def run_cmd(cmd_string, timeout=600):
    """Run a shell command and capture its standard output.

    Args:
        cmd_string: Command line, executed through the shell.
        timeout: Seconds to wait before the command is killed.

    Returns:
        A ``(code, msg)`` tuple. ``code`` is 0 when the command exits with
        status 0 and 1 otherwise. ``msg`` is the decoded standard output,
        prefixed with an error tag on failure, or a description of the
        timeout or unexpected error.
    """
    print("命令为:" + cmd_string)
    p = subprocess.Popen(
        cmd_string,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        shell=True,
        close_fds=True,
        start_new_session=True,
    )
    encoding = "gbk" if platform.system() == "Windows" else "utf-8"

    try:
        msg, _ = p.communicate(timeout=timeout)
        # Undecodable bytes are replaced so that odd output cannot turn a
        # successful command into an "Unknown Error".
        text = msg.decode(encoding, errors="replace")
        if p.returncode:
            code = 1
            msg = "[Error]Called Error : " + text
        else:
            code = 0
            msg = text
    except subprocess.TimeoutExpired:
        # The child leads its own process group (start_new_session=True), so
        # signal the whole group to take down anything the shell spawned.
        if hasattr(os, "killpg"):
            try:
                os.killpg(p.pid, signal.SIGKILL)
            except ProcessLookupError:
                pass  # the group is already gone
        p.kill()
        try:
            # Reap the child and release its pipes.
            p.communicate(timeout=5)
        except subprocess.TimeoutExpired:
            pass

        code = 1
        msg = (
            "[ERROR]Timeout Error : Command '"
            + cmd_string
            + "' timed out after "
            + str(timeout)
            + " seconds"
        )
    except Exception as e:
        code = 1
        msg = "[ERROR]Unknown Error : " + str(e)

    return code, msg
def lisp_to_nested_expression(lisp_string: str) -> List:
    """Parse a lisp-style logical form into nested lists.

    For example, ``"(count (division first))"`` becomes
    ``['count', ['division', 'first']]``. Parentheses may be attached to a
    symbol or stand alone as their own whitespace-separated token.

    Raises:
        IndexError: If the string is empty or has more closing than opening
            parentheses.
    """
    stack: List = []
    current_expression: List = []
    for token in lisp_string.split():
        # Each leading "(" opens a new nested list.
        while token.startswith('('):
            nested_expression: List = []
            current_expression.append(nested_expression)
            stack.append(current_expression)
            current_expression = nested_expression
            token = token[1:]
        # A token made only of parentheses carries no symbol.
        symbol = token.replace(')', '')
        if symbol:
            current_expression.append(symbol)
        # Each trailing ")" closes the innermost open list.
        while token.endswith(')'):
            current_expression = stack.pop()
            token = token[:-1]
    return current_expression[0]


def expression_to_lisp(expression) -> str:
    """Render a nested list (as built by the parser above) as a lisp string."""
    parts = (
        expression_to_lisp(item) if isinstance(item, list) else item
        for item in expression
    )
    return '(' + ' '.join(parts) + ')'


def get_nesting_level(expression) -> int:
    """Return the nesting depth of a nested list; a flat list has depth 1."""
    deepest_child = max(
        (get_nesting_level(item) for item in expression if isinstance(item, list)),
        default=0,
    )
    return 1 + deepest_child


if __name__ == '__main__':
    lisp = '(AND common.topic (AND (JOIN common.topic.notable_types Comic Strip) (JOIN common.topic.notable_types Comic Strip)))'
    print(get_nesting_level(lisp_to_nested_expression(lisp)))

    print(expression_to_lisp(lisp_to_nested_expression(lisp)))
/src/server/tasks/ltp/__init__.py: -------------------------------------------------------------------------------- 1 | from .task import LateralThinkingPuzzle -------------------------------------------------------------------------------- /src/server/tasks/mind2web/__init__.py: -------------------------------------------------------------------------------- 1 | from .task import Mind2Web 2 | -------------------------------------------------------------------------------- /src/server/tasks/mind2web/data_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/mind2web/data_utils/__init__.py -------------------------------------------------------------------------------- /src/server/tasks/os_interaction/__init__.py: -------------------------------------------------------------------------------- 1 | from .task import OSInteraction -------------------------------------------------------------------------------- /src/server/tasks/os_interaction/requirements.txt: -------------------------------------------------------------------------------- 1 | docker 2 | PyYAML 3 | PyYAML 4 | Requests 5 | -------------------------------------------------------------------------------- /src/server/tasks/webshop/.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## I'm submitting a ... 2 | 3 | - [ ] bug report 4 | - [ ] feature request 5 | 6 | ## What is the current behavior? 7 | 8 | Please describe the current behavior of the WebShop app or agent 9 | 10 | ### Steps to Reproduce 11 | 12 | If the current behavior is a bug, please provide the steps to reproduce and if possible a minimal demo of the problem 13 | 14 | ## What is the expected behavior? 
15 | 16 | Please describe the desired behavior of the WebShop app or agent 17 | 18 | ## Motivation for Change 19 | 20 | What is the motivation / use case for changing the behavior? 21 | 22 | ## Please tell us about your environment: 23 | 24 | * Version: 2.0.0-beta.X 25 | * Browser: 26 | * Language: 27 | -------------------------------------------------------------------------------- /src/server/tasks/webshop/.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Provide a general summary of your changes 4 | 5 | ## Description of Changes 6 | 7 | Describe your changes + testing (if appropriate) in technical detail 8 | 9 | ## Screenshots 10 | 11 | Include visuals such as screenshots or recordings if necessary to show changes in effect 12 | 13 | ## Checklist 14 | - [ ] My code follows the style guidelines of this project 15 | - [ ] I have performed a self-review of my code 16 | - [ ] I have commented my code + updated documentation (if necessary) 17 | - [ ] I have added tests to define the behavior of the feature(s) and verify it is working 18 | - [ ] New + existing unit tests pass -------------------------------------------------------------------------------- /src/server/tasks/webshop/.github/workflows/pytest.yml: -------------------------------------------------------------------------------- 1 | # .github/workflows/pytest.yml 2 | name: PyTest 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | test: 14 | runs-on: ubuntu-latest 15 | timeout-minutes: 10 16 | 17 | steps: 18 | - name: Check out repository code 19 | uses: actions/checkout@v3 20 | 21 | # Setup Python (faster than using Python container) 22 | - name: Setup Python 23 | uses: actions/setup-python@v3 24 | with: 25 | python-version: "3.8" 26 | 27 | # Install pip dependencies + setup for testing 28 | - name: Install dependencies 
29 | run: | 30 | python -m pip install --upgrade pip 31 | pip install -r requirements.txt 32 | python -m spacy download en_core_web_lg 33 | 34 | # Run testing suite 35 | - name: Run test suite 36 | run: | 37 | pytest -v -------------------------------------------------------------------------------- /src/server/tasks/webshop/.gitignore: -------------------------------------------------------------------------------- 1 | *.ipynb* 2 | *.pyc 3 | *.swp 4 | 5 | .DS_Store 6 | .idea/ 7 | .pytest_cache/ 8 | .vscode/ 9 | 10 | __pycache__/ 11 | data/ 12 | search_engine/indexes* 13 | search_engine/resources* 14 | transfer/flagged 15 | user_session_logs/ 16 | -------------------------------------------------------------------------------- /src/server/tasks/webshop/LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Princeton Natural Language Processing 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/server/tasks/webshop/assets/diagram.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/webshop/assets/diagram.gif -------------------------------------------------------------------------------- /src/server/tasks/webshop/assets/model_ckpts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/webshop/assets/model_ckpts.png -------------------------------------------------------------------------------- /src/server/tasks/webshop/assets/transfer-logic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/webshop/assets/transfer-logic.png -------------------------------------------------------------------------------- /src/server/tasks/webshop/baseline_models/.gitignore: -------------------------------------------------------------------------------- 1 | logs/ 2 | wandb/ 3 | ckpts/ 4 | __pycache__/ 5 | data/il_trajs_finalized_images.jsonl 6 | *.ipynb 7 | 8 | 9 | *.txt 10 | !requirements.txt 11 | scripts/ 12 | *.out 13 | wandb/ 14 | *.swp 15 | logs/ 16 | .DS_Store 17 | .idea/ 18 | nbs/ 19 | 20 | crawl-* 21 | 22 | # Byte-compiled / optimized / DLL files 23 | __pycache__/ 24 | *.py[cod] 25 | *$py.class 26 | 27 | # C extensions 28 | *.so 29 | 30 | # Distribution / packaging 31 | .Python 32 | 
if __name__ == "__main__":
    # Beam-search the fine-tuned BART checkpoint for every goal and store the
    # ten candidate search queries produced for each one.
    device = 'cuda'
    beams = 10

    model = BartForConditionalGeneration.from_pretrained(
        './ckpts/web_search/checkpoint-800')
    model.eval()
    model = model.to(device)

    dataset = get_dataset("web_search")
    dataloader = torch.utils.data.DataLoader(dataset["all"], batch_size=32)
    _, all_goals = get_data("all")

    all_dec = []
    for batch in tqdm(dataloader):
        generated = model.generate(
            input_ids=batch["input_ids"].to(device),
            attention_mask=batch["attention_mask"].to(device),
            num_beams=beams, num_return_sequences=beams,
            max_length=512, early_stopping=True
        )
        decoded = tokenizer.batch_decode(
            generated, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        assert len(decoded) % 10 == 0
        # Consecutive groups of ten decoded strings belong to one input.
        all_dec.extend(
            decoded[start:start + beams] for start in range(0, len(decoded), beams)
        )

    assert len(all_goals) == len(all_dec)
    predictions = dict(zip(all_goals, all_dec))
    with open('./data/goal_query_predict.json', 'w') as out:
        json.dump(predictions, out)
if __name__ == '__main__':
    # Alternative environments that can be swapped in while debugging:
    #   gym.make('WebAgentSite-v0')
    #   WebAgentSiteEnv(render=True, pause=2.0)
    #   WebAgentSiteEnv(observation_mode='html', render=False)
    env = WebAgentSiteEnv(observation_mode='text', render=False, num_products=DEBUG_PROD_SIZE)
    global_step = 0

    try:
        # HumanPolicy() is the interactive alternative to the GPT policy.
        policy = GPTPolicy()

        observation = env.observation
        done = False
        while not done:
            print(observation)
            choices = env.get_available_actions()
            print('Available actions:', choices)
            action = policy.forward(observation, choices)
            observation, reward, done, info = env.step(action)
            print(f'Taking action "{escape(action)}" -> Reward = {reward}')
            if not done:
                global_step += 1
    finally:
        # Release the environment even when the episode is interrupted.
        env.close()
model_exec = ""


class InteractionLog:
    """Context manager that redirects ``sys.stdout`` into a fresh log file.

    The file is named ``<file>-<n>.log``, where ``n`` is the smallest index
    whose file did not exist when the object was created, so earlier logs
    are never overwritten.
    """

    def __init__(self, file, name) -> None:
        self.file = file
        self.name = name
        self.suffix_index = 0
        while os.path.exists(self.file_name):
            self.suffix_index += 1
        self.stdout = None
        self.logfile = None

    @property
    def file_name(self):
        """Path of the log file for the current suffix index."""
        return self.file + "-" + str(self.suffix_index) + ".log"

    def __enter__(self):
        # Create the target directory on demand so a missing "logs/" folder
        # does not abort the run before it starts.
        parent = os.path.dirname(self.file_name)
        if parent:
            os.makedirs(parent, exist_ok=True)
        self.logfile = open(self.file_name, 'w', encoding="utf-8")
        self.stdout = sys.stdout
        sys.stdout = self.logfile
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Restore stdout before closing so nothing writes to a closed file.
        sys.stdout = self.stdout
        self.logfile.close()
        self.logfile = None
        self.stdout = None


def worker(log_file, idx, rnge):
    """Run the episodes in ``range(*rnge)`` and log the reward of each.

    Args:
        log_file: Base path of the log file (see ``InteractionLog``).
        idx: Worker identifier, passed to ``InteractionLog`` as its name.
        rnge: Arguments for ``range`` selecting the goal indices to run.
    """
    with InteractionLog(log_file, idx):
        env = WebAgentTextEnv(observation_mode="text", human_goals=True)
        print("total goals:", len(env.server.goals))
        print("ranging:", rnge)
        scores = []
        for i in range(*rnge):
            env.reset(i)
            print(f"=== Episode #{i} ===")

            # SECURITY: eval() executes whatever expression was typed at the
            # prompt. Only run this script with trusted input.
            policy = eval(model_exec)

            observation = env.observation
            for _ in range(100):
                print(observation)
                available_actions = env.get_available_actions()
                print('Available actions:', available_actions)
                action = policy.forward(observation, available_actions)
                if not action:
                    reward = 0
                    break
                observation, reward, done, info = env.step(action)
                print(f'Taking action "{escape(action)}" -> Reward = {reward}')
                if done:
                    break
            else:
                # Step budget exhausted without finishing the episode.
                reward = 0
            print(f"#{i} {reward}")
            scores.append(reward)

        # An empty range produces no scores; avoid dividing by zero.
        if scores:
            print(f"#Average: {sum(scores) / len(scores)}")
        else:
            print("#Average: n/a (no episodes in range)")


if __name__ == '__main__':
    # env = gym.make('WebAgentTextEnv-v0', observation_mode='text', num_products=DEBUG_PROD_SIZE)
    # Usage: no arguments -> all goals; one -> a single goal; two -> [start, stop).
    arg_length = len(sys.argv)
    if arg_length == 2:
        ranging = (int(sys.argv[1]), int(sys.argv[1]) + 1)
    elif arg_length == 3:
        ranging = (int(sys.argv[1]), int(sys.argv[2]))
    else:
        ranging = (0, 12087)
    model_exec = input(">>> ")
    print("got EXEC", model_exec)
    log_file = "logs/%s" % (datetime.datetime.now().strftime("%Y-%m-%d=%H-%M-%S"))
    worker(log_file, 0, ranging)
sys.path.insert(0, '../')

from web_agent_site.utils import DEFAULT_FILE_PATH
from web_agent_site.engine.engine import load_products

all_products, *_ = load_products(filepath=DEFAULT_FILE_PATH)

# Build one search document per product: the id, a lower-cased text blob
# to index, and the full product record.
docs = []
for product in tqdm(all_products, total=len(all_products)):
    option_text = ', and '.join(
        f"{option_name}: {', '.join(option_contents)}"
        for option_name, option_contents in product.get('options', {}).items()
    )
    docs.append({
        'id': product['asin'],
        'contents': ' '.join([
            product['Title'],
            product['Description'],
            product['BulletPoints'][0],
            option_text,
        ]).lower(),
        'product': product,
    })

# (output directory, number of leading documents to keep; None keeps all)
OUTPUTS = (
    ('resources_100', 100),
    ('resources', None),
    ('resources_1k', 1000),
    ('resources_100k', 100000),
)

for directory, limit in OUTPUTS:
    with open(f'./{directory}/documents.jsonl', 'w+') as out:
        for doc in docs[:limit]:
            out.write(json.dumps(doc) + '\n')
-------------------------------------------------------------------------------- /src/server/tasks/webshop/search_engine/run_indexing.sh: -------------------------------------------------------------------------------- 1 | python -m pyserini.index.lucene \ 2 | --collection JsonCollection \ 3 | --input resources_100 \ 4 | --index indexes_100 \ 5 | --generator DefaultLuceneDocumentGenerator \ 6 | --threads 1 \ 7 | --storePositions --storeDocvectors --storeRaw 8 | 9 | python -m pyserini.index.lucene \ 10 | --collection JsonCollection \ 11 | --input resources \ 12 | --index indexes \ 13 | --generator DefaultLuceneDocumentGenerator \ 14 | --threads 1 \ 15 | --storePositions --storeDocvectors --storeRaw 16 | 17 | python -m pyserini.index.lucene \ 18 | --collection JsonCollection \ 19 | --input resources_1k \ 20 | --index indexes_1k \ 21 | --generator DefaultLuceneDocumentGenerator \ 22 | --threads 1 \ 23 | --storePositions --storeDocvectors --storeRaw 24 | 25 | python -m pyserini.index.lucene \ 26 | --collection JsonCollection \ 27 | --input resources_100k \ 28 | --index indexes_100k \ 29 | --generator DefaultLuceneDocumentGenerator \ 30 | --threads 1 \ 31 | --storePositions --storeDocvectors --storeRaw 32 | -------------------------------------------------------------------------------- /src/server/tasks/webshop/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Displays information on how to use script 4 | helpFunction() 5 | { 6 | echo "Usage: $0 [-d small|all]" 7 | echo -e "\t-d small|all - Specify whether to download entire dataset (all) or just 1000 (small)" 8 | exit 1 # Exit script after printing help 9 | } 10 | 11 | # Get values of command line flags 12 | while getopts d: flag 13 | do 14 | case "${flag}" in 15 | d) data=${OPTARG};; 16 | esac 17 | done 18 | 19 | if [ -z "$data" ]; then 20 | echo "[ERROR]: Missing -d flag" 21 | helpFunction 22 | fi 23 | 24 | # Install Python Dependencies 25 | pip 
install -r requirements.txt; 26 | 27 | # Install Environment Dependencies via `conda` 28 | conda install -c pytorch faiss-cpu==1.7.2; 29 | conda install -c conda-forge openjdk=11; 30 | 31 | # Download dataset into `data` folder via `gdown` command 32 | mkdir -p data; 33 | cd data; 34 | if [ "$data" == "small" ]; then 35 | gdown https://drive.google.com/uc?id=1EgHdxQ_YxqIQlvvq5iKlCrkEKR6-j0Ib; # items_shuffle_1000 - product scraped info 36 | gdown https://drive.google.com/uc?id=1IduG0xl544V_A_jv3tHXC0kyFi7PnyBu; # items_ins_v2_1000 - product attributes 37 | elif [ "$data" == "all" ]; then 38 | gdown https://drive.google.com/uc?id=1A2whVgOO0euk5O13n2iYDM0bQRkkRduB; # items_shuffle 39 | gdown https://drive.google.com/uc?id=1s2j6NgHljiZzQNL3veZaAiyW_qDEgBNi; # items_ins_v2 40 | else 41 | echo "[ERROR]: argument for `-d` flag not recognized" 42 | helpFunction 43 | fi 44 | gdown https://drive.google.com/uc?id=14Kb5SPBk_jfdLZ_CDBNitW98QLDlKR5O # items_human_ins 45 | cd .. 46 | 47 | # Download spaCy large NLP model 48 | python -m spacy download en_core_web_lg 49 | 50 | # Build search engine index 51 | cd search_engine 52 | mkdir -p resources resources_100 resources_1k resources_100k 53 | python convert_product_file_format.py # convert items.json => required doc format 54 | mkdir -p indexes 55 | ./run_indexing.sh 56 | cd .. 57 | 58 | # Create logging folder + samples of log data 59 | get_human_trajs () { 60 | PYCMD=$(cat < 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 |
15 |
16 |
17 |

Instruction:
i want an xx-small sized slim fit button down shirt with long sleeves. pick something in white, and price lower than 50.00 dollars

18 |
19 |
20 |
21 |
22 |
23 | 24 |
25 |
26 |
27 |
28 | 29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |

Here Are The Things You Want To Knowa─=≡Σ(((つ̀ώ)つSTORE INTRODUCTION:>>>>Our store helps our customers improve their quality of life~As a distributor, we value quality and service.Focus on the high quality and durability of the product.Committed to creating a store that satisfies and reassures our customers.TIPS:>>>>1. Please allow minor errors in the data due to manual measurements.2. Due to the color settings of the display, the actual color may be slightly different from the online image.QUALITY PROMISE:>>>>Our goal is to continuously provide a range of quality products.We place a huge emphasis on the values of quality and reliability.We have always insisted on fulfilling this commitment.In short, we want our customers to have the same great product experience every time and be trusted to deliver on this commitment.Please give us a chance to serve you.OTHER:>>>>athletic sneaker laces athletic sneakers white athletic sneakers for women clearance leather Sneaker leather sneakers women leather sneakers for menleather sneaker laces leather sneaker platform basketball shoes basketball shoes for men basketball shoe laces basketball shoe grip basketball shoes for women fitness shoes for men fitness shoes women workout fitness shoes women fitness shoes women size 5 fitness shoes men workout fitness shoes for men high top sneakers for women walking shoes sneakers with arch support for women

37 |
38 |
39 |
40 |
41 |
42 |
43 | 44 | -------------------------------------------------------------------------------- /src/server/tasks/webshop/tests/web-agent-site/engine/test_normalize.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from web_agent_site.engine.normalize import * 3 | 4 | def test_normalize_color(): 5 | suite = [ 6 | ("", ""), 7 | ("black forest", "black"), 8 | ("violet lavender", "lavender"), 9 | ("steelivy fuchsia", "fuchsia"), 10 | ("123alabaster", "alabaster"), 11 | ("webshop", "webshop") 12 | ] 13 | for color_string, expected in suite: 14 | output = normalize_color(color_string) 15 | assert type(output) is str 16 | assert output == expected 17 | 18 | def test_normalize_color_size(): 19 | product_prices = { 20 | (1, "black forest", "3 meter"): 10.29, 21 | (2, "violet lavender", "xx-large"): 23.42, 22 | (3, "steelivy fuchsia", "random value"): 193.87, 23 | (4, "123alabaster", "40cm plus"): 67.23, 24 | (5, "webshop", "142"): 1.02, 25 | (6, "webshopsteel", "2 petite"): 57.99, 26 | (7, "leather black", "91ft walnut feet"): 6.20, 27 | } 28 | color_mapping_expected = { 29 | 'N.A.': 'not_matched', 30 | "black forest": "black", 31 | "violet lavender": "lavender", 32 | "steelivy fuchsia": "fuchsia", 33 | "123alabaster": "alabaster", 34 | "webshop": "not_matched", 35 | "webshopsteel": "steel", 36 | "leather black": "black" 37 | } 38 | size_mapping_expected = { 39 | 'N.A.': 'not_matched', 40 | "3 meter": '(.*)meter', 41 | "xx-large": 'xx-large', 42 | "random value": "not_matched", 43 | "40cm plus": '(.*)plus', 44 | "142": "numeric_size", 45 | "2 petite": "(.*)petite", 46 | "91ft walnut feet": '(.*)ft', 47 | } 48 | 49 | color_mapping, size_mapping = normalize_color_size(product_prices) 50 | assert type(color_mapping) == dict 51 | assert type(size_mapping) == dict 52 | assert color_mapping == color_mapping_expected 53 | assert size_mapping == size_mapping_expected 54 | 
-------------------------------------------------------------------------------- /src/server/tasks/webshop/tests/web-agent-site/test_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import pytest 3 | import random 4 | import shutil 5 | from pathlib import Path 6 | from web_agent_site.utils import * 7 | 8 | def test_random_idx(): 9 | random.seed(24) 10 | weights = [random.randint(0, 10) for _ in range(0, 50)] 11 | cml_weights = [0] 12 | for w in weights: 13 | cml_weights.append(cml_weights[-1] + w) 14 | idx_1, expected_1 = random_idx(cml_weights), 44 15 | idx_2, expected_2 = random_idx(cml_weights), 15 16 | idx_3, expected_3 = random_idx(cml_weights), 36 17 | assert idx_1 == expected_1 18 | assert idx_2 == expected_2 19 | assert idx_3 == expected_3 20 | 21 | def test_setup_logger(): 22 | LOG_DIR = 'user_session_logs_test/' 23 | user_log_dir = Path(LOG_DIR) 24 | user_log_dir.mkdir(parents=True, exist_ok=True) 25 | session_id = "ABC" 26 | 27 | logger = setup_logger(session_id, user_log_dir) 28 | log_file = Path(LOG_DIR + "/" + session_id + ".jsonl") 29 | assert Path(log_file).is_file() 30 | assert logger.level == logging.INFO 31 | 32 | content = "Hello there" 33 | logger.info(content) 34 | assert log_file.read_text().strip("\n") == content 35 | 36 | shutil.rmtree(LOG_DIR) 37 | 38 | def test_generate_mturk_code(): 39 | suite = [ 40 | ('', 'DA39A3EE5E'), 41 | ('ABC', '3C01BDBB26'), 42 | ('123', '40BD001563'), 43 | ('1A1', '10E7DB0A44'), 44 | ('$%^ABC', '5D5607D24E') 45 | ] 46 | for session_id, expected in suite: 47 | output = generate_mturk_code(session_id) 48 | assert type(expected) is str 49 | assert output == expected -------------------------------------------------------------------------------- /src/server/tasks/webshop/transfer/README.md: -------------------------------------------------------------------------------- 1 | # Sim-to-real Transfer 2 | This folder contains code for transferring agents 
trained on WebShop to perform on third party websites, specifically [Amazon](http://amazon.com) and [eBay](http://ebay.com). The imitation learning and reinforcement learning agents exercised by the transfer code can be found on WebShop's Hugging Face [page](https://huggingface.co/webshop). 3 | 4 | Interact with a demo of the transfer code, deployed as a 🤗 Hugging Face space [here](https://huggingface.co/spaces/webshop/amazon_shop)! 5 | 6 | ## 🛠️ Usage 7 | The Gradio app deployed as the aforementioned Hugging Face space can be started locally by running `python app.py` in this folder. The initial `setup.sh` script should have installed all the required dependencies. 8 | 9 | ## ➡️ Transfer Logic 10 | The Sim-to-real transfer code follows this general logical flow: 11 | 12 | 13 | 14 | The contents of this directory each serve the following purposes: 15 | * `app.py`: Run to launch interactive [Gradio](https://gradio.app/) demo of app 16 | * `predict_help.py`: Amazon, eBay web scraping code 17 | * `webshop_lite.py`: A condensed version of WebShop's templating engine 18 | 19 | If you are interested in *transferring an agent's functionality to a new website or platform*, you will need to... 20 | 1. Implement two new functions: `parse_results_.py` and `parse_item_page_.py`. The corresponding interfaces and working examples for Amazon can be found [here](https://github.com/princeton-nlp/webshop/tree/master/transfer/predict_help.py#L262) and [here](https://github.com/princeton-nlp/webshop/tree/master/transfer/predict_help.py#L296). 21 | 2. Invoke these functions in the [`run_episode`](https://github.com/princeton-nlp/webshop/tree/master/transfer/app.py#L105) function in the `app.py` file. Specifically, you should add a single call to... 
22 | * `parse_results...` in the [conditional]((https://github.com/princeton-nlp/webshop/tree/master/transfer/predict_help.py#L220)) handling `Page.RESULTS` page types 23 | * `parse_item_page...` in the [conditional]((https://github.com/princeton-nlp/webshop/tree/master/transfer/predict_help.py#L240)) handling `Page.ITEMS` page types -------------------------------------------------------------------------------- /src/server/tasks/webshop/transfer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/webshop/transfer/__init__.py -------------------------------------------------------------------------------- /src/server/tasks/webshop/web_agent_site/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/webshop/web_agent_site/__init__.py -------------------------------------------------------------------------------- /src/server/tasks/webshop/web_agent_site/attributes/annotate.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from pathlib import Path 3 | from rich import print 4 | 5 | ATTR_DIR = './data/attributes' 6 | 7 | ATTR_PATHS = [ 8 | 'narrow_2-gram.yaml', 9 | 'narrow_1-gram.yaml', 10 | 'broad_2-gram.yaml', 11 | 'broad_1-gram.yaml', 12 | ] 13 | ATTR_PATHS = [Path(ATTR_DIR) / af for af in ATTR_PATHS] 14 | 15 | 16 | def annotate(attr_path): 17 | with open(attr_path) as f: 18 | attrs_by_cat = yaml.safe_load(f) 19 | 20 | unique_attrs = set() 21 | all_attrs = [] 22 | for _, attrs in attrs_by_cat.items(): 23 | attrs = [a.split('|')[0].strip() for a in attrs] 24 | unique_attrs.update(attrs) 25 | all_attrs += attrs 26 | print(f'Total unique attributes: {len(unique_attrs)}') 27 | total = len(all_attrs) 28 | num_left = 
len(all_attrs) 29 | 30 | annotated_attrs_by_cat = dict() 31 | for category, attrs in attrs_by_cat.items(): 32 | print( 33 | f'Category: [ {category} ] | ' 34 | f'Number of attributes: {len(attrs)}\n' 35 | ) 36 | annotated_attrs = [] 37 | for i, attr in enumerate(attrs): 38 | attr, score = attr.split(' | ') 39 | print( 40 | f'{"[" + str(i) + "]":<5} ' 41 | f'[bold green]{attr:<30}[/bold green] | ' 42 | f'[red]{category}[/red] | ' 43 | f'{score}' 44 | ) 45 | tags = input( 46 | 'Annotate [1: ITEM, 2: PROP, 3: USE, ' 47 | '⎵: next example, q: next category] > ' 48 | ) 49 | print('\n') 50 | tags = tags.strip() 51 | annotated_attrs.append(f'{attr} | {score} | {tags}') 52 | if 'q' in tags: 53 | break 54 | 55 | num_left -= len(attrs) 56 | print(f'{num_left} / {total} total attributes left.') 57 | 58 | ans = input('Starting the next category... [y/n] > ') 59 | if ans == 'n': 60 | break 61 | 62 | def main(): 63 | for attr_path in ATTR_PATHS: 64 | annotate(attr_path) 65 | 66 | if __name__ == '__main__': 67 | """ 68 | python -m web_agent_site.attributes.annotate 69 | """ 70 | main() 71 | -------------------------------------------------------------------------------- /src/server/tasks/webshop/web_agent_site/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/webshop/web_agent_site/engine/__init__.py -------------------------------------------------------------------------------- /src/server/tasks/webshop/web_agent_site/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | from web_agent_site.envs.web_agent_site_env import WebAgentSiteEnv 4 | from web_agent_site.envs.web_agent_text_env import WebAgentTextEnv 5 | 6 | register( 7 | id='WebAgentSiteEnv-v0', 8 | entry_point='web_agent_site.envs:WebAgentSiteEnv', 9 | ) 10 | 11 | 
register( 12 | id='WebAgentTextEnv-v0', 13 | entry_point='web_agent_site.envs:WebAgentTextEnv', 14 | ) -------------------------------------------------------------------------------- /src/server/tasks/webshop/web_agent_site/models/__init__.py: -------------------------------------------------------------------------------- 1 | from web_agent_site.models.models import * 2 | -------------------------------------------------------------------------------- /src/server/tasks/webshop/web_agent_site/models/models.py: -------------------------------------------------------------------------------- 1 | """ 2 | Model implementations. The model interface should be suitable for both 3 | the ``site env'' and the ``text env''. 4 | """ 5 | import json 6 | import random 7 | import re 8 | 9 | import requests 10 | 11 | random.seed(4) 12 | 13 | 14 | class BasePolicy: 15 | def __init__(self): 16 | pass 17 | 18 | def forward(self, observation, available_actions): 19 | """ 20 | Args: 21 | observation (`str`): 22 | HTML string 23 | 24 | available_actions (): 25 | ... 26 | Returns: 27 | action (`str`): 28 | Return string of the format ``action_name[action_arg]''. 
29 | Examples: 30 | - search[white shoes] 31 | - click[button=Reviews] 32 | - click[button=Buy Now] 33 | """ 34 | raise NotImplementedError 35 | 36 | 37 | class HumanPolicy(BasePolicy): 38 | def __init__(self): 39 | super().__init__() 40 | 41 | def forward(self, observation, available_actions): 42 | action = input('> ') 43 | return action 44 | 45 | 46 | class RandomPolicy(BasePolicy): 47 | def __init__(self): 48 | super().__init__() 49 | 50 | def forward(self, observation, available_actions): 51 | if available_actions['has_search_bar']: 52 | action = 'search[shoes]' 53 | else: 54 | action_arg = random.choice(available_actions['clickables']) 55 | action = f'click[{action_arg}]' 56 | return action 57 | -------------------------------------------------------------------------------- /src/server/tasks/webshop/web_agent_site/static/images/no-image-available.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/webshop/web_agent_site/static/images/no-image-available.png -------------------------------------------------------------------------------- /src/server/tasks/webshop/web_agent_site/templates/attributes_page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 |
14 |
15 |
16 |

Instruction:
{{ instruction_text }}

17 |
18 |
19 |
20 |
21 |
22 | 23 |
24 |
25 |
26 |
27 | 28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
    36 | {% for attribute in product_info.Attributes %} 37 |
  • {{attribute}}

  • 38 | {% endfor %} 39 |
40 |
41 |
42 |
43 |
44 |
{{product_info.category}}
45 |
46 |
47 |
{{product_info.query}}
48 |
49 |
50 |
{{product_info.product_category}}
51 |
52 |
53 |
54 |
55 |
56 |
57 | 58 | -------------------------------------------------------------------------------- /src/server/tasks/webshop/web_agent_site/templates/description_page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 |
14 |
15 |
16 |

Instruction:
{{ instruction_text }}

17 |
18 |
19 |
20 |
21 |
22 | 23 |
24 |
25 |
26 |
27 | 28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |

{{product_info.Description}}

36 |
37 |
38 |
39 |
40 |
41 |
42 | 43 | -------------------------------------------------------------------------------- /src/server/tasks/webshop/web_agent_site/templates/done_page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 |
13 |
14 |
15 |

Thank you for shopping with us!

16 |
17 |
18 |

Your code:

19 |

{{ mturk_code }}
(Paste it in your MTurk interface.)

20 |
21 |

Purchased

22 |
23 |

asin
{{ asin }}

24 |

options
{{ options | tojson }}

25 |

attrs
{{ purchased_attrs }}

26 |

category
{{ category }}

27 |

query
{{ query }}

28 |

product category
{{ product_category }}

29 |

Target

30 |
31 |

asin
{{ goal.asin }}

32 |

options
{{ goal.goal_options }}

33 |

attrs
{{ goal.attributes }}

34 |

price upper
{{ goal.price_upper }}

35 |

instruction text
{{ goal.instruction_text }}

36 |

category
{{ goal.category }}

37 |

product category
{{ goal.product_category }}

38 |

query
{{ goal.query }}

39 |

Goal
{{ goal | pprint }}

40 |

Reward

41 |
42 |
43 |

Your score (min 0.0, max 1.0)
{{ reward }}

44 | 45 |
46 |
47 |
48 |
49 | 50 | 51 | -------------------------------------------------------------------------------- /src/server/tasks/webshop/web_agent_site/templates/features_page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 |
14 |
15 |
16 |

Instruction:
{{ instruction_text }}

17 |
18 |
19 |
20 |
21 |
22 | 23 |
24 |
25 |
26 |
27 | 28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
    36 | {% for bulletpoint in product_info.BulletPoints %} 37 |
  • {{bulletpoint}}

  • 38 | {% endfor %} 39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 | 47 | -------------------------------------------------------------------------------- /src/server/tasks/webshop/web_agent_site/templates/review_page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 |
14 |
15 |
16 |

Instruction:
{{ instruction_text }}

17 |
18 |
19 |
20 |
21 |
22 | 23 |
24 |
25 |
26 |
27 | 28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 | {% for review in product_info.Reviews %} 36 |
37 |
38 |

"{{review.title}}"

39 |

40 | {{review.score}} 41 | {% for i in range(review.score | int) %} 42 | 43 | {% endfor %} 44 | {% for i in range(5 - review.score | int) %} 45 | 46 | {% endfor %} 47 |

48 |

{{review.body}}

49 |
50 |
51 | {% endfor %} 52 |
53 |
54 |
55 |
56 |
57 |
58 | 59 | -------------------------------------------------------------------------------- /src/server/tasks/webshop/web_agent_site/templates/search_page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 |
13 |
14 | 17 |
18 |

Instruction:
{{ instruction_text }}

19 |
20 |
21 |
22 |
23 | 24 | 25 | 26 | 27 |
28 |
29 |
30 |
31 |
32 |
33 | 34 | -------------------------------------------------------------------------------- /src/server/tasks/webshop/web_agent_site/utils.py: -------------------------------------------------------------------------------- 1 | import bisect 2 | import hashlib 3 | import logging 4 | import random 5 | from os.path import dirname, abspath, join 6 | 7 | BASE_DIR = dirname(abspath(__file__)) 8 | DEBUG_PROD_SIZE = None # set to `None` to disable 9 | 10 | DEFAULT_ATTR_PATH = join(BASE_DIR, '../data/items_ins_v2_1000.json') 11 | # DEFAULT_ATTR_PATH = join(BASE_DIR, '../data/items_ins_v2.json') 12 | DEFAULT_FILE_PATH = join(BASE_DIR, '../data/items_shuffle_1000.json') 13 | # DEFAULT_FILE_PATH = join(BASE_DIR, '../data/items_shuffle.json') 14 | DEFAULT_REVIEW_PATH = join(BASE_DIR, '../data/reviews.json') 15 | 16 | FEAT_CONV = join(BASE_DIR, '../data/feat_conv.pt') 17 | FEAT_IDS = join(BASE_DIR, '../data/feat_ids.pt') 18 | 19 | HUMAN_ATTR_PATH = join(BASE_DIR, '../data/items_human_ins.json') 20 | HUMAN_ATTR_PATH = join(BASE_DIR, '../data/items_human_ins.json') 21 | 22 | def random_idx(cum_weights): 23 | """Generate random index by sampling uniformly from sum of all weights, then 24 | selecting the `min` between the position to keep the list sorted (via bisect) 25 | and the value of the second to last index 26 | """ 27 | pos = random.uniform(0, cum_weights[-1]) 28 | idx = bisect.bisect(cum_weights, pos) 29 | idx = min(idx, len(cum_weights) - 2) 30 | return idx 31 | 32 | def setup_logger(session_id, user_log_dir): 33 | """Creates a log file and logging object for the corresponding session ID""" 34 | logger = logging.getLogger(session_id) 35 | formatter = logging.Formatter('%(message)s') 36 | file_handler = logging.FileHandler( 37 | user_log_dir / f'{session_id}.jsonl', 38 | mode='w' 39 | ) 40 | file_handler.setFormatter(formatter) 41 | logger.setLevel(logging.INFO) 42 | logger.addHandler(file_handler) 43 | return logger 44 | 45 | def generate_mturk_code(session_id: str) -> str: 
46 | """Generates a redeem code corresponding to the session ID for an MTurk 47 | worker once the session is completed 48 | """ 49 | sha = hashlib.sha1(session_id.encode()) 50 | return sha.hexdigest()[:10].upper() -------------------------------------------------------------------------------- /src/typings/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import * 2 | from .exception import * 3 | from .general import * 4 | from .output import * 5 | from .request import * 6 | from .status import * 7 | -------------------------------------------------------------------------------- /src/typings/exception.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | 4 | class AgentBenchException(Exception): 5 | pass 6 | 7 | 8 | class ClientException(AgentBenchException): 9 | def __init__(self, reason: str, detail: Union[str, None] = None) -> None: 10 | super().__init__() 11 | self.reason = reason 12 | self.detail = detail 13 | 14 | def __str__(self) -> str: 15 | if not self.detail: 16 | return "{CLASS_NAME}[{REASON}]".format( 17 | CLASS_NAME=self.__class__.__name__, REASON=self.reason 18 | ) 19 | else: 20 | return "{CLASS_NAME}[{REASON}]: {DETAIL}".format( 21 | CLASS_NAME=self.__class__.__name__, 22 | REASON=self.reason, 23 | DETAIL=self.detail, 24 | ) 25 | 26 | 27 | class ServerException(AgentBenchException): 28 | pass 29 | 30 | 31 | class AgentClientException(ClientException): 32 | pass 33 | 34 | 35 | class TaskClientException(ClientException): 36 | pass 37 | 38 | 39 | class AgentContextLimitException(AgentClientException): 40 | def __init__(self, detail: Union[str, None] = None) -> None: 41 | super().__init__("agent_context_limit", detail) 42 | 43 | 44 | class AgentTimeoutException(AgentClientException): 45 | def __init__(self, detail: Union[str, None] = None) -> None: 46 | super().__init__("agent_timeout", detail) 47 | 48 | 49 | class 
AgentNetworkException(AgentClientException): 50 | def __init__(self, detail: Union[str, None] = None) -> None: 51 | super().__init__("agent_network", detail) 52 | 53 | 54 | class TaskTimeoutException(TaskClientException): 55 | def __init__(self, detail: Union[str, None] = None) -> None: 56 | super().__init__("task_timeout", detail) 57 | 58 | 59 | class TaskNetworkException(TaskClientException): 60 | def __init__(self, detail: Union[str, None] = None) -> None: 61 | super().__init__("task_network", detail) 62 | -------------------------------------------------------------------------------- /src/typings/general.py: -------------------------------------------------------------------------------- 1 | import builtins 2 | from typing import List, Dict, Union, Any, Literal 3 | 4 | from pydantic import BaseModel, validator 5 | 6 | JSONSerializable = Union[None, bool, int, float, str, List[Any], Dict[str, Any]] 7 | SampleIndex = Union[int, str] 8 | 9 | 10 | class InstanceFactory(BaseModel): 11 | module: str 12 | parameters: Dict[str, Any] = {} 13 | 14 | @validator("parameters", pre=True) 15 | def _ensure_dict(cls, v): 16 | if v is None: 17 | return {} 18 | return v 19 | 20 | def create(self): 21 | # print('>>>>>>>> ', self.module, self.parameters) 22 | splits = self.module.split(".") 23 | if len(splits) == 0: 24 | raise Exception("Invalid module name: {}".format(self.module)) 25 | if len(splits) == 1: 26 | g = globals() 27 | if self.module in g: 28 | class_type = g[self.module] 29 | else: 30 | class_type = getattr(builtins, self.module) 31 | return class_type(**self.parameters) 32 | else: 33 | path = ".".join(self.module.split(".")[:-1]) 34 | mod = __import__(path, fromlist=[self.module.split(".")[-1]]) 35 | return getattr(mod, self.module.split(".")[-1])(**self.parameters) 36 | 37 | 38 | class Assignment(BaseModel): 39 | agent: str 40 | task: str 41 | 42 | 43 | class ChatHistoryItem(BaseModel): 44 | role: Literal["user", "agent"] 45 | content: str 46 | 
-------------------------------------------------------------------------------- /src/typings/output.py: -------------------------------------------------------------------------------- 1 | from typing import Union, List 2 | 3 | from pydantic import BaseModel, root_validator 4 | 5 | from . import ChatHistoryItem 6 | from .general import JSONSerializable, SampleIndex 7 | from .status import SampleStatus, AgentOutputStatus 8 | 9 | 10 | class TaskOutput(BaseModel): 11 | index: Union[None, SampleIndex] = None 12 | status: SampleStatus = SampleStatus.RUNNING 13 | result: JSONSerializable = None 14 | history: Union[None, List[ChatHistoryItem]] = None 15 | 16 | 17 | class TaskSampleExecutionResult(BaseModel): 18 | status: SampleStatus = SampleStatus.COMPLETED 19 | result: JSONSerializable = None 20 | 21 | 22 | class AgentOutput(BaseModel): 23 | status: AgentOutputStatus = AgentOutputStatus.NORMAL 24 | content: Union[str, None] = None 25 | 26 | # at least one of them should be not None 27 | @root_validator(pre=False, skip_on_failure=True) 28 | def post_validate(cls, instance: dict): 29 | assert ( 30 | instance.get("status") is not AgentOutputStatus.NORMAL 31 | or instance.get("content") is not None 32 | ), "If status is NORMAL, content should not be None" 33 | return instance 34 | 35 | 36 | class TaskClientOutput(BaseModel): 37 | error: Union[str, None] = None 38 | info: Union[str, None] = None 39 | output: Union[TaskOutput, None] = None 40 | -------------------------------------------------------------------------------- /src/typings/request.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import BaseModel 4 | 5 | from .general import SampleIndex 6 | from .output import AgentOutput, TaskOutput 7 | 8 | 9 | class RegisterRequest(BaseModel): 10 | name: str 11 | address: str 12 | concurrency: int 13 | indices: list 14 | 15 | 16 | class StartSampleRequest(BaseModel): 17 | name: str 18 | 
index: SampleIndex 19 | 20 | 21 | class InteractRequest(BaseModel): 22 | session_id: int 23 | agent_response: AgentOutput 24 | 25 | 26 | class CancelRequest(BaseModel): 27 | session_id: int 28 | 29 | 30 | class HeartbeatRequest(BaseModel): 31 | name: str 32 | address: str 33 | 34 | 35 | class CalculateOverallRequest(BaseModel): 36 | name: str 37 | results: List[TaskOutput] 38 | 39 | 40 | class WorkerStartSampleRequest(BaseModel): 41 | index: SampleIndex 42 | session_id: int 43 | 44 | 45 | class SampleStatusRequest(BaseModel): 46 | session_id: int 47 | -------------------------------------------------------------------------------- /src/typings/status.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum, Enum 2 | 3 | 4 | class SampleStatus(str, Enum): 5 | RUNNING = "running" 6 | COMPLETED = "completed" 7 | AGENT_CONTEXT_LIMIT = "agent context limit" 8 | AGENT_VALIDATION_FAILED = "agent validation failed" 9 | AGENT_INVALID_ACTION = "agent invalid action" 10 | TASK_LIMIT_REACHED = "task limit reached" 11 | UNKNOWN = "unknown" 12 | TASK_ERROR = "task error" 13 | 14 | 15 | class WorkerStatus(IntEnum): 16 | ALIVE = 0 17 | COMA = 1 18 | DEAD = 2 19 | 20 | 21 | class AgentOutputStatus(str, Enum): 22 | NORMAL = "normal" 23 | CANCELLED = "cancelled" 24 | AGENT_CONTEXT_LIMIT = "agent context limit" 25 | -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .max_flow import Graph, MaxFlow 2 | from .others import * 3 | from .rules import * 4 | -------------------------------------------------------------------------------- /src/utils/others.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import numpy as np 4 | 5 | 6 | class JsonEncoder(json.JSONEncoder): 7 | """Convert numpy classes to JSON serializable objects.""" 8 | 
9 | def default(self, obj): 10 | if isinstance(obj, (np.integer, np.floating, np.bool_)): 11 | return obj.item() 12 | elif isinstance(obj, np.ndarray): 13 | return obj.tolist() 14 | else: 15 | return super(JsonEncoder, self).default(obj) 16 | 17 | 18 | def serialize(obj, max_depth=5, compress=False): 19 | """ 20 | dump into json, including only basic types, list types and dict types. 21 | If other types are included, they will be converted into string. 22 | """ 23 | if max_depth <= 0: 24 | return "..." 25 | if isinstance(obj, (int, float, str, bool, type(None))): 26 | return obj 27 | elif isinstance(obj, list) or isinstance(obj, tuple): 28 | if not compress or len(obj) <= 5: 29 | return [serialize(item, max_depth - 1, compress) for item in obj] 30 | else: 31 | return [serialize(item, max_depth - 1, True) for item in obj[:5]] + [ 32 | "...(total: %d)" % len(obj) 33 | ] 34 | elif isinstance(obj, dict): 35 | if not compress or len(obj) <= 5: 36 | return { 37 | str(key): serialize(obj[key], max_depth - 1, compress) for key in obj 38 | } 39 | else: 40 | ret = { 41 | str(key): serialize(obj[key], max_depth - 1, True) 42 | for key in list(obj.keys())[:5] 43 | } 44 | ret["...total..."] = len(obj) 45 | return ret 46 | elif hasattr(obj, "__dict__"): 47 | return serialize(obj.__dict__, max_depth, True) 48 | else: 49 | ret = str(obj) 50 | if len(ret) > 100: 51 | ret = ret[:45] + " ... 
" + ret[-45:] 52 | return ret 53 | 54 | 55 | class ColorMessage: 56 | @staticmethod 57 | def red(msg): 58 | return "\033[91m" + msg + "\033[0m" 59 | 60 | @staticmethod 61 | def green(msg): 62 | return "\033[92m" + msg + "\033[0m" 63 | 64 | @staticmethod 65 | def cyan(msg): 66 | return "\033[96m" + msg + "\033[0m" 67 | 68 | @staticmethod 69 | def yellow(msg): 70 | return "\033[93m" + msg + "\033[0m" 71 | -------------------------------------------------------------------------------- /src/utils/rules.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | 4 | class RuleBase: 5 | def check(self, obj) -> bool: 6 | raise NotImplementedError() 7 | 8 | 9 | class ContainRule(RuleBase): 10 | def __init__(self, target, reverse=False) -> None: 11 | """ 12 | Check if target is in obj. 13 | `reverse`: if True, check if obj is in target 14 | """ 15 | self.target = target 16 | self.reverse = reverse 17 | 18 | def check(self, obj) -> bool: 19 | if self.reverse: 20 | return obj in self.target 21 | else: 22 | return self.target in obj 23 | 24 | 25 | class NotRule(RuleBase): 26 | def __init__(self, rule: RuleBase) -> None: 27 | self.rule = rule 28 | 29 | def check(self, obj) -> bool: 30 | return not self.rule.check(obj) 31 | 32 | 33 | class AndRule(RuleBase): 34 | def __init__(self, rules: List[RuleBase]) -> None: 35 | self.rules = rules 36 | 37 | def check(self, obj) -> bool: 38 | return all(rule.check(obj) for rule in self.rules) 39 | 40 | 41 | class OrRule(RuleBase): 42 | def __init__(self, rules: List[RuleBase]) -> None: 43 | self.rules = rules 44 | 45 | def check(self, obj) -> bool: 46 | return any(rule.check(obj) for rule in self.rules) 47 | --------------------------------------------------------------------------------