├── .github
    └── ISSUE_TEMPLATE
    │   ├── bug-report---assistance-request.md
    │   └── feature-request.md
├── .gitignore
├── LICENSE
├── README.md
├── assets
    ├── agentbench.png
    ├── architecture.png
    ├── cover.jpg
    ├── intro.png
    ├── leaderboard.png
    ├── logo.png
    └── statistics.png
├── configs
    ├── agents
    │   ├── api_agents.yaml
    │   ├── fs_agent.yaml
    │   ├── openai-chat.yaml
    │   └── openai-text.yaml
    ├── assignments
    │   ├── default.yaml
    │   ├── definition.yaml
    │   └── test_avalon.yaml
    ├── start_task.yaml
    └── tasks
    │   ├── alfworld.yaml
    │   ├── avalon.yaml
    │   ├── card_game.yaml
    │   ├── dbbench.yaml
    │   ├── kg.yaml
    │   ├── ltp.yaml
    │   ├── mind2web.yaml
    │   ├── os.yaml
    │   ├── task_assembly.yaml
    │   └── webshop.yaml
├── data
    ├── alfworld
    │   ├── dev.json
    │   └── standard.json
    ├── avalon
    │   └── dev.json
    ├── dbbench
    │   ├── dev.jsonl
    │   └── standard.jsonl
    ├── knowledgegraph
    │   ├── dev.json
    │   └── std.json
    ├── lateralthinkingpuzzle
    │   ├── dev.xlsx
    │   └── standard.xlsx
    ├── lateralthinkingpuzzle_zh
    │   ├── dev.xlsx
    │   └── standard.xlsx
    ├── mind2web
    │   └── prompt
    │   │   ├── llm_prompt.json
    │   │   └── llm_prompt_cot.json
    └── os_interaction
    │   ├── data
    │       ├── 1
    │       │   └── stock.json
    │       ├── 2
    │       │   └── environment.json
    │       ├── 3
    │       │   └── ac.json
    │       ├── 4
    │       │   ├── N11.json
    │       │   ├── N225.json
    │       │   ├── N37.json
    │       │   ├── N4.json
    │       │   ├── N41.json
    │       │   ├── Q09.json
    │       │   ├── Q19.json
    │       │   ├── Q30.json
    │       │   ├── Q47.json
    │       │   └── Q49.json
    │       ├── 5
    │       │   └── new.json
    │       ├── 6
    │       │   └── new.json
    │       ├── 7
    │       │   └── bootstrap.json
    │       ├── 6-backup.json
    │       └── dev.json
    │   ├── res
    │       └── dockerfiles
    │       │   ├── default
    │       │   ├── packages
    │       │   └── ubuntu
    │   └── scripts
    │       ├── 1
    │           ├── check
    │           │   ├── 1.sh
    │           │   ├── containing.py
    │           │   ├── in.py
    │           │   ├── integer-match.py
    │           │   ├── size-match.py
    │           │   └── string-match.py
    │           ├── example
    │           │   └── 1.sh
    │           └── init
    │           │   ├── gen_words.sh
    │           │   ├── install_nettools.sh
    │           │   ├── nested_folders.sh
    │           │   └── stock-log.sh
    │       ├── 2
    │           ├── check
    │           │   ├── 1.sh
    │           │   ├── containing.py
    │           │   ├── in.py
    │           │   ├── integer-match.py
    │           │   ├── size-match.py
    │           │   └── string-match.py
    │           ├── example
    │           │   └── 1.sh
    │           └── init
    │           │   ├── gen_words.sh
    │           │   ├── install_nettools.sh
    │           │   ├── nested_folders.sh
    │           │   └── stock-log.sh
    │       ├── 3
    │           ├── check
    │           │   ├── 1.sh
    │           │   ├── containing.py
    │           │   ├── in.py
    │           │   ├── integer-match.py
    │           │   ├── size-match.py
    │           │   └── string-match.py
    │           ├── example
    │           │   └── 1.sh
    │           └── init
    │           │   ├── gen_words.sh
    │           │   ├── install_nettools.sh
    │           │   ├── nested_folders.sh
    │           │   └── stock-log.sh
    │       ├── 4
    │           ├── check
    │           │   ├── 1.sh
    │           │   ├── containing.py
    │           │   ├── in.py
    │           │   ├── integer-match.py
    │           │   ├── size-match.py
    │           │   └── string-match.py
    │           ├── example
    │           │   └── 1.sh
    │           └── init
    │           │   ├── gen_words.sh
    │           │   ├── install_nettools.sh
    │           │   ├── nested_folders.sh
    │           │   └── stock-log.sh
    │       ├── 5
    │           ├── check
    │           │   ├── containing.py
    │           │   ├── in.py
    │           │   ├── integer-match.py
    │           │   ├── size-match.py
    │           │   └── string-match.py
    │           ├── checking
    │           │   ├── 0.sh
    │           │   ├── 1.sh
    │           │   └── 2.sh
    │           ├── example
    │           │   ├── 0.sh
    │           │   ├── 1.sh
    │           │   └── 2.sh
    │           ├── file.json
    │           ├── init
    │           │   └── 1.sh
    │           ├── new.json
    │           └── prompt.md
    │       ├── 6
    │           ├── collected.json
    │           ├── collecting.py
    │           ├── new.json
    │           ├── permission.json
    │           └── stackoverflow
    │           │   └── spider.py
    │       ├── 7
    │           ├── check
    │           │   ├── 1.sh
    │           │   ├── containing.py
    │           │   ├── in.py
    │           │   ├── integer-match.py
    │           │   ├── size-match.py
    │           │   └── string-match.py
    │           ├── example
    │           │   └── 1.sh
    │           └── init
    │           │   ├── gen_words.sh
    │           │   ├── install_nettools.sh
    │           │   ├── nested_folders.sh
    │           │   └── stock-log.sh
    │       └── dev
    │           ├── check
    │               ├── 0.sh
    │               ├── containing.py
    │               ├── in.py
    │               ├── integer-match.py
    │               ├── size-match.py
    │               └── string-match.py
    │           ├── example
    │               └── 0.sh
    │           └── init
    │               └── stock-log.sh
├── docs
    ├── Config_cn.md
    ├── Config_en.md
    ├── Entrance_cn.md
    ├── Entrance_en.md
    ├── Extension_cn.md
    ├── Extension_en.md
    ├── Introduction_cn.md
    └── Introduction_en.md
├── requirements.txt
└── src
    ├── __init__.py
    ├── analysis.py
    ├── assigner.py
    ├── client
        ├── __init__.py
        ├── agent.py
        ├── agent_test.py
        ├── agents
        │   ├── __init__.py
        │   ├── claude_agent.py
        │   ├── fastchat_client.py
        │   ├── http_agent.py
        │   └── test_agent.py
        └── task.py
    ├── configs.py
    ├── server
        ├── __init__.py
        ├── task.py
        ├── task_controller.py
        ├── task_worker.py
        └── tasks
        │   ├── __init__.py
        │   ├── alfworld
        │       ├── __init__.py
        │       ├── configs
        │       │   └── base_config.yaml
        │       ├── environment.py
        │       ├── prompts
        │       │   ├── alfworld_multiturn.json
        │       │   ├── alfworld_multiturn_cot.json
        │       │   ├── alfworld_multiturn_new.json
        │       │   ├── alfworld_multiturn_plan_first.json
        │       │   ├── alfworld_multiturn_pure.json
        │       │   ├── alfworld_multiturn_react.json
        │       │   └── alfworld_singleturn.json
        │       ├── task.py
        │       └── utils.py
        │   ├── avalon
        │       ├── README.md
        │       ├── __init__.py
        │       ├── agents
        │       │   ├── agent.py
        │       │   ├── baseline_agents.py
        │       │   └── llm_with_discussion.py
        │       ├── avalon_exception.py
        │       ├── engine.py
        │       ├── prompts.py
        │       ├── task.py
        │       ├── task_scoring.py
        │       ├── utils.py
        │       └── wrapper.py
        │   ├── card_game
        │       ├── AI
        │       │   ├── AI_En.py
        │       │   ├── __init__.py
        │       │   ├── action1.py
        │       │   ├── action2.py
        │       │   ├── basline1.py
        │       │   ├── basline2.py
        │       │   ├── client.py
        │       │   ├── main.py
        │       │   ├── prompt
        │       │   │   ├── __init__.py
        │       │   │   ├── cn.py
        │       │   │   ├── cn_en.py
        │       │   │   └── en.py
        │       │   └── sdk
        │       │   │   ├── CMakeLists.txt
        │       │   │   ├── ai_client.cpython-310-x86_64-linux-gnu.so
        │       │   │   ├── ai_client.cpython-38-x86_64-linux-gnu.so
        │       │   │   ├── ai_client.hpp
        │       │   │   ├── ai_client.pyi
        │       │   │   ├── build
        │       │   │       ├── CMakeCache.txt
        │       │   │       ├── CMakeFiles
        │       │   │       │   ├── 3.25.0
        │       │   │       │   │   ├── CMakeCXXCompiler.cmake
        │       │   │       │   │   ├── CMakeDetermineCompilerABI_CXX.bin
        │       │   │       │   │   ├── CMakeSystem.cmake
        │       │   │       │   │   └── CompilerIdCXX
        │       │   │       │   │   │   ├── CMakeCXXCompilerId.cpp
        │       │   │       │   │   │   └── a.out
        │       │   │       │   ├── CMakeDirectoryInformation.cmake
        │       │   │       │   ├── CMakeOutput.log
        │       │   │       │   ├── Makefile.cmake
        │       │   │       │   ├── Makefile2
        │       │   │       │   ├── TargetDirectories.txt
        │       │   │       │   ├── ai_client.dir
        │       │   │       │   │   ├── DependInfo.cmake
        │       │   │       │   │   ├── build.make
        │       │   │       │   │   ├── cmake_clean.cmake
        │       │   │       │   │   ├── compiler_depend.make
        │       │   │       │   │   ├── compiler_depend.ts
        │       │   │       │   │   ├── depend.make
        │       │   │       │   │   ├── flags.make
        │       │   │       │   │   ├── jsoncpp
        │       │   │       │   │   │   ├── jsoncpp.cpp.o
        │       │   │       │   │   │   └── jsoncpp.cpp.o.d
        │       │   │       │   │   ├── link.txt
        │       │   │       │   │   ├── progress.make
        │       │   │       │   │   ├── py_ai_sdk.cpp.o
        │       │   │       │   │   └── py_ai_sdk.cpp.o.d
        │       │   │       │   ├── cmake.check_cache
        │       │   │       │   └── progress.marks
        │       │   │       ├── Makefile
        │       │   │       ├── bin
        │       │   │       │   └── ai_client.cpython-310-x86_64-linux-gnu.so
        │       │   │       └── cmake_install.cmake
        │       │   │   ├── jsoncpp
        │       │   │       ├── json
        │       │   │       │   ├── json-forwards.h
        │       │   │       │   └── json.h
        │       │   │       └── jsoncpp.cpp
        │       │   │   ├── py_ai_sdk.cpp
        │       │   │   ├── py_json_cast.hpp
        │       │   │   └── pyd
        │       │   │       ├── ai_client.cp38-win_amd64.pyd
        │       │   │       ├── ai_client.cpython-310-x86_64-linux-gnu.so
        │       │   │       ├── ai_client.cpython-36m-x86_64-linux-gnu.so
        │       │   │       ├── ai_client.cpython-38-darwin.so
        │       │   │       └── ai_client.cpython-38-x86_64-linux-gnu.so
        │       ├── __init__.py
        │       ├── judger
        │       │   ├── __init__.py
        │       │   ├── cal_metric.py
        │       │   ├── judger.py
        │       │   ├── rserver.py
        │       │   └── run_all.py
        │       ├── logic
        │       │   ├── .gitignore
        │       │   ├── Makefile
        │       │   ├── bin
        │       │   │   └── main
        │       │   └── src
        │       │   │   ├── aqua_sdk.cpp
        │       │   │   ├── fish.cpp
        │       │   │   ├── fish.h
        │       │   │   ├── fishset.cpp
        │       │   │   ├── fishset.h
        │       │   │   ├── game.cpp
        │       │   │   ├── game.h
        │       │   │   ├── jsoncpp
        │       │   │       ├── json
        │       │   │       │   ├── json-forwards.h
        │       │   │       │   └── json.h
        │       │   │       └── jsoncpp.cpp
        │       │   │   ├── main
        │       │   │   ├── main.cpp
        │       │   │   ├── player.cpp
        │       │   │   ├── player.h
        │       │   │   └── timer.h
        │       ├── server.py
        │       ├── task.py
        │       └── utils.py
        │   ├── dbbench
        │       ├── Interaction.py
        │       ├── __init__.py
        │       └── requirements.txt
        │   ├── knowledgegraph
        │       ├── __init__.py
        │       ├── api.py
        │       ├── ontology
        │       │   ├── fb_roles
        │       │   ├── fb_types
        │       │   ├── reverse_properties
        │       │   └── vocab.json
        │       ├── requirements.txt
        │       ├── task.py
        │       └── utils
        │       │   ├── logic_form_util.py
        │       │   ├── semparse_util.py
        │       │   └── sparql_executer.py
        │   ├── ltp
        │       ├── __init__.py
        │       └── task.py
        │   ├── mind2web
        │       ├── __init__.py
        │       ├── data_utils
        │       │   ├── __init__.py
        │       │   └── dom_utils.py
        │       ├── dataloader.py
        │       └── task.py
        │   ├── os_interaction
        │       ├── __init__.py
        │       ├── images.py
        │       ├── requirements.txt
        │       └── task.py
        │   └── webshop
        │       ├── .github
        │           ├── ISSUE_TEMPLATE.md
        │           ├── PULL_REQUEST_TEMPLATE.md
        │           └── workflows
        │           │   └── pytest.yml
        │       ├── .gitignore
        │       ├── LICENSE.md
        │       ├── README.md
        │       ├── __init__.py
        │       ├── assets
        │           ├── diagram.gif
        │           ├── model_ckpts.png
        │           └── transfer-logic.png
        │       ├── baseline_models
        │           ├── .gitignore
        │           ├── README.md
        │           ├── agent.py
        │           ├── env.py
        │           ├── generate_search.py
        │           ├── logger.py
        │           ├── models
        │           │   ├── bert.py
        │           │   ├── modules.py
        │           │   └── rnn.py
        │           ├── requirements.txt
        │           ├── test.py
        │           ├── train_choice_il.py
        │           ├── train_rl.py
        │           └── train_search_il.py
        │       ├── conftest.py
        │       ├── requirements.txt
        │       ├── run_dev.sh
        │       ├── run_envs
        │           ├── run_web_agent_site_env.py
        │           └── run_web_agent_text_env.py
        │       ├── run_prod.sh
        │       ├── run_web_agent_site_env.sh
        │       ├── run_web_agent_text_env.sh
        │       ├── search_engine
        │           ├── convert_product_file_format.py
        │           ├── lucene_searcher.py
        │           └── run_indexing.sh
        │       ├── setup.sh
        │       ├── tests
        │           ├── transfer
        │           │   ├── mocks
        │           │   │   ├── mock_parse_item_page_amz
        │           │   │   ├── mock_parse_item_page_ebay
        │           │   │   ├── mock_parse_item_page_ws
        │           │   │   ├── mock_parse_item_page_ws_desc
        │           │   │   ├── mock_parse_item_page_ws_feat
        │           │   │   ├── mock_parse_results_amz
        │           │   │   ├── mock_parse_results_ebay
        │           │   │   └── mock_parse_results_ws
        │           │   └── test_predict_help.py
        │           └── web-agent-site
        │           │   ├── engine
        │           │       ├── test_goal.py
        │           │       └── test_normalize.py
        │           │   └── test_utils.py
        │       ├── transfer
        │           ├── README.md
        │           ├── __init__.py
        │           ├── app.py
        │           ├── predict_help.py
        │           └── webshop_lite.py
        │       └── web_agent_site
        │           ├── __init__.py
        │           ├── app.py
        │           ├── attributes
        │               ├── annotate.py
        │               └── generate_attrs.py
        │           ├── engine
        │               ├── __init__.py
        │               ├── engine.py
        │               ├── goal.py
        │               └── normalize.py
        │           ├── envs
        │               ├── __init__.py
        │               ├── web_agent_site_env.py
        │               └── web_agent_text_env.py
        │           ├── models
        │               ├── __init__.py
        │               └── models.py
        │           ├── static
        │               ├── images
        │               │   └── no-image-available.png
        │               └── style.css
        │           ├── templates
        │               ├── attributes_page.html
        │               ├── description_page.html
        │               ├── done_page.html
        │               ├── features_page.html
        │               ├── item_page.html
        │               ├── results_page.html
        │               ├── review_page.html
        │               └── search_page.html
        │           └── utils.py
    ├── start_task.py
    ├── typings
        ├── __init__.py
        ├── config.py
        ├── exception.py
        ├── general.py
        ├── output.py
        ├── request.py
        └── status.py
    └── utils
        ├── __init__.py
        ├── max_flow.py
        ├── others.py
        └── rules.py


/.github/ISSUE_TEMPLATE/bug-report---assistance-request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug Report & Assistance Request
 3 | about: Create a report to help us improve
 4 | title: "[Bug/Assistance] "
 5 | labels: bug, help wanted
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | **Screenshots or Terminal Copy&Paste**
21 | If applicable, add screenshots or paste the relevant terminal output to help explain your problem.
22 | 
23 | **Desktop (please complete the following information):**
24 |  - OS: [e.g. Ubuntu 22.04]
25 |  - Python: [e.g. 3.9]
26 | 
27 | **Additional context**
28 | Add any other context about the problem here.
29 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature Request
 3 | about: Suggest an idea for this project
 4 | title: "[Feature] "
 5 | labels: enhancement
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Additional context**
17 | Add any other context or screenshots about the feature request here.
18 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | __pycache__
 2 | %*
 3 | .idea
 4 | .vscode
 5 | src/tasks/humaneval_x/env/vendor
 6 | logs
 7 | outputs
 8 | data/full
 9 | results
10 | config.sh
11 | download
12 | .DS_Store
13 | # local*
14 | *.ipynb
15 | .cache
16 | src/server/tasks/card_game/result
17 | .dockerfile
18 | .dockerfile-cache
19 | analysis


--------------------------------------------------------------------------------
/assets/agentbench.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/assets/agentbench.png


--------------------------------------------------------------------------------
/assets/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/assets/architecture.png


--------------------------------------------------------------------------------
/assets/cover.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/assets/cover.jpg


--------------------------------------------------------------------------------
/assets/intro.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/assets/intro.png


--------------------------------------------------------------------------------
/assets/leaderboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/assets/leaderboard.png


--------------------------------------------------------------------------------
/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/assets/logo.png


--------------------------------------------------------------------------------
/assets/statistics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/assets/statistics.png


--------------------------------------------------------------------------------
/configs/agents/api_agents.yaml:
--------------------------------------------------------------------------------
 1 | gpt-3.5-turbo-0613:
 2 |     import: "./openai-chat.yaml"
 3 |     parameters:
 4 |         name: "gpt-3.5-turbo-0613"
 5 |         body:
 6 |             model: "gpt-3.5-turbo-0613"
 7 |             max_tokens: 512
 8 | 
 9 | text-davinci-003:
10 |     import: "./openai-text.yaml"
11 |     parameters:
12 |         name: "text-davinci-003"
13 |         body:
14 |             model: "text-davinci-003"
15 |             max_tokens: 512
16 | 
17 | text-davinci-002:
18 |     import: "./openai-text.yaml"
19 |     parameters:
20 |         name: "text-davinci-002"
21 |         body:
22 |             model: "text-davinci-002"
23 |             max_tokens: 512
24 | 


--------------------------------------------------------------------------------
/configs/agents/fs_agent.yaml:
--------------------------------------------------------------------------------
 1 | default:
 2 |   module: "src.client.agents.FastChatAgent"
 3 |   parameters:
 4 |     name: "FastChat"
 5 |     controller_address: "http://localhost:55555"
 6 |     max_new_tokens: 512
 7 |     temperature: 0
 8 | 
 9 | vicuna-33b:
10 |   parameters:
11 |     model_name: "vicuna-33b-v1.3"
12 | 
13 | wizard-30b:
14 |   parameters:
15 |     model_name: "WizardLM-30B-V1.0-merged"
16 | 
17 | vicuna-13b:
18 |   parameters:
19 |     model_name: "vicuna-13b-v1.5"
20 | 
21 | vicuna-7b:
22 |   parameters:
23 |     model_name: "vicuna-7b-v1.5"
24 | 


--------------------------------------------------------------------------------
/configs/agents/openai-chat.yaml:
--------------------------------------------------------------------------------
 1 | module: src.client.agents.HTTPAgent
 2 | parameters:
 3 |   url: https://api.openai.com/v1/chat/completions
 4 |   headers:
 5 |     Content-Type: application/json
 6 |     Authorization: Bearer <% PUT-YOUR-OPENAI-KEY-HERE %>
 7 |   body:
 8 |     temperature: 0
 9 |   prompter:
10 |     name: role_content_dict
11 |     args:
12 |       agent_role: assistant
13 |   return_format: "{response[choices][0][message][content]}"
14 | 


--------------------------------------------------------------------------------
/configs/agents/openai-text.yaml:
--------------------------------------------------------------------------------
 1 | module: src.client.agents.HTTPAgent
 2 | parameters:
 3 |   name: <% NAME %>
 4 |   url: https://api.openai.com/v1/completions
 5 |   headers:
 6 |     Content-Type: application/json
 7 |     Authorization: Bearer <% PUT-YOUR-OPENAI-KEY-HERE %>
 8 |   body:
 9 |     model: <% NAME %>
10 |     temperature: 0
11 |   prompter:
12 |     name: prompt_string
13 |   return_format: "{response[choices][0][text]}"
14 | 
15 | 


--------------------------------------------------------------------------------
/configs/assignments/default.yaml:
--------------------------------------------------------------------------------
 1 | import: definition.yaml
 2 | 
 3 | concurrency:
 4 |   task:
 5 |     dbbench-std: 5
 6 |     os-std: 5
 7 |   agent:
 8 |     gpt-3.5-turbo-0613: 5
 9 | 
10 | assignments: # List[Assignment] | Assignment
11 |   - agent: # "agent" and "task" each accept List[str] | str
12 |       - gpt-3.5-turbo-0613
13 |     task:
14 |       - dbbench-std
15 |       - os-std
16 | 
17 | output: "outputs/{TIMESTAMP}"
18 | 


--------------------------------------------------------------------------------
/configs/assignments/definition.yaml:
--------------------------------------------------------------------------------
 1 | definition:
 2 |   task:
 3 |     overwrite:
 4 |       module: src.client.TaskClient
 5 |       parameters:
 6 |         controller_address: "http://localhost:5000/api"
 7 |     import: ../tasks/task_assembly.yaml
 8 |   agent:
 9 |     import:
10 |       - ../agents/api_agents.yaml
11 |       - ../agents/fs_agent.yaml


--------------------------------------------------------------------------------
/configs/assignments/test_avalon.yaml:
--------------------------------------------------------------------------------
 1 | import: definition.yaml
 2 | 
 3 | concurrency:
 4 |   task:
 5 |     avalon-dev-single: 3
 6 |   agent:
 7 |     gpt-3.5-turbo-0613: 3
 8 | 
 9 | assignments: # List[Assignment] | Assignment
10 |   - agent: # "agent" and "task" each accept List[str] | str
11 |       - gpt-3.5-turbo-0613
12 |     task:
13 |       - avalon-dev-single
14 | 
15 | output: "outputs/{TIMESTAMP}"
16 | 


--------------------------------------------------------------------------------
/configs/start_task.yaml:
--------------------------------------------------------------------------------
1 | definition:
2 |   import: tasks/task_assembly.yaml
3 | 
4 | start:
5 |   dbbench-std: 5
6 |   os-std: 5
7 | 


--------------------------------------------------------------------------------
/configs/tasks/alfworld.yaml:
--------------------------------------------------------------------------------
 1 | default:
 2 |   module: src.server.tasks.alfworld.ALFWorld
 3 |   docker:
 4 |     image: longinyu/agentbench-alfworld
 5 |     command: umask 0; [ -f /root/.setup.sh ] && bash /root/.setup.sh;
 6 |   parameters:
 7 |     name: alfworld-std
 8 |     data_path: "/AgentBench/data/alfworld"
 9 |     config_path: "src/server/tasks/alfworld/configs/base_config.yaml"
10 |     prompts_path: "src/server/tasks/alfworld/prompts/alfworld_multiturn_plan_first.json"
11 |     split: "standard"
12 |     max_step: 35
13 | 
14 | alfworld-dev:
15 |   parameters:
16 |     name: alfworld-dev
17 |     split: "dev"
18 | 
19 | alfworld-std:
20 |   parameters:
21 |     name: alfworld-std
22 |     split: "standard"
23 | 


--------------------------------------------------------------------------------
/configs/tasks/avalon.yaml:
--------------------------------------------------------------------------------
 1 | default:
 2 |   module: "src.server.tasks.avalon.AvalonBench"
 3 |   parameters:
 4 |     concurrency: 5
 5 |     num_players: 5
 6 |     discussion: False
 7 | 
 8 | avalon-dev-naive:
 9 |   parameters:
10 |     name: "AvalonBench-dev-naive"
11 |     data_file: "data/avalon/dev.json"
12 |     agent_list: ["naive", "naive", "naive", "naive", "naive"]
13 | 
14 | avalon-dev-single:
15 |   parameters:
16 |     name: "AvalonBench-dev-single"
17 |     data_file: "data/avalon/dev.json"
18 |     agent_list: ["llm", "naive", "naive", "naive", "naive"]


--------------------------------------------------------------------------------
/configs/tasks/card_game.yaml:
--------------------------------------------------------------------------------
 1 | default:
 2 |   module: src.server.tasks.card_game.CardGame
 3 |   docker:
 4 |     image: longinyu/agentbench-card_game
 5 |   parameters:
 6 |     port: 12347
 7 |     workers: 10
 8 | 
 9 | cg-dev:
10 |   parameters:
11 |     name: "CardGame-dev"
12 |     test_time: 3
13 | 
14 | cg-std:
15 |   parameters:
16 |     name: "CardGame-std"
17 |     test_time: 5
18 | 


--------------------------------------------------------------------------------
/configs/tasks/dbbench.yaml:
--------------------------------------------------------------------------------
 1 | default:
 2 |   module: src.server.tasks.dbbench.DBBench
 3 |   parameters:
 4 |     concurrency: 1
 5 |     max_round: 15
 6 | 
 7 | dbbench-dev:
 8 |   parameters:
 9 |     name: dbbench-dev
10 |     data_file: "data/dbbench/dev.jsonl"
11 | 
12 | dbbench-std:
13 |   parameters:
14 |     name: dbbench-std
15 |     data_file: "data/dbbench/standard.jsonl"
16 | 


--------------------------------------------------------------------------------
/configs/tasks/kg.yaml:
--------------------------------------------------------------------------------
 1 | default:
 2 |   module: "src.server.tasks.knowledgegraph.KnowledgeGraph"
 3 |   parameters:
 4 |     round: 15
 5 |     sparql_url: "http://164.107.116.56:3093/sparql"
 6 | 
 7 | kg-dev:
 8 |   parameters:
 9 |     name: "KnowledgeGraph-dev"
10 |     data_file: "data/knowledgegraph/dev.json"
11 | 
12 | kg-std:
13 |   parameters:
14 |     name: "KnowledgeGraph-std"
15 |     data_file: "data/knowledgegraph/std.json"
16 | 


--------------------------------------------------------------------------------
/configs/tasks/ltp.yaml:
--------------------------------------------------------------------------------
 1 | default:
 2 |   module: src.server.tasks.ltp.LateralThinkingPuzzle
 3 |   docker:
 4 |     image: longinyu/agentbench-ltp
 5 |   parameters:
 6 |     round: 25
 7 |     eval_yaml: "configs/agents/api_agents.yaml"
 8 | 
 9 | ltp-dev:
10 |   parameters:
11 |     name: ltp-dev
12 |     filepath: "data/lateralthinkingpuzzle/dev.xlsx"
13 | 
14 | ltp-std:
15 |   parameters:
16 |     name: ltp-std
17 |     filepath: "data/lateralthinkingpuzzle/standard.xlsx"


--------------------------------------------------------------------------------
/configs/tasks/mind2web.yaml:
--------------------------------------------------------------------------------
 1 | default:
 2 |   module: src.server.tasks.mind2web.Mind2Web
 3 |   docker:
 4 |     image: longinyu/agentbench-mind2web
 5 |   parameters:
 6 |     concurrency: 10
 7 |     data:
 8 |       data_path: "."
 9 |       cache_path: "./data/mind2web/.cache/data"
10 |       score_file: /root/work/data/scores_all_data.pkl
11 |     train:
12 |       neg_ratio: 0.2
13 |       num_candidates: 5
14 |       max_context_len: 512
15 |     model:
16 |       mode: "multichoice"
17 |       name: flan-t5-base
18 |       model_name_or_path: "google/flan-t5-base"
19 |       max_seq_length: 2048
20 |     eval:
21 |       topk: 10
22 |     seed: 123
23 |     llm_prompt: data/mind2web/prompt/llm_prompt_cot.json
24 | 
25 | m2w-dev:
26 |   parameters:
27 |     name: "Mind2Web-dev"
28 |     data:
29 |       test_split_files:
30 |         test_domain: /root/work/data/data_dev/*.json
31 | 
32 | m2w-std:
33 |   parameters:
34 |     name: "Mind2Web-std"
35 |     data:
36 |       test_split_files:
37 |         test_domain: /root/work/data/data_std/*.json
38 | 


--------------------------------------------------------------------------------
/configs/tasks/os.yaml:
--------------------------------------------------------------------------------
 1 | os-dev:
 2 |   module: "src.server.tasks.os_interaction.OSInteraction"
 3 |   parameters:
 4 |     name: "os-dev"
 5 |     concurrency: 24
 6 |     round_limit: 8
 7 | 
 8 |     docker_config:
 9 |       localhost: local-os
10 |       directory: data/os_interaction/res/dockerfiles
11 | 
12 |     scripts:
13 |       directory: data/os_interaction/res/scripts
14 | 
15 |     data_config:
16 |       files:
17 |         - problem_file: data/os_interaction/data/dev.json
18 |           script_dir: data/os_interaction/scripts/dev/
19 |           index_prefix: "dev-001-"
20 | 
21 |       bk: [ ]
22 |       ignore: [ ]
23 | 
24 | os-std:
25 |   module: "src.server.tasks.os_interaction.OSInteraction"
26 |   parameters:
27 |     name: "os-std"
28 |     concurrency: 24
29 |     round_limit: 8
30 | 
31 |     docker_config:
32 |       localhost: local-os
33 |       directory: data/os_interaction/res/dockerfiles
34 | 
35 |     scripts:
36 |       directory: data/os_interaction/res/scripts
37 | 
38 |     data_config:
39 |       files:
40 |         - problem_file: data/os_interaction/data/1/*.json
41 |           script_dir: data/os_interaction/scripts/1/
42 |           index_prefix: "std-001-"
43 |         - problem_file: data/os_interaction/data/2/*.json
44 |           script_dir: data/os_interaction/scripts/2/
45 |           index_prefix: "std-002-"
46 |         - problem_file: data/os_interaction/data/3/*.json
47 |           script_dir: data/os_interaction/scripts/3/
48 |           index_prefix: "std-003-"
49 |         - problem_file: data/os_interaction/data/4/*.json
50 |           script_dir: data/os_interaction/scripts/4/
51 |           index_prefix: "std-004-"
52 |         - problem_file: data/os_interaction/data/5/*.json
53 |           script_dir: data/os_interaction/scripts/5/
54 |           index_prefix: "std-005-"
55 |         - problem_file: data/os_interaction/data/6/*.json
56 |           script_dir: data/os_interaction/scripts/6/
57 |           index_prefix: "std-006-"
58 |         - problem_file: data/os_interaction/data/7/*.json
59 |           script_dir: data/os_interaction/scripts/7/
60 |           index_prefix: "std-007-"
61 | 
62 |       bk: [ ]
63 |       ignore: [ ]
64 | 


--------------------------------------------------------------------------------
/configs/tasks/task_assembly.yaml:
--------------------------------------------------------------------------------
 1 | default:
 2 |   docker:
 3 |     command: umask 0; [ -f /root/.setup.sh ] && bash /root/.setup.sh;
 4 | 
 5 | import:
 6 |   - webshop.yaml
 7 |   - dbbench.yaml
 8 |   - mind2web.yaml
 9 |   - card_game.yaml
10 |   - kg.yaml
11 |   - os.yaml
12 |   - ltp.yaml
13 |   - alfworld.yaml
14 |   - avalon.yaml


--------------------------------------------------------------------------------
/configs/tasks/webshop.yaml:
--------------------------------------------------------------------------------
 1 | default:
 2 |   module: src.server.tasks.webshop_docker.WebShop
 3 |   parameters:
 4 |     concurrency: 1
 5 |   docker:
 6 |     image: longinyu/agentbench-webshop
 7 |     command: ln -s /root/webshop /root/workspace/src/server/tasks/webshop_docker;cp /root/workspace/src/server/tasks/webshop/__init__.py /root/webshop/__init__.py;
 8 | 
 9 | webshop-dev:
10 |   parameters:
11 |     name: webshop-dev
12 |     start: 200
13 |     end: 280
14 | 
15 | webshop-std:
16 |   parameters:
17 |     name: webshop-std
18 |     start: 0
19 |     end: 200
20 | 


--------------------------------------------------------------------------------
/data/alfworld/dev.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "pick_and_place": [
 3 |     "json_2.1.1/valid_unseen/pick_and_place_simple-SoapBottle-None-Toilet-424/trial_T20190907_004404_604165/game.tw-pddl",
 4 |     "json_2.1.1/valid_unseen/pick_and_place_simple-Pencil-None-Shelf-308/trial_T20190908_122154_042763/game.tw-pddl",
 5 |     "json_2.1.1/valid_unseen/pick_and_place_simple-SaltShaker-None-Cabinet-10/trial_T20190906_191445_723170/game.tw-pddl",
 6 |     "json_2.1.1/valid_unseen/pick_and_place_simple-Mug-None-Desk-308/trial_T20190909_210238_431966/game.tw-pddl"
 7 |   ],
 8 |   "pick_clean_then_place": [
 9 |     "json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-Cloth-None-Cabinet-424/trial_T20190908_022436_073995/game.tw-pddl",
10 |     "json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-SoapBar-None-Cabinet-424/trial_T20190908_215019_162873/game.tw-pddl",
11 |     "json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-Pan-None-CounterTop-10/trial_T20190908_032543_712058/game.tw-pddl",
12 |     "json_2.1.1/valid_unseen/pick_clean_then_place_in_recep-SoapBar-None-CounterTop-424/trial_T20190907_074106_050405/game.tw-pddl"
13 |   ],
14 |   "pick_heat_then_place": [
15 |     "json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Cup-None-Cabinet-10/trial_T20190907_083346_800823/game.tw-pddl",
16 |     "json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Cup-None-Cabinet-10/trial_T20190907_083507_594820/game.tw-pddl",
17 |     "json_2.1.1/valid_unseen/pick_heat_then_place_in_recep-Tomato-None-GarbageCan-10/trial_T20190908_225453_272533/game.tw-pddl"
18 |   ],
19 |   "pick_cool_then_place": [
20 |     "json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Potato-None-Microwave-10/trial_T20190907_033157_424297/game.tw-pddl",
21 |     "json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Pan-None-CounterTop-10/trial_T20190908_114622_738670/game.tw-pddl",
22 |     "json_2.1.1/valid_unseen/pick_cool_then_place_in_recep-Bread-None-CounterTop-10/trial_T20190908_091747_866951/game.tw-pddl"
23 |   ],
24 |   "look_at_obj": [
25 |     "json_2.1.1/valid_unseen/look_at_obj_in_light-Mug-None-DeskLamp-308/trial_T20190908_161733_213242/game.tw-pddl",
26 |     "json_2.1.1/valid_unseen/look_at_obj_in_light-Bowl-None-DeskLamp-308/trial_T20190907_133935_066606/game.tw-pddl",
27 |     "json_2.1.1/valid_unseen/look_at_obj_in_light-Pencil-None-DeskLamp-308/trial_T20190908_220656_510400/game.tw-pddl"
28 |   ],
29 |   "pick_two_obj": [
30 |     "json_2.1.1/valid_unseen/pick_two_obj_and_place-Pillow-None-Sofa-219/trial_T20190907_163327_486300/game.tw-pddl",
31 |     "json_2.1.1/valid_unseen/pick_two_obj_and_place-SoapBar-None-GarbageCan-424/trial_T20190909_064053_839817/game.tw-pddl",
32 |     "json_2.1.1/valid_unseen/pick_two_obj_and_place-CD-None-Safe-308/trial_T20190907_051013_060265/game.tw-pddl"
33 |   ]
34 | }


--------------------------------------------------------------------------------
/data/avalon/dev.json:
--------------------------------------------------------------------------------
  1 | [
  2 |     {
  3 |         "num_players": 5,
  4 |         "quest_leader": 0,
  5 |         "role_names": ["Servant", "Merlin", "Servant", "Assassin", "Minion"]
  6 |     },
  7 |     {
  8 |         "num_players": 5,
  9 |         "quest_leader": 0,
 10 |         "role_names": ["Servant", "Assassin", "Servant", "Merlin", "Minion"]
 11 |     },
 12 |     {
 13 |         "num_players": 5,
 14 |         "quest_leader": 0,
 15 |         "role_names": ["Servant", "Assassin", "Merlin", "Servant", "Minion"]
 16 |     },
 17 |     {
 18 |         "num_players": 5,
 19 |         "quest_leader": 0,
 20 |         "role_names": ["Servant", "Servant", "Assassin", "Merlin", "Minion"]
 21 |     },
 22 |     {
 23 |         "num_players": 5,
 24 |         "quest_leader": 0,
 25 |         "role_names": ["Servant", "Minion", "Assassin", "Merlin", "Servant"]
 26 |     },
 27 |     {
 28 |         "num_players": 5,
 29 |         "quest_leader": 0,
 30 |         "role_names": ["Assassin", "Servant", "Merlin", "Servant", "Minion"]
 31 |     },
 32 |     {
 33 |         "num_players": 5,
 34 |         "quest_leader": 0,
 35 |         "role_names": ["Assassin", "Servant", "Servant", "Merlin", "Minion"]
 36 |     },
 37 |     {
 38 |         "num_players": 5,
 39 |         "quest_leader": 0,
 40 |         "role_names": ["Assassin", "Servant", "Servant", "Minion", "Merlin"]
 41 |     },
 42 |     {
 43 |         "num_players": 5,
 44 |         "quest_leader": 0,
 45 |         "role_names": ["Assassin", "Merlin", "Servant", "Servant", "Minion"]
 46 |     },
 47 |     {
 48 |         "num_players": 5,
 49 |         "quest_leader": 0,
 50 |         "role_names": ["Assassin", "Minion", "Servant", "Servant", "Merlin"]
 51 |     },
 52 |     {
 53 |         "num_players": 5,
 54 |         "quest_leader": 1,
 55 |         "role_names": ["Servant", "Merlin", "Servant", "Assassin", "Minion"]
 56 |     },
 57 |     {
 58 |         "num_players": 5,
 59 |         "quest_leader": 1,
 60 |         "role_names": ["Servant", "Assassin", "Servant", "Merlin", "Minion"]
 61 |     },
 62 |     {
 63 |         "num_players": 5,
 64 |         "quest_leader": 1,
 65 |         "role_names": ["Servant", "Assassin", "Merlin", "Servant", "Minion"]
 66 |     },
 67 |     {
 68 |         "num_players": 5,
 69 |         "quest_leader": 1,
 70 |         "role_names": ["Servant", "Servant", "Assassin", "Merlin", "Minion"]
 71 |     },
 72 |     {
 73 |         "num_players": 5,
 74 |         "quest_leader": 1,
 75 |         "role_names": ["Servant", "Minion", "Assassin", "Merlin", "Servant"]
 76 |     },
 77 |     {
 78 |         "num_players": 5,
 79 |         "quest_leader": 1,
 80 |         "role_names": ["Assassin", "Servant", "Merlin", "Servant", "Minion"]
 81 |     },
 82 |     {
 83 |         "num_players": 5,
 84 |         "quest_leader": 1,
 85 |         "role_names": ["Assassin", "Servant", "Servant", "Merlin", "Minion"]
 86 |     },
 87 |     {
 88 |         "num_players": 5,
 89 |         "quest_leader": 1,
 90 |         "role_names": ["Assassin", "Servant", "Servant", "Minion", "Merlin"]
 91 |     },
 92 |     {
 93 |         "num_players": 5,
 94 |         "quest_leader": 1,
 95 |         "role_names": ["Assassin", "Merlin", "Servant", "Servant", "Minion"]
 96 |     },
 97 |     {
 98 |         "num_players": 5,
 99 |         "quest_leader": 1,
100 |         "role_names": ["Assassin", "Minion", "Servant", "Servant", "Merlin"]
101 |     }
102 | ]


--------------------------------------------------------------------------------
/data/lateralthinkingpuzzle/dev.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/data/lateralthinkingpuzzle/dev.xlsx


--------------------------------------------------------------------------------
/data/lateralthinkingpuzzle/standard.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/data/lateralthinkingpuzzle/standard.xlsx


--------------------------------------------------------------------------------
/data/lateralthinkingpuzzle_zh/dev.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/data/lateralthinkingpuzzle_zh/dev.xlsx


--------------------------------------------------------------------------------
/data/lateralthinkingpuzzle_zh/standard.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/data/lateralthinkingpuzzle_zh/standard.xlsx


--------------------------------------------------------------------------------
/data/os_interaction/data/3/ac.json:
--------------------------------------------------------------------------------
  1 | [
  2 |     {
  3 |         "description": "Tell me the number of CPUs.",
  4 |         "evaluation": {
  5 |             "check": [
  6 |                 null,
  7 |                 {
  8 |                     "language": "python",
  9 |                     "file": "check/integer-match.py"
 10 |                 }
 11 |             ],
 12 |             "example": "nproc"
 13 |         },
 14 |         "labels": [
 15 |             "command",
 16 |             "CPU",
 17 |             "device",
 18 |             "hardware",
 19 |             "processor",
 20 |             "system"
 21 |         ]
 22 |     },
 23 |     {
 24 |         "description": "How many groups are in this UNIX system?",
 25 |         "evaluation": {
 26 |             "check": [
 27 |                 null,
 28 |                 {
 29 |                     "language": "python",
 30 |                     "file": "check/integer-match.py"
 31 |                 }
 32 |             ],
 33 |             "example": "getent group | wc -l"
 34 |         },
 35 |         "labels": ["command", "system"]
 36 |     },
 37 |     {
 38 |         "description": "Recursively Find how many files have the extension .tep",
 39 |         "create": {
 40 |             "local": "default",
 41 |             "init": "echo 123>/root/.tmp\necho 123>/root/.tmp2\necho 123>/root/1.tep\necho 123>/root/.tep\necho 123>/root/.u\necho 123>/root/.k\n"
 42 |         },
 43 |         "evaluation": {
 44 |             "check": [
 45 |                 null,
 46 |                 {
 47 |                     "language": "python",
 48 |                     "file": "check/integer-match.py"
 49 |                 }
 50 |             ],
 51 |             "example": "find . -type f -name \"*.tep\" | wc -l"
 52 |         },
 53 |         "labels": ["command", "file"]
 54 |     },
 55 |     {
 56 |         "description": "Tell me whether npm is installed or not. If it is installed, return 'yes'. If it is not installed, return 'no'",
 57 |         "evaluation": {
 58 |             "check": [
 59 |                 null,
 60 |                 {
 61 |                     "language": "python",
 62 |                     "file": "check/string-match.py"
 63 |                 }
 64 |             ],
 65 |             "example": "[ -x \"$(command -v npm)\" ] && echo 'yes' || echo 'no'"
 66 |         },
 67 |         "labels": ["command", "packages"]
 68 |     },
 69 |     {
 70 |         "description": "Tell me whether npm is installed or not. If it is installed, return 'yes'. If it is not installed, return 'no'",
 71 |         "create": {
 72 |             "local": "packages"
 73 |         },
 74 |         "evaluation": {
 75 |             "check": [
 76 |                 null,
 77 |                 {
 78 |                     "language": "python",
 79 |                     "file": "check/string-match.py"
 80 |                 }
 81 |             ],
 82 |             "example": "[ -x \"$(command -v npm)\" ] && echo 'yes' || echo 'no'"
 83 |         },
 84 |         "labels": ["command", "packages"]
 85 |     },
 86 |     {
 87 |         "description": "Tell me the max number of threads in Linux.",
 88 |         "evaluation": {
 89 |             "check": [
 90 |                 null,
 91 |                 {
 92 |                     "language": "python",
 93 |                     "file": "check/string-match.py"
 94 |                 }
 95 |             ],
 96 |             "example": "cat /proc/sys/kernel/threads-max"
 97 |         },
 98 |         "labels": ["command", "file"]
 99 |     }
100 | ]
101 | 


--------------------------------------------------------------------------------
/data/os_interaction/data/4/N11.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "description": "How many hidden files, which do not contain the character u, are in /usr? (not recursively)",
 3 |     "create": {
 4 |         "local": "default",
 5 |         "init": "echo 123>/usr/.tmp\necho 123>/usr/tmp2\necho 123>/usr/.tm12p\necho 123>/usr/.f\necho 123>/usr/.u\necho 123>/usr/k\n"
 6 |     },
 7 |     "evaluation": {
 8 |         "check": [
 9 |             null,
10 |             {
11 |                 "language": "python",
12 |                 "file": "check/integer-match.py"
13 |             }
14 |         ],
15 |         "example": {
16 |             "code": "find /usr -maxdepth 1 -type f -name \".*\" -not -name \"*u*\" | wc -l"
17 |         }
18 |     },
19 |     "labels": ["file"]
20 | }
21 | 


--------------------------------------------------------------------------------
/data/os_interaction/data/4/N225.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "description": "Get the name of most recent file in /usr. (not recursively)",
 3 |     "create": {
 4 |         "local": "default",
 5 |         "init": "i=1; while [ $i -lt 100 ]; do echo \"Content: $((i * 2 % 3))\" > \"/usr/$(( (i+50)%100 )).log\"; i=$((i+1)); done"
 6 |     },
 7 |     "evaluation": {
 8 |         "check": [
 9 |             null,
10 |             {
11 |                 "language": "python",
12 |                 "file": "check/string-match.py"
13 |             }
14 |         ],
15 |         "example": {
16 |             "code": "ls -lt /usr | head -n 2 | tail -n 1 | awk '{print $9}'"
17 |         }
18 |     },
19 |     "labels": ["file"]
20 | }
21 | 


--------------------------------------------------------------------------------
/data/os_interaction/data/4/N37.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "description": "What's the full path of a file \"TOOLS\"?",
 3 |     "create": {
 4 |         "local": "default",
 5 |         "init": "echo 123>/usr/TOOLS"
 6 |     },
 7 |     "evaluation": {
 8 |         "check": [
 9 |             null,
10 |             {
11 |                 "language": "python",
12 |                 "file": "check/string-match.py"
13 |             }
14 |         ],
15 |         "example": "find / -name TOOLS"
16 |     },
17 |     "labels": ["path", "file"]
18 | }
19 | 


--------------------------------------------------------------------------------
/data/os_interaction/data/4/N4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "description": "execute ~/test and get the output.",
 3 |     "start": "useradd -s /bin/bash -d /home/jack -m jack && cd /home/jack && echo 'echo love' > test && chmod 000 test && chown jack test && su - jack",
 4 |     "evaluation": {
 5 |         "match": "love",
 6 |         "example": {
 7 |             "code": "chmod 777 test && ~/test"
 8 |         }
 9 |     },
10 |     "labels": ["permission", "user"]
11 | }
12 | 


--------------------------------------------------------------------------------
/data/os_interaction/data/4/N41.json:
--------------------------------------------------------------------------------
 1 | [
 2 |     {
 3 |         "description": "Does a regular file named \"workspace\" exist in /working (recursively)? If so, output \"yes\", otherwise output \"no\" (without quotes).",
 4 |         "create": {
 5 |             "local": "default",
 6 |             "init": "rm -rf /working; mkdir /working && echo 123>/working/.tmp && echo 123>/working/.tm12p && echo 123>/working/.f && mkdir /working/OS && echo 'OS' > /working/OS/.theme"
 7 |         },
 8 |         "evaluation": {
 9 |             "match": "no"
10 |         },
11 |         "labels": ["file"]
12 |     },
13 |     {
14 |         "description": "Does a regular file named \"workspace\" exist in /working (recursively)? If so, output \"yes\", otherwise output \"no\" (without quotes).",
15 |         "create": {
16 |             "local": "default",
17 |             "init": "rm -rf /working; mkdir /working && echo 123>/working/.tmp && echo 123>/working/.tm12p && echo 123>/working/.f && mkdir /working/OS && echo 'OS' > /working/OS/.theme && mkdir /working/OS/workspace"
18 |         },
19 |         "evaluation": {
20 |             "match": "no"
21 |         },
22 |         "labels": ["file"]
23 |     },
24 |     {
25 |         "description": "Does a regular file named \"workspace\" exist in /working (recursively)? If so, output \"yes\", otherwise output \"no\" (without quotes).",
26 |         "create": {
27 |             "local": "default",
28 |             "init": "rm -rf /working; mkdir /working && echo 123>/working/.tmp && echo 123>/working/.tm12p && echo 123>/working/.f && mkdir /working/OS && echo 'OS' > /working/OS/.theme && touch /working/OS/workspace"
29 |         },
30 |         "evaluation": {
31 |             "match": "yes"
32 |         },
33 |         "labels": ["file"]
34 |     },
35 |     {
36 |         "description": "Does a regular file named \"workspace\" exist in /working (recursively)? If so, output \"yes\", otherwise output \"no\" (without quotes).",
37 |         "create": {
38 |             "local": "default",
39 |             "init": "rm -rf /working; mkdir /working && echo 123>/working/.tmp && echo 123>/working/.tm12p && echo 123>/working/.f && mkdir /working/OS && echo 'OS' > /working/OS/.theme && touch /working/workspace"
40 |         },
41 |         "evaluation": {
42 |             "match": "yes"
43 |         },
44 |         "labels": ["file"]
45 |     }
46 | ]
47 | 


--------------------------------------------------------------------------------
/data/os_interaction/data/4/Q09.json:
--------------------------------------------------------------------------------
 1 | [
 2 |     {
 3 |         "description": "What is the total number of files in the /var directory? (recursively)",
 4 |         "create": {
 5 |             "local": "default",
 6 |             "init": "echo 123321>/var/tmp.log && echo 123321>/var/dsdd && echo 123321>/var/ppoo && echo 123321>/var/gdss && echo 123321>/var/itcs && echo 123321>/var/ksmx"
 7 |         },
 8 |         "evaluation": {
 9 |             "check": [
10 |                 null,
11 |                 {
12 |                     "language": "python",
13 |                     "file": "check/integer-match.py"
14 |                 }
15 |             ],
16 |             "example": {
17 |                 "code": "find /var -type f | wc -l"
18 |             }
19 |         },
20 |         "labels": ["command", "file"]
21 |     }
22 | ]
23 | 


--------------------------------------------------------------------------------
/data/os_interaction/data/4/Q19.json:
--------------------------------------------------------------------------------
 1 | [
 2 |     {
 3 |         "description": "How many lines are there in /root/matrix.txt",
 4 |         "create": {
 5 |             "local": "default",
 6 |             "init":"echo \"1 2 3\n4 5 6\n7 8 9\" > /root/matrix.txt"
 7 |         },
 8 |         "evaluation": {
 9 |             "check": [
10 |                 null,
11 |                 {
12 |                     "language": "python",
13 |                     "file": "check/integer-match.py"
14 |                 }
15 |             ],
16 |             "example": {
17 |                 "code": "cat /root/matrix.txt | wc -l"
18 |             }
19 |         },
20 |         "labels": ["command","file"]
21 |     }
22 | ]


--------------------------------------------------------------------------------
/data/os_interaction/data/4/Q47.json:
--------------------------------------------------------------------------------
 1 | [
 2 |     {
 3 |         "description": "Move the file MyPersonalComputer.config to the current directory, and tell me the path of the file before it was moved.",
 4 |         "create": {
 5 |             "local": "default",
 6 |             "init": "mkdir /home/user1 && mkdir /home/user1/os && mkdir /home/user1/os/linux && mkdir /home/user1/os/linux/commands && echo 'Linux-Ubuntu-20.04?' > /home/user1/os/linux/commands/MyPersonalComputer.config"
 7 |         },
 8 |         "start": "mkdir /home/user2 && cd /home/user2",
 9 |         "evaluation": {
10 |             "check": [
11 |                 {
12 |                     "language": "python",
13 |                     "code": "from sys import argv\nassert argv[1].strip()=='/home/user1/os/linux/commands/MyPersonalComputer.config'"
14 |                 },
15 |                 {
16 |                     "language": "bash",
17 |                     "code": "find /home/user1/os/linux/commands/MyPersonalComputer.config >/dev/null 2>/dev/null && exit 1 || exit 0"
18 |                 },
19 |                 {
20 |                     "language": "bash",
21 |                     "code": "find /home/user2/MyPersonalComputer.config >/dev/null 2>/dev/null && exit 0 || exit 1"
22 |                 }
23 |             ],
24 |             "example": {
25 |                 "code": "mv /home/user1/os/linux/commands/MyPersonalComputer.config /home/user2/ && echo /home/user1/os/linux/commands/MyPersonalComputer.config"
26 |             }
27 |         },
28 |         "labels": ["path", "files"]
29 |     }
30 | ]
31 | 


--------------------------------------------------------------------------------
/data/os_interaction/data/4/Q49.json:
--------------------------------------------------------------------------------
 1 | [
 2 |     {
 3 |         "description": "Test whether the variable var is an integer. If so, tell me \"yes\", otherwise tell me \"no\". (Without quotes)",
 4 |         "create": {
 5 |             "local": "default"
 6 |         },
 7 |         "start": "var=3.14",
 8 |         "evaluation": {
 9 |             "match": "no",
10 |             "example": {
11 |                 "code": "test \"$var\" -eq \"$var\" 2>/dev/null && echo \"yes\" || echo \"no\""
12 |             }
13 |         },
14 |         "labels": ["command", "variables"]
15 |     },
16 |     {
17 |         "description": "Test whether the variable var is an integer. If so, tell me \"yes\", otherwise tell me \"no\". (Without quotes)",
18 |         "create": {
19 |             "local": "default"
20 |         },
21 |         "start": "var=10",
22 |         "evaluation": {
23 |             "match": "yes",
24 |             "example": {
25 |                 "code": "test \"$var\" -eq \"$var\" 2>/dev/null && echo \"yes\" || echo \"no\""
26 |             }
27 |         },
28 |         "labels": ["command", "variables"]
29 |     }
30 | ]
31 | 


--------------------------------------------------------------------------------
/data/os_interaction/res/dockerfiles/default:
--------------------------------------------------------------------------------
1 | FROM ubuntu
2 | RUN apt update && apt install python3 -y && apt install python3-pip -y && apt install git -y && apt install vim -y && apt install curl -y && apt install wget -y && apt install unzip -y && apt install zip -y && apt install tree -y
3 | CMD ["bash"]
4 | 


--------------------------------------------------------------------------------
/data/os_interaction/res/dockerfiles/packages:
--------------------------------------------------------------------------------
1 | FROM ubuntu
2 | RUN apt update && apt install python3 -y && apt install python3-pip -y && apt install git -y && apt install vim -y && apt install curl -y && apt install wget -y && apt install unzip -y && apt install zip -y && apt install tree -y && apt install npm -y
3 | CMD ["bash"]
4 | 


--------------------------------------------------------------------------------
/data/os_interaction/res/dockerfiles/ubuntu:
--------------------------------------------------------------------------------
1 | FROM ubuntu
2 | RUN echo "tmp" > file.txt
3 | CMD ["bash"]


--------------------------------------------------------------------------------
/data/os_interaction/scripts/1/check/1.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | count_files() {
 4 |     # echo $1 >> tmp.log
 5 |     local dir=$1
 6 |     local count=0
 7 | 
 8 |     for file in "$dir"/*; do
 9 |         if [ -f "$file" ]; then
10 |             count=$((count + 1))
11 |         elif [ -d "$file" ]; then
12 |             count_sub=$(count_files "$file")
13 |             count=$((count + count_sub))
14 |         fi
15 |     done
16 | 
17 |     echo "$count"
18 | }
19 | 
# Compare the reference implementation (count_files) with the `count` command
# under test on a fixed set of directories. Exit 1 on the first mismatch and
# 0 when every directory agrees.
for dir in /usr/local/bin /root /bin /lib /dev /usr/include; do
    expected=$(count_files "$dir")
    actual=$(count "$dir")
    # Both sides are quoted so that empty or multi-word output from `count`
    # is compared as one string; unquoted, `[` would abort with
    # "too many arguments" and the mismatch would go unnoticed.
    [ "$expected" != "$actual" ] && exit 1
done
exit 0


--------------------------------------------------------------------------------
/data/os_interaction/scripts/1/check/containing.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v2 in v1:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/1/check/in.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v1 in v2:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/1/check/integer-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
# Exit status 0 when both command-line arguments parse to the same integer,
# 1 when they differ.
exit(0 if int(argv[1]) == int(argv[2]) else 1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/1/check/size-match.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def analysis_size(size_str):
 4 |     size_str = size_str.strip()
 5 |     availables = {
 6 |         "B": 1,
 7 |         "Byte": 1,
 8 |         "K": 1024,
 9 |         "KB": 1024,
10 |         "M": 1024*1024,
11 |         "MB": 1024*1024,
12 |         "G": 1024*1024*1024,
13 |         "GB": 1024*1024*1024,
14 |         "T": 1024*1024*1024*1024,
15 |         "TB": 1024*1024*1024*1024,
16 |         "P": 1024*1024*1024*1024*1024,
17 |         "PB": 1024*1024*1024*1024*1024,        
18 |     }
19 |     for size_unit in availables:
20 |         if size_str.endswith(size_unit):
21 |             return int(size_str[:-len(size_unit)]) * availables[size_unit]
22 |     return int(size_str)
23 | 
# Exit status 0 when both command-line arguments describe the same number of
# bytes, 1 otherwise.
exit(0 if analysis_size(argv[1]) == analysis_size(argv[2]) else 1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/1/check/string-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | 
def norm_newline(s):
  """Normalise line endings in *s*: CRLF and lone CR both become LF."""
  # CRLF must be handled before lone CR, otherwise each pair would turn into
  # two newlines.
  for ending in ("\r\n", "\r"):
    s = s.replace(ending, "\n")
  return s
5 | 
# Normalise both command-line arguments: unify line endings, then trim
# surrounding whitespace.
left = norm_newline(argv[1]).strip()
right = norm_newline(argv[2]).strip()

# Exit status 0 when the normalised strings are identical, 1 otherwise.
exit(0 if left == right else 1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/1/example/1.sh:
--------------------------------------------------------------------------------
 1 | echo '#!/bin/bash
 2 | 
 3 | count_files() {
 4 |     local dir=$1
 5 |     local count=0
 6 | 
 7 |     for file in "$dir"/*; do
 8 |         if [ -f "$file" ]; then
 9 |             count=$((count + 1))
10 |         elif [ -d "$file" ]; then
11 |             count_sub=$(count_files "$file")
12 |             count=$((count + count_sub))
13 |         fi
14 |     done
15 | 
16 |     echo "$count"
17 | }
18 | 
19 | directory="$1"
20 | total_count=$(count_files "$directory")
21 | echo "$total_count"' > /usr/local/bin/count
22 | chmod +x /usr/local/bin/count


--------------------------------------------------------------------------------
/data/os_interaction/scripts/1/init/gen_words.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | declare -a vocab=('aa' 'aaa' 'ab' 'abc' 'able' 'abut' 'ace' 'ache' 'act' 'acm')
 4 | declare -a sep=(' ' '  ' '   ' '    ')
 5 | 
 6 | out='/usr/words.txt'
 7 | 
 8 | echo -n "${vocab[RANDOM % 10]}" > "${out}"
 9 | for i in {1..99}; do
10 |   echo -n "${sep[RANDOM % 4]}${vocab[RANDOM % 10]}" >> "${out}"
11 | done
12 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/1/init/install_nettools.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | apt-get update && apt-get install -y net-tools iproute2 lsof


--------------------------------------------------------------------------------
/data/os_interaction/scripts/1/init/nested_folders.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | create_nested_folders() {
 4 |     local depth=$1  # Current depth
 5 |     local max_depth=$2  # Max depth
 6 | 
 7 | 
 8 |     mkdir "folder$depth"
 9 |     cd "folder$depth"
10 | 
11 |     touch "echo-love"
12 | 
13 |     if [ $depth -eq 5 ]; then
14 |         echo "echo \"I love myself.\"" > "echo-love"
15 |         chmod +x "echo-love"
16 |     fi
17 | 
18 |     if [ $depth -lt $max_depth ]; then
19 |         create_nested_folders $((depth + 1)) $max_depth
20 |     fi
21 | 
22 |     cd ..
23 | }
24 | 
25 | max_depth=10
26 | 
27 | create_nested_folders 1 $max_depth
28 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/1/init/stock-log.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Define the possible values for each field
 4 | names=("Alice" "Bob")
 5 | actions=("Purchase" "Sell")
 6 | 
 7 | # Generate 401 random lines (the loop below counts i from 1 through 401)
 8 | for ((i=1; i<=401; i++))
 9 | do
10 |     # Randomly select values for each field
11 |     name=${names[$RANDOM % ${#names[@]}]}
12 |     action=${actions[$RANDOM % ${#actions[@]}]}
13 |     stock_index=$((RANDOM % 100))
14 |     count=$((RANDOM % 1000))
15 |     
16 |     # Write the line to the file
17 |     echo "$name | $action | $stock_index | $count" >> /usr/stock.log
18 | done
19 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/2/check/1.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | count_files() {
 4 |     # echo $1 >> tmp.log
 5 |     local dir=$1
 6 |     local count=0
 7 | 
 8 |     for file in "$dir"/*; do
 9 |         if [ -f "$file" ]; then
10 |             count=$((count + 1))
11 |         elif [ -d "$file" ]; then
12 |             count_sub=$(count_files "$file")
13 |             count=$((count + count_sub))
14 |         fi
15 |     done
16 | 
17 |     echo "$count"
18 | }
19 | 
20 | # echo `count_files "/usr/local/bin"`, `count "/usr/local/bin"`
21 | 
22 | [ `count_files "/usr/local/bin"`x != `count "/usr/local/bin"`x ] && exit 1
23 | [ `count_files "/root"`x != `count "/root"`x ] && exit 1
24 | [ `count_files "/bin"`x != `count "/bin"`x ] && exit 1
25 | [ `count_files "/lib"`x != `count "/lib"`x ] && exit 1
26 | [ `count_files "/dev"`x != `count "/dev"`x ] && exit 1
27 | [ `count_files "/usr/include"`x != `count "/usr/include"`x ] && exit 1
28 | exit 0


--------------------------------------------------------------------------------
/data/os_interaction/scripts/2/check/containing.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v2 in v1:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/2/check/in.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v1 in v2:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/2/check/integer-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | if int(argv[1]) == int(argv[2]): exit(0)
3 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/2/check/size-match.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def analysis_size(size_str):
 4 |     size_str = size_str.strip()
 5 |     availables = {
 6 |         "B": 1,
 7 |         "Byte": 1,
 8 |         "K": 1024,
 9 |         "KB": 1024,
10 |         "M": 1024*1024,
11 |         "MB": 1024*1024,
12 |         "G": 1024*1024*1024,
13 |         "GB": 1024*1024*1024,
14 |         "T": 1024*1024*1024*1024,
15 |         "TB": 1024*1024*1024*1024,
16 |         "P": 1024*1024*1024*1024*1024,
17 |         "PB": 1024*1024*1024*1024*1024,        
18 |     }
19 |     for size_unit in availables:
20 |         if size_str.endswith(size_unit):
21 |             return int(size_str[:-len(size_unit)]) * availables[size_unit]
22 |     return int(size_str)
23 | 
24 | if analysis_size(argv[1]) == analysis_size(argv[2]): 
25 |     exit(0)
26 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/2/check/string-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | 
def norm_newline(s):
  """Return ``s`` with every CRLF pair and every lone CR rewritten as LF."""
  # Collapse CRLF first; otherwise its CR would turn into a second LF.
  unix_text = s.replace("\r\n", "\n")
  return unix_text.replace("\r", "\n")
5 | 
6 | if norm_newline(argv[1]).strip() == norm_newline(argv[2]).strip():
7 |   exit(0)
8 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/2/example/1.sh:
--------------------------------------------------------------------------------
 1 | echo '#!/bin/bash
 2 | 
 3 | count_files() {
 4 |     local dir=$1
 5 |     local count=0
 6 | 
 7 |     for file in "$dir"/*; do
 8 |         if [ -f "$file" ]; then
 9 |             count=$((count + 1))
10 |         elif [ -d "$file" ]; then
11 |             count_sub=$(count_files "$file")
12 |             count=$((count + count_sub))
13 |         fi
14 |     done
15 | 
16 |     echo "$count"
17 | }
18 | 
19 | directory="$1"
20 | total_count=$(count_files "$directory")
21 | echo "$total_count"' > /usr/local/bin/count
22 | chmod +x /usr/local/bin/count


--------------------------------------------------------------------------------
/data/os_interaction/scripts/2/init/gen_words.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | declare -a vocab=('aa' 'aaa' 'ab' 'abc' 'able' 'abut' 'ace' 'ache' 'act' 'acm')
 4 | declare -a sep=(' ' '  ' '   ' '    ')
 5 | 
 6 | out='/usr/words.txt'
 7 | 
 8 | echo -n "${vocab[RANDOM % 10]}" > "${out}"
 9 | for i in {1..99}; do
10 |   echo -n "${sep[RANDOM % 4]}${vocab[RANDOM % 10]}" >> "${out}"
11 | done
12 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/2/init/install_nettools.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | apt-get update && apt-get install -y net-tools iproute2 lsof


--------------------------------------------------------------------------------
/data/os_interaction/scripts/2/init/nested_folders.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | create_nested_folders() {
 4 |     local depth=$1  # Current depth
 5 |     local max_depth=$2  # Max depth
 6 | 
 7 | 
 8 |     mkdir "folder$depth"
 9 |     cd "folder$depth"
10 | 
11 |     touch "echo-love"
12 | 
13 |     if [ $depth -eq 5 ]; then
14 |         echo "echo \"I love myself.\"" > "echo-love"
15 |         chmod +x "echo-love"
16 |     fi
17 | 
18 |     if [ $depth -lt $max_depth ]; then
19 |         create_nested_folders $((depth + 1)) $max_depth
20 |     fi
21 | 
22 |     cd ..
23 | }
24 | 
25 | max_depth=10
26 | 
27 | create_nested_folders 1 $max_depth
28 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/2/init/stock-log.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Define the possible values for each field
 4 | names=("Alice" "Bob")
 5 | actions=("Purchase" "Sell")
 6 | 
 7 | # Generate 401 random lines (the loop below counts i from 1 through 401)
 8 | for ((i=1; i<=401; i++))
 9 | do
10 |     # Randomly select values for each field
11 |     name=${names[$RANDOM % ${#names[@]}]}
12 |     action=${actions[$RANDOM % ${#actions[@]}]}
13 |     stock_index=$((RANDOM % 100))
14 |     count=$((RANDOM % 1000))
15 |     
16 |     # Write the line to the file
17 |     echo "$name | $action | $stock_index | $count" >> /usr/stock.log
18 | done
19 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/3/check/1.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | count_files() {
 4 |     # echo $1 >> tmp.log
 5 |     local dir=$1
 6 |     local count=0
 7 | 
 8 |     for file in "$dir"/*; do
 9 |         if [ -f "$file" ]; then
10 |             count=$((count + 1))
11 |         elif [ -d "$file" ]; then
12 |             count_sub=$(count_files "$file")
13 |             count=$((count + count_sub))
14 |         fi
15 |     done
16 | 
17 |     echo "$count"
18 | }
19 | 
20 | # echo `count_files "/usr/local/bin"`, `count "/usr/local/bin"`
21 | 
22 | [ `count_files "/usr/local/bin"`x != `count "/usr/local/bin"`x ] && exit 1
23 | [ `count_files "/root"`x != `count "/root"`x ] && exit 1
24 | [ `count_files "/bin"`x != `count "/bin"`x ] && exit 1
25 | [ `count_files "/lib"`x != `count "/lib"`x ] && exit 1
26 | [ `count_files "/dev"`x != `count "/dev"`x ] && exit 1
27 | [ `count_files "/usr/include"`x != `count "/usr/include"`x ] && exit 1
28 | exit 0


--------------------------------------------------------------------------------
/data/os_interaction/scripts/3/check/containing.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v2 in v1:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/3/check/in.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v1 in v2:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/3/check/integer-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | if int(argv[1]) == int(argv[2]): exit(0)
3 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/3/check/size-match.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def analysis_size(size_str):
 4 |     size_str = size_str.strip()
 5 |     availables = {
 6 |         "B": 1,
 7 |         "Byte": 1,
 8 |         "K": 1024,
 9 |         "KB": 1024,
10 |         "M": 1024*1024,
11 |         "MB": 1024*1024,
12 |         "G": 1024*1024*1024,
13 |         "GB": 1024*1024*1024,
14 |         "T": 1024*1024*1024*1024,
15 |         "TB": 1024*1024*1024*1024,
16 |         "P": 1024*1024*1024*1024*1024,
17 |         "PB": 1024*1024*1024*1024*1024,        
18 |     }
19 |     for size_unit in availables:
20 |         if size_str.endswith(size_unit):
21 |             return int(size_str[:-len(size_unit)]) * availables[size_unit]
22 |     return int(size_str)
23 | 
24 | if analysis_size(argv[1]) == analysis_size(argv[2]): 
25 |     exit(0)
26 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/3/check/string-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | 
def norm_newline(s):
  """Return ``s`` with every CRLF pair and every lone CR rewritten as LF."""
  # Collapse CRLF first; otherwise its CR would turn into a second LF.
  unix_text = s.replace("\r\n", "\n")
  return unix_text.replace("\r", "\n")
5 | 
6 | if norm_newline(argv[1]).strip() == norm_newline(argv[2]).strip():
7 |   exit(0)
8 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/3/example/1.sh:
--------------------------------------------------------------------------------
 1 | echo '#!/bin/bash
 2 | 
 3 | count_files() {
 4 |     local dir=$1
 5 |     local count=0
 6 | 
 7 |     for file in "$dir"/*; do
 8 |         if [ -f "$file" ]; then
 9 |             count=$((count + 1))
10 |         elif [ -d "$file" ]; then
11 |             count_sub=$(count_files "$file")
12 |             count=$((count + count_sub))
13 |         fi
14 |     done
15 | 
16 |     echo "$count"
17 | }
18 | 
19 | directory="$1"
20 | total_count=$(count_files "$directory")
21 | echo "$total_count"' > /usr/local/bin/count
22 | chmod +x /usr/local/bin/count


--------------------------------------------------------------------------------
/data/os_interaction/scripts/3/init/gen_words.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | declare -a vocab=('aa' 'aaa' 'ab' 'abc' 'able' 'abut' 'ace' 'ache' 'act' 'acm')
 4 | declare -a sep=(' ' '  ' '   ' '    ')
 5 | 
 6 | out='/usr/words.txt'
 7 | 
 8 | echo -n "${vocab[RANDOM % 10]}" > "${out}"
 9 | for i in {1..99}; do
10 |   echo -n "${sep[RANDOM % 4]}${vocab[RANDOM % 10]}" >> "${out}"
11 | done
12 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/3/init/install_nettools.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | apt-get update && apt-get install -y net-tools iproute2 lsof


--------------------------------------------------------------------------------
/data/os_interaction/scripts/3/init/nested_folders.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | create_nested_folders() {
 4 |     local depth=$1  # Current depth
 5 |     local max_depth=$2  # Max depth
 6 | 
 7 | 
 8 |     mkdir "folder$depth"
 9 |     cd "folder$depth"
10 | 
11 |     touch "echo-love"
12 | 
13 |     if [ $depth -eq 5 ]; then
14 |         echo "echo \"I love myself.\"" > "echo-love"
15 |         chmod +x "echo-love"
16 |     fi
17 | 
18 |     if [ $depth -lt $max_depth ]; then
19 |         create_nested_folders $((depth + 1)) $max_depth
20 |     fi
21 | 
22 |     cd ..
23 | }
24 | 
25 | max_depth=10
26 | 
27 | create_nested_folders 1 $max_depth
28 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/3/init/stock-log.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Define the possible values for each field
 4 | names=("Alice" "Bob")
 5 | actions=("Purchase" "Sell")
 6 | 
 7 | # Generate 401 random lines (the loop below counts i from 1 through 401)
 8 | for ((i=1; i<=401; i++))
 9 | do
10 |     # Randomly select values for each field
11 |     name=${names[$RANDOM % ${#names[@]}]}
12 |     action=${actions[$RANDOM % ${#actions[@]}]}
13 |     stock_index=$((RANDOM % 100))
14 |     count=$((RANDOM % 1000))
15 |     
16 |     # Write the line to the file
17 |     echo "$name | $action | $stock_index | $count" >> /usr/stock.log
18 | done
19 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/4/check/1.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | count_files() {
 4 |     # echo $1 >> tmp.log
 5 |     local dir=$1
 6 |     local count=0
 7 | 
 8 |     for file in "$dir"/*; do
 9 |         if [ -f "$file" ]; then
10 |             count=$((count + 1))
11 |         elif [ -d "$file" ]; then
12 |             count_sub=$(count_files "$file")
13 |             count=$((count + count_sub))
14 |         fi
15 |     done
16 | 
17 |     echo "$count"
18 | }
19 | 
20 | # echo `count_files "/usr/local/bin"`, `count "/usr/local/bin"`
21 | 
22 | [ `count_files "/usr/local/bin"`x != `count "/usr/local/bin"`x ] && exit 1
23 | [ `count_files "/root"`x != `count "/root"`x ] && exit 1
24 | [ `count_files "/bin"`x != `count "/bin"`x ] && exit 1
25 | [ `count_files "/lib"`x != `count "/lib"`x ] && exit 1
26 | [ `count_files "/dev"`x != `count "/dev"`x ] && exit 1
27 | [ `count_files "/usr/include"`x != `count "/usr/include"`x ] && exit 1
28 | exit 0


--------------------------------------------------------------------------------
/data/os_interaction/scripts/4/check/containing.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v2 in v1:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/4/check/in.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v1 in v2:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/4/check/integer-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | if int(argv[1]) == int(argv[2]): exit(0)
3 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/4/check/size-match.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def analysis_size(size_str):
 4 |     size_str = size_str.strip()
 5 |     availables = {
 6 |         "B": 1,
 7 |         "Byte": 1,
 8 |         "K": 1024,
 9 |         "KB": 1024,
10 |         "M": 1024*1024,
11 |         "MB": 1024*1024,
12 |         "G": 1024*1024*1024,
13 |         "GB": 1024*1024*1024,
14 |         "T": 1024*1024*1024*1024,
15 |         "TB": 1024*1024*1024*1024,
16 |         "P": 1024*1024*1024*1024*1024,
17 |         "PB": 1024*1024*1024*1024*1024,        
18 |     }
19 |     for size_unit in availables:
20 |         if size_str.endswith(size_unit):
21 |             return int(size_str[:-len(size_unit)]) * availables[size_unit]
22 |     return int(size_str)
23 | 
24 | if analysis_size(argv[1]) == analysis_size(argv[2]): 
25 |     exit(0)
26 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/4/check/string-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | 
def norm_newline(s):
  """Return ``s`` with every CRLF pair and every lone CR rewritten as LF."""
  # Collapse CRLF first; otherwise its CR would turn into a second LF.
  unix_text = s.replace("\r\n", "\n")
  return unix_text.replace("\r", "\n")
5 | 
6 | if norm_newline(argv[1]).strip() == norm_newline(argv[2]).strip():
7 |   exit(0)
8 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/4/example/1.sh:
--------------------------------------------------------------------------------
 1 | echo '#!/bin/bash
 2 | 
 3 | count_files() {
 4 |     local dir=$1
 5 |     local count=0
 6 | 
 7 |     for file in "$dir"/*; do
 8 |         if [ -f "$file" ]; then
 9 |             count=$((count + 1))
10 |         elif [ -d "$file" ]; then
11 |             count_sub=$(count_files "$file")
12 |             count=$((count + count_sub))
13 |         fi
14 |     done
15 | 
16 |     echo "$count"
17 | }
18 | 
19 | directory="$1"
20 | total_count=$(count_files "$directory")
21 | echo "$total_count"' > /usr/local/bin/count
22 | chmod +x /usr/local/bin/count


--------------------------------------------------------------------------------
/data/os_interaction/scripts/4/init/gen_words.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | declare -a vocab=('aa' 'aaa' 'ab' 'abc' 'able' 'abut' 'ace' 'ache' 'act' 'acm')
 4 | declare -a sep=(' ' '  ' '   ' '    ')
 5 | 
 6 | out='/usr/words.txt'
 7 | 
 8 | echo -n "${vocab[RANDOM % 10]}" > "${out}"
 9 | for i in {1..99}; do
10 |   echo -n "${sep[RANDOM % 4]}${vocab[RANDOM % 10]}" >> "${out}"
11 | done
12 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/4/init/install_nettools.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | apt-get update && apt-get install -y net-tools iproute2 lsof


--------------------------------------------------------------------------------
/data/os_interaction/scripts/4/init/nested_folders.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | create_nested_folders() {
 4 |     local depth=$1  # Current depth
 5 |     local max_depth=$2  # Max depth
 6 | 
 7 | 
 8 |     mkdir "folder$depth"
 9 |     cd "folder$depth"
10 | 
11 |     touch "echo-love"
12 | 
13 |     if [ $depth -eq 5 ]; then
14 |         echo "echo \"I love myself.\"" > "echo-love"
15 |         chmod +x "echo-love"
16 |     fi
17 | 
18 |     if [ $depth -lt $max_depth ]; then
19 |         create_nested_folders $((depth + 1)) $max_depth
20 |     fi
21 | 
22 |     cd ..
23 | }
24 | 
25 | max_depth=10
26 | 
27 | create_nested_folders 1 $max_depth
28 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/4/init/stock-log.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Define the possible values for each field
 4 | names=("Alice" "Bob")
 5 | actions=("Purchase" "Sell")
 6 | 
 7 | # Generate 401 random lines (the loop below counts i from 1 through 401)
 8 | for ((i=1; i<=401; i++))
 9 | do
10 |     # Randomly select values for each field
11 |     name=${names[$RANDOM % ${#names[@]}]}
12 |     action=${actions[$RANDOM % ${#actions[@]}]}
13 |     stock_index=$((RANDOM % 100))
14 |     count=$((RANDOM % 1000))
15 |     
16 |     # Write the line to the file
17 |     echo "$name | $action | $stock_index | $count" >> /usr/stock.log
18 | done
19 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/5/check/containing.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v2 in v1:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/5/check/in.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v1 in v2:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/5/check/integer-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | if int(argv[1]) == int(argv[2]): exit(0)
3 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/5/check/size-match.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def analysis_size(size_str):
 4 |     size_str = size_str.strip()
 5 |     availables = {
 6 |         "B": 1,
 7 |         "Byte": 1,
 8 |         "K": 1024,
 9 |         "KB": 1024,
10 |         "M": 1024*1024,
11 |         "MB": 1024*1024,
12 |         "G": 1024*1024*1024,
13 |         "GB": 1024*1024*1024,
14 |         "T": 1024*1024*1024*1024,
15 |         "TB": 1024*1024*1024*1024,
16 |         "P": 1024*1024*1024*1024*1024,
17 |         "PB": 1024*1024*1024*1024*1024,        
18 |     }
19 |     for size_unit in availables:
20 |         if size_str.endswith(size_unit):
21 |             return int(size_str[:-len(size_unit)]) * availables[size_unit]
22 |     return int(size_str)
23 | 
24 | if analysis_size(argv[1]) == analysis_size(argv[2]): 
25 |     exit(0)
26 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/5/check/string-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | 
def norm_newline(s):
  """Return ``s`` with every CRLF pair and every lone CR rewritten as LF."""
  # Collapse CRLF first; otherwise its CR would turn into a second LF.
  unix_text = s.replace("\r\n", "\n")
  return unix_text.replace("\r", "\n")
5 | 
6 | if norm_newline(argv[1]).strip() == norm_newline(argv[2]).strip():
7 |   exit(0)
8 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/5/checking/0.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | check() {
 4 |     local expression="$*"
 5 |     # echo python3 -c "print(\"%.6f\"%($expression))" >> tmp.log
 6 |     local expected_result=`python3 -c "print(\"%.6f\"%($expression))"`
 7 |     local output=$(calc "${expression}")
 8 |     # echo "$expression", $expected_result, $output >> tmp.log
 9 |     echo `python3 -c "print(abs($output - $expected_result)<1e-5)"`
10 | }
11 | 
12 | # echo > tmp.log
13 | [ `check "15 + (27 * 4) - 10"`x != Truex ] && exit 1
14 | [ `check "8 * (14 - 6) + 12"`x != Truex ] && exit 1
15 | [ `check "3 + (6.7 * 9) - 5.5"`x != Truex ] && exit 1
16 | [ `check "20 / (5 + 2) - 1"`x != Truex ] && exit 1
17 | [ `check "9 * (16 / 8) + 3"`x != Truex ] && exit 1
18 | [ `check "25 - (8 * 3) + 2"`x != Truex ] && exit 1
19 | [ `check "14 + (25.6 / 2) - 5.2"`x != Truex ] && exit 1
20 | [ `check "18 / (6 - 2) + 9"`x != Truex ] && exit 1
21 | [ `check "10 + (15 * 2) - 7"`x != Truex ] && exit 1
22 | [ `check "30 / (5 + 1) - 2"`x != Truex ] && exit 1
23 | [ `check "7 * (18 / 9) + 4"`x != Truex ] && exit 1
24 | [ `check "22 - (4 * 6) + 3"`x != Truex ] && exit 1
25 | [ `check "16 + (14.8 / 2) - 3.2"`x != Truex ] && exit 1
26 | [ `check "24 / (8 - 4) + 5"`x != Truex ] && exit 1
27 | [ `check "12 + (11 * 3) - 6"`x != Truex ] && exit 1
28 | [ `check "35 / (7 + 1) - 4"`x != Truex ] && exit 1
29 | [ `check "6 * (9 / 3) + 7"`x != Truex ] && exit 1
30 | [ `check "19 - (7 * 2) + 5"`x != Truex ] && exit 1
31 | [ `check "13 + (29.5 / 5) - 1.5"`x != Truex ] && exit 1
32 | [ `check "28 / (14 - 6) + 2"`x != Truex ] && exit 1
33 | [ `check "11 + (8 * 5) - 9"`x != Truex ] && exit 1
34 | [ `check "40 / (4 + 2) - 3"`x != Truex ] && exit 1
35 | [ `check "4 * (12 / 6) + 6"`x != Truex ] && exit 1
36 | [ `check "23 - (6 * 4) + 8"`x != Truex ] && exit 1
37 | [ `check "17 + (18.2 / 2) - 2.8"`x != Truex ] && exit 1
38 | [ `check "36 / (6 - 3) + 10"`x != Truex ] && exit 1
39 | [ `check "5 + (10 * 4) - 8"`x != Truex ] && exit 1
40 | [ `check "50 / (5 + 2) - 6"`x != Truex ] && exit 1
41 | [ `check "8 * (16 / 4) + 9"`x != Truex ] && exit 1
42 | [ `check "21 - (9 * 2) + 4"`x != Truex ] && exit 1
43 | 
44 | exit 0


--------------------------------------------------------------------------------
/data/os_interaction/scripts/5/checking/1.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | count_files() {
 4 |     # echo $1 >> tmp.log
 5 |     local dir=$1
 6 |     local count=0
 7 | 
 8 |     for file in "$dir"/*; do
 9 |         if [ -f "$file" ]; then
10 |             count=$((count + 1))
11 |         elif [ -d "$file" ]; then
12 |             count_sub=$(count_files "$file")
13 |             count=$((count + count_sub))
14 |         fi
15 |     done
16 | 
17 |     echo "$count"
18 | }
19 | 
20 | # echo `count_files "/usr/local/bin"`, `count "/usr/local/bin"`
21 | 
22 | [ `count_files "/usr/local/bin"`x != `count "/usr/local/bin"`x ] && exit 1
23 | [ `count_files "/root"`x != `count "/root"`x ] && exit 1
24 | [ `count_files "/bin"`x != `count "/bin"`x ] && exit 1
25 | [ `count_files "/lib"`x != `count "/lib"`x ] && exit 1
26 | [ `count_files "/dev"`x != `count "/dev"`x ] && exit 1
27 | [ `count_files "/usr/include"`x != `count "/usr/include"`x ] && exit 1
28 | exit 0


--------------------------------------------------------------------------------
/data/os_interaction/scripts/5/checking/2.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | check() {
 4 |     target=`date -d "$1" +"$2"`
 5 |     output=`date-format "$1" "$2"` || exit 1
 6 |     [ "$output"x != "$target"x ] && exit 1
 7 |     exit 0
 8 | }
 9 | 
10 | check "2023-5-1" "%Y-%m" || exit 1
11 | check "23-5-2" "%Y-%m-%d" || exit 1
12 | check "2023-5-1" "%Y/%m" || exit 1
13 | check "2023-5-1" "%m/%d" || exit 1
14 | check "2023/5/10" "%d/%m" || exit 1
15 | check "2021/05/1" "Date: %Y-%m-%d" || exit 1
16 | 
17 | exit 0


--------------------------------------------------------------------------------
/data/os_interaction/scripts/5/example/0.sh:
--------------------------------------------------------------------------------
1 | echo '#!/bin/bash
2 | python3 -c "print(\"%.6f\"%($*))"' > calc
3 | chmod +x calc
4 | mv calc /usr/local/bin/
5 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/5/example/1.sh:
--------------------------------------------------------------------------------
 1 | echo '#!/bin/bash
 2 | 
 3 | count_files() {
 4 |     local dir=$1
 5 |     local count=0
 6 | 
 7 |     for file in "$dir"/*; do
 8 |         if [ -f "$file" ]; then
 9 |             count=$((count + 1))
10 |         elif [ -d "$file" ]; then
11 |             count_sub=$(count_files "$file")
12 |             count=$((count + count_sub))
13 |         fi
14 |     done
15 | 
16 |     echo "$count"
17 | }
18 | 
19 | directory="$1"
20 | total_count=$(count_files "$directory")
21 | echo "$total_count"' > /usr/local/bin/count
22 | chmod +x /usr/local/bin/count


--------------------------------------------------------------------------------
/data/os_interaction/scripts/5/example/2.sh:
--------------------------------------------------------------------------------
1 | echo '#!/bin/bash
2 | 
3 | date -d "$1" +"$2"
4 | 
5 | ' > /usr/local/bin/date-format
6 | chmod +x /usr/local/bin/date-format
7 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/5/init/1.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/data/os_interaction/scripts/5/init/1.sh


--------------------------------------------------------------------------------
/data/os_interaction/scripts/5/prompt.md:
--------------------------------------------------------------------------------
 1 | Generate 5 bash problems, together with their corresponding solutions and checking scripts. Note that each solution should contain multiple lines, and each checking script should exit 0 on success and exit 1 on failure. Besides, the problems, solutions, and checking scripts should match the following format (the [TODO(description)] tags represent the blanks that you should fill in):
 2 | 
 3 | Problem [TODO(index)]: "I would like to implement the following function: the \"[TODO(command)]\" command can help [TODO(function)]. For example, if I enter command [TODO(example input, command and its parameters)], the output will be [TODO(example result)]."
 4 | 
 5 | Solution [TODO(index)]
 6 | 
 7 | ```bash
 8 | echo '#!/bin/bash
 9 | 
10 | [TODO(implement, multi-lines)]
11 | 
12 | ' > /usr/local/bin/[TODO(command)]
13 | chmod +x /usr/local/bin/[TODO(command)]
14 | ```
15 | 
16 | Checking Script [TODO(index)]
17 | 
18 | ```bash
19 | [TODO(some preparation)]
20 | 
21 | [ [TODO(command)] != '[TODO(the correct answer)]' ] && exit 1
22 | [ [TODO(command)] != '[TODO(the correct answer)]' ] && exit 1
23 | ...
24 | [ [TODO(command)] != '[TODO(the correct answer)]' ] && exit 1
25 | [ [TODO(command)] != '[TODO(the correct answer)]' ] && exit 1
26 | exit 0
27 | ```
28 | 
29 | For example:
30 | 
31 | Problem 1: "I would like to implement the following function: entering the \"count\" command counts the number of regular files in a directory and its subdirectories (recursively), and displays the total count. If there is a link or something, count it separately. For example, I can enter \"count /usr/local\" to get the number of regular files in /usr/local recursively. If there are 5 regular files in that, the output is \"5\"."
32 | 
33 | Solution 1
34 | 
35 | ```bash
36 | echo '#!/bin/bash
37 | 
38 | count_files() {
39 |     local dir=$1
40 |     local count=0
41 | 
42 |     for file in "$dir"/*; do
43 |         if [ -f "$file" ]; then
44 |             count=$((count + 1))
45 |         elif [ -d "$file" ]; then
46 |             count_sub=$(count_files "$file")
47 |             count=$((count + count_sub))
48 |         fi
49 |     done
50 | 
51 |     echo "$count"
52 | }
53 | 
54 | directory="$1"
55 | total_count=$(count_files "$directory")
56 | echo "$total_count"' > /usr/local/bin/count
57 | chmod +x /usr/local/bin/count
58 | ```
59 | 
60 | Checking Script 1
61 | 
62 | ```bash
63 | #!/bin/bash
64 | 
65 | count_files() {
66 |     # echo $1 >> tmp.log
67 |     local dir=$1
68 |     local count=0
69 | 
70 |     for file in "$dir"/*; do
71 |         if [ -f "$file" ]; then
72 |             count=$((count + 1))
73 |         elif [ -d "$file" ]; then
74 |             count_sub=$(count_files "$file")
75 |             count=$((count + count_sub))
76 |         fi
77 |     done
78 | 
79 |     echo "$count"
80 | }
81 | 
82 | # echo `count_files "/usr/local/bin"`, `count "/usr/local/bin"`
83 | 
84 | [ `count_files "/usr/local/bin"`x != `count "/usr/local/bin"`x ] && exit 1
85 | [ `count_files "/root"`x != `count "/root"`x ] && exit 1
86 | [ `count_files "/bin"`x != `count "/bin"`x ] && exit 1
87 | [ `count_files "/lib"`x != `count "/lib"`x ] && exit 1
88 | [ `count_files "/dev"`x != `count "/dev"`x ] && exit 1
89 | [ `count_files "/usr/include"`x != `count "/usr/include"`x ] && exit 1
90 | exit 0
91 | ```
92 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/6/collecting.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import requests
 3 | 
 4 | with open("permission.json") as f:
 5 |     p = f.read()
 6 | 
 7 | f = open("collected.json", "a")
 8 | correct = 0
 9 | while correct < 1000:
10 |     try:
11 |         resp = requests.post(
12 |             "http://45.41.95.10:10001/api/openai/chat-completion",
13 |             json={
14 |                 "model": "gpt-3.5-turbo",
15 |                 "messages": [
16 |                     {"role": "user",
17 |                      "content": "The topic is: linux permission questions.\ngenerate one like this:\n" + p[2:-1]},
18 |                 ]
19 |             },
20 |             timeout=60
21 |         )
22 |         entry = json.loads(resp.json()["choices"][0]["message"]["content"])
23 |         f.write(json.dumps(entry))
24 |         f.write(",\n")
25 |         f.flush()
26 |         correct += 1
27 |     except Exception as e:
28 |         print(e)
29 |     if correct % 50 == 0:
30 |         print(correct)
31 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/6/stackoverflow/spider.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | from bs4 import BeautifulSoup
 3 | 
 4 | 
 5 | if __name__ == '__main__':
 6 |     f = open("data.csv", "a")
 7 |     for tag in ["linux", "bash", "operating-system", "ubuntu"]:
 8 |         for i in range(1, 10):
 9 |             r = requests.get(f"https://stackoverflow.com/questions/tagged/{tag}?tab=votes&page={i}&pagesize=50")
10 |             soup = BeautifulSoup(r.text, "html.parser")
11 |             question_div = soup.find("div", {"id": "questions"})
12 |             for question in question_div.find_all("div", class_="s-post-summary--content"):
13 |                 title = question.h3.a
14 |                 f.write(title.string + ", " + title["href"] + "\n")
15 |                 f.flush()
16 |     f.close()
17 | 
18 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/7/check/1.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | count_files() {
 4 |     # echo $1 >> tmp.log
 5 |     local dir=$1
 6 |     local count=0
 7 | 
 8 |     for file in "$dir"/*; do
 9 |         if [ -f "$file" ]; then
10 |             count=$((count + 1))
11 |         elif [ -d "$file" ]; then
12 |             count_sub=$(count_files "$file")
13 |             count=$((count + count_sub))
14 |         fi
15 |     done
16 | 
17 |     echo "$count"
18 | }
19 | 
20 | # echo `count_files "/usr/local/bin"`, `count "/usr/local/bin"`
21 | 
22 | [ `count_files "/usr/local/bin"`x != `count "/usr/local/bin"`x ] && exit 1
23 | [ `count_files "/root"`x != `count "/root"`x ] && exit 1
24 | [ `count_files "/bin"`x != `count "/bin"`x ] && exit 1
25 | [ `count_files "/lib"`x != `count "/lib"`x ] && exit 1
26 | [ `count_files "/dev"`x != `count "/dev"`x ] && exit 1
27 | [ `count_files "/usr/include"`x != `count "/usr/include"`x ] && exit 1
28 | exit 0


--------------------------------------------------------------------------------
/data/os_interaction/scripts/7/check/containing.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v2 in v1:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/7/check/in.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v1 in v2:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/7/check/integer-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | if int(argv[1]) == int(argv[2]): exit(0)
3 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/7/check/size-match.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def analysis_size(size_str):
 4 |     size_str = size_str.strip()
 5 |     availables = {
 6 |         "B": 1,
 7 |         "Byte": 1,
 8 |         "K": 1024,
 9 |         "KB": 1024,
10 |         "M": 1024*1024,
11 |         "MB": 1024*1024,
12 |         "G": 1024*1024*1024,
13 |         "GB": 1024*1024*1024,
14 |         "T": 1024*1024*1024*1024,
15 |         "TB": 1024*1024*1024*1024,
16 |         "P": 1024*1024*1024*1024*1024,
17 |         "PB": 1024*1024*1024*1024*1024,        
18 |     }
19 |     for size_unit in availables:
20 |         if size_str.endswith(size_unit):
21 |             return int(size_str[:-len(size_unit)]) * availables[size_unit]
22 |     return int(size_str)
23 | 
24 | if analysis_size(argv[1]) == analysis_size(argv[2]): 
25 |     exit(0)
26 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/7/check/string-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | 
3 | def norm_newline(s):
4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
5 | 
6 | if norm_newline(argv[1]).strip() == norm_newline(argv[2]).strip():
7 |   exit(0)
8 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/7/example/1.sh:
--------------------------------------------------------------------------------
 1 | echo '#!/bin/bash
 2 | 
 3 | count_files() {
 4 |     local dir=$1
 5 |     local count=0
 6 | 
 7 |     for file in "$dir"/*; do
 8 |         if [ -f "$file" ]; then
 9 |             count=$((count + 1))
10 |         elif [ -d "$file" ]; then
11 |             count_sub=$(count_files "$file")
12 |             count=$((count + count_sub))
13 |         fi
14 |     done
15 | 
16 |     echo "$count"
17 | }
18 | 
19 | directory="$1"
20 | total_count=$(count_files "$directory")
21 | echo "$total_count"' > /usr/local/bin/count
22 | chmod +x /usr/local/bin/count


--------------------------------------------------------------------------------
/data/os_interaction/scripts/7/init/gen_words.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | declare -a vocab=('aa' 'aaa' 'ab' 'abc' 'able' 'abut' 'ace' 'ache' 'act' 'acm')
 4 | declare -a sep=(' ' '  ' '   ' '    ')
 5 | 
 6 | out='/usr/words.txt'
 7 | 
 8 | echo -n "${vocab[RANDOM % 10]}" > "${out}"
 9 | for i in {1..99}; do
10 |   echo -n "${sep[RANDOM % 4]}${vocab[RANDOM % 10]}" >> "${out}"
11 | done
12 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/7/init/install_nettools.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | apt-get update && apt-get install -y net-tools iproute2 lsof


--------------------------------------------------------------------------------
/data/os_interaction/scripts/7/init/nested_folders.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Create folder<depth> inside the current directory, touch an "echo-love" file
 4 | # in it, and recurse until folder<max_depth> has been created.  Only the file
 5 | # at depth 5 is given content and the executable bit.
 6 | #   $1 - current depth
 7 | #   $2 - maximum depth
 8 | # Returns 1 without going any deeper if the new folder cannot be entered.
 9 | create_nested_folders() {
10 |     local depth=$1  # Current depth
11 |     local max_depth=$2  # Max depth
12 | 
13 |     mkdir "folder$depth"
14 |     # Stop here when cd fails; otherwise the files below would land in the
15 |     # wrong directory and the final `cd ..` would climb above the start point.
16 |     cd "folder$depth" || return 1
17 | 
18 |     touch "echo-love"
19 | 
20 |     if [ "$depth" -eq 5 ]; then
21 |         echo "echo \"I love myself.\"" > "echo-love"
22 |         chmod +x "echo-love"
23 |     fi
24 | 
25 |     if [ "$depth" -lt "$max_depth" ]; then
26 |         create_nested_folders $((depth + 1)) "$max_depth"
27 |     fi
28 | 
29 |     cd ..
30 | }
31 | 
32 | max_depth=10
33 | 
34 | create_nested_folders 1 "$max_depth"
35 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/7/init/stock-log.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Define the possible values for each field
 4 | names=("Alice" "Bob")
 5 | actions=("Purchase" "Sell")
 6 | 
 7 | # Generate 401 random lines (i runs from 1 through 401 inclusive)
 8 | for ((i=1; i<=401; i++))
 9 | do
10 |     # Randomly select values for each field
11 |     name=${names[$RANDOM % ${#names[@]}]}
12 |     action=${actions[$RANDOM % ${#actions[@]}]}
13 |     stock_index=$((RANDOM % 100))
14 |     count=$((RANDOM % 1000))
15 |     
16 |     # Append the line to the log file
17 |     echo "$name | $action | $stock_index | $count" >> /usr/stock.log
18 | done
19 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/dev/check/0.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | check() {
 4 |     local expression="$*"
 5 |     # echo python3 -c "print(\"%.6f\"%($expression))" >> tmp.log
 6 |     local expected_result=`python3 -c "print(\"%.6f\"%($expression))"`
 7 |     local output=$(calc "${expression}")
 8 |     # echo "$expression", $expected_result, $output >> tmp.log
 9 |     echo `python3 -c "print(abs($output - $expected_result)<1e-5)"`
10 | }
11 | 
12 | # echo > tmp.log
13 | [ `check "15 + (27 * 4) - 10"`x != Truex ] && exit 1
14 | [ `check "8 * (14 - 6) + 12"`x != Truex ] && exit 1
15 | [ `check "3 + (6.7 * 9) - 5.5"`x != Truex ] && exit 1
16 | [ `check "20 / (5 + 2) - 1"`x != Truex ] && exit 1
17 | [ `check "9 * (16 / 8) + 3"`x != Truex ] && exit 1
18 | [ `check "25 - (8 * 3) + 2"`x != Truex ] && exit 1
19 | [ `check "14 + (25.6 / 2) - 5.2"`x != Truex ] && exit 1
20 | [ `check "18 / (6 - 2) + 9"`x != Truex ] && exit 1
21 | [ `check "17 + (18.2 / 2) - 2.8"`x != Truex ] && exit 1
22 | [ `check "36 / (6 - 3) + 10"`x != Truex ] && exit 1
23 | [ `check "5 + (10 * 4) - 8"`x != Truex ] && exit 1
24 | [ `check "50 / (5 + 2) - 6"`x != Truex ] && exit 1
25 | [ `check "8 * (16 / 4) + 9"`x != Truex ] && exit 1
26 | [ `check "21 - (9 * 2) + 4"`x != Truex ] && exit 1
27 | 
28 | exit 0


--------------------------------------------------------------------------------
/data/os_interaction/scripts/dev/check/containing.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v2 in v1:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/dev/check/in.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def norm_newline(s):
 4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
 5 | 
 6 | v1 = norm_newline(argv[1]).strip()
 7 | v2 = norm_newline(argv[2]).strip()
 8 | 
 9 | if v1 in v2:
10 |   exit(0)
11 | else:
12 |   exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/dev/check/integer-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | if int(argv[1]) == int(argv[2]): exit(0)
3 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/dev/check/size-match.py:
--------------------------------------------------------------------------------
 1 | from sys import argv
 2 | 
 3 | def analysis_size(size_str):
 4 |     size_str = size_str.strip()
 5 |     availables = {
 6 |         "B": 1,
 7 |         "Byte": 1,
 8 |         "K": 1024,
 9 |         "KB": 1024,
10 |         "M": 1024*1024,
11 |         "MB": 1024*1024,
12 |         "G": 1024*1024*1024,
13 |         "GB": 1024*1024*1024,
14 |         "T": 1024*1024*1024*1024,
15 |         "TB": 1024*1024*1024*1024,
16 |         "P": 1024*1024*1024*1024*1024,
17 |         "PB": 1024*1024*1024*1024*1024,        
18 |     }
19 |     for size_unit in availables:
20 |         if size_str.endswith(size_unit):
21 |             return int(size_str[:-len(size_unit)]) * availables[size_unit]
22 |     return int(size_str)
23 | 
24 | if analysis_size(argv[1]) == analysis_size(argv[2]): 
25 |     exit(0)
26 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/dev/check/string-match.py:
--------------------------------------------------------------------------------
1 | from sys import argv
2 | 
3 | def norm_newline(s):
4 |   return s.replace("\r\n", "\n").replace("\r", "\n")
5 | 
6 | if norm_newline(argv[1]).strip() == norm_newline(argv[2]).strip():
7 |   exit(0)
8 | exit(1)


--------------------------------------------------------------------------------
/data/os_interaction/scripts/dev/example/0.sh:
--------------------------------------------------------------------------------
1 | echo '#!/bin/bash
2 | python3 -c "print(\"%.6f\"%($*))"' > calc
3 | chmod +x calc
4 | mv calc /usr/local/bin/
5 | 


--------------------------------------------------------------------------------
/data/os_interaction/scripts/dev/init/stock-log.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Define the possible values for each field
 4 | names=("Alice" "Bob")
 5 | actions=("Purchase" "Sell")
 6 | 
 7 | # Generate 401 random lines (i runs from 1 through 401 inclusive)
 8 | for ((i=1; i<=401; i++))
 9 | do
10 |     # Randomly select values for each field
11 |     name=${names[$RANDOM % ${#names[@]}]}
12 |     action=${actions[$RANDOM % ${#actions[@]}]}
13 |     stock_index=$((RANDOM % 100))
14 |     count=$((RANDOM % 1000))
15 |     
16 |     # Append the line to the log file
17 |     echo "$name | $action | $stock_index | $count" >> /usr/stock.log
18 | done
19 | 


--------------------------------------------------------------------------------
/docs/Config_cn.md:
--------------------------------------------------------------------------------
  1 | # 配置系统
  2 | 
  3 | [🌏English](Config_en.md)
  4 | 
  5 | ## 基本语法
  6 | 
  7 | 配置系统采用了YAML格式。为了方便配置，我们在基础的YAML语法上做了一些扩展。
  8 | `import`, `default`, `overwrite`是我们扩展的关键字。
  9 | 
 10 | ### import
 11 | 
 12 | `import`关键字用于导入其他文件中的配置。例如以下两个写法是等价的：
 13 | 
 14 | 写法一：
 15 | 
 16 | ```yaml
 17 | # config.yaml
 18 | definition:
 19 |   def1: something...
 20 |   def2: something...
 21 | ```
 22 | 
 23 | 写法二：
 24 | 
 25 | ```yaml
 26 | # def1.yaml
 27 | def1: something...
 28 | 
 29 | # def2.yaml
 30 | def2: something...
 31 | 
 32 | # config.yaml
 33 | definition:
 34 |   import:
 35 |     - def1.yaml
 36 |     - def2.yaml
 37 | ```
 38 | 
 39 | `import`关键字支持字符串或者列表作为值，
 40 | 分别对应导入单个文件和导入多个文件的情况。
 41 | 
 42 | 在导入过程中，如果被导入文件中有`import`关键字，
 43 | 则将先执行被导入文件的`import`。
 44 | 对于后两个关键字也是如此。
 45 | 
 46 | 导入过程中如果遇到了键冲突的情况，将尝试递归地合并冲突的键所对应的值。
 47 | 如果遇到无法合并的情况，则后出现的将覆盖先出现的。
 48 | 
 49 | ### default
 50 | 
 51 | `default`关键字用于指定默认值。例如以下两个写法是等价的：
 52 | 
 53 | 写法一：
 54 | 
 55 | ```yaml
 56 | definition:
 57 |   def1:
 58 |     type: int
 59 |     value: 1
 60 |   def2:
 61 |     type: int
 62 |     value: 2
 63 |   def3:
 64 |     type: float
 65 |     value: 1.1
 66 | ```
 67 | 
 68 | 写法二：
 69 | 
 70 | ```yaml
 71 | definition:
 72 |   default:
 73 |     type: int
 74 |   def1:
 75 |     value: 1
 76 |   def2:
 77 |     value: 2
 78 |   def3:
 79 |     type: float
 80 |     value: 1.1
 81 | ```
 82 | 
 83 | `default`关键字支持字符串、列表或者字典作为值。
 84 | config解析器将尝试合并`default`的值和与`default`并列的键所对应的值。
 85 | 如果遇到无法合并的情况，则`default`关键字下的值具有更低的优先级。
 86 | 
 87 | ### overwrite
 88 | 
 89 | `overwrite`关键字的用法和`default`类似，
 90 | 只不过在遇到冲突情况时`overwrite`关键字下的值具有更高的优先级。
 91 | 这个关键字常与`import`联用，用于统一设置这一配置文件下所要求的值。
 92 | 
 93 | ## 配置文件
 94 | 
 95 | 配置文件的主要目录结构如下：
 96 | 
 97 | ```
 98 | configs
 99 | ├── assignments
100 | │   ├── definition.yaml
101 | │   ├── default.yaml
102 | │   └── ...
103 | ├── agents
104 | ├── tasks
105 | │   ├── task_assembly.yaml
106 | │   └── ...
107 | └── start_task.yaml
108 | ```
109 | 
110 | ### assignments
111 | 
112 | `assignments`目录下存放了所有的任务配置文件。
113 | 其中`definition.yaml`集合了所有的任务定义和模型定义。
114 | 
115 | 单个任务配置文件主要需要以下字段：
116 | 
117 | - `definition`: 通常import自`definition.yaml`，用于定义任务和模型。
118 | - `concurrency`: 用于定义模型的最大并行数。
119 | - `assignments`: 接受多个`assignment`，用于定义任务的具体分配。
120 | - `output`: 用于定义输出文件的路径。
121 | 
122 | 单个`assignment`需要两个字段：
123 | 
124 | - `agents`: 此任务需要运行的agent的名称。
125 | - `tasks`: 此任务需要运行的task的名称。
126 | 
127 | ### agents
128 | 
129 | `agents`目录下存放了所有的agent配置文件。
130 | 配置中键是agent的名称，值是agent的配置。
131 | 单个agent配置需要以下字段：
132 | 
133 | - `module`: 定义对应的agent client模块。
134 | - `parameters`: 定义需要传入对应模块的参数。
135 | 
136 | ### tasks
137 | 
138 | `tasks`目录下存放了所有的task配置文件。
139 | 其中`task_assembly.yaml`集合了所有的task定义。
140 | 如果只是想运行现有的任务，一般不需要修改此目录下的文件。
141 | 
142 | 与agent配置类似，键是task的名称，值是task的配置。
143 | 单个task配置需要以下字段：
144 | 
145 | - `module`: 定义对应的task模块。
146 | - `parameters`: 定义需要传入对应模块的参数。
147 | 
148 | ### start_task.yaml
149 | 
150 | 这个配置文件用于与`src.start_task`配合，自动化批量启动task_worker。
151 | 这个文件的字段如下：
152 | 
153 | - `definition`: 用于定义任务，通常import自`task_assembly.yaml`。
154 | - `start(Optional)`: 用于指定需要启动的任务，键是任务名称，值是需要启动的worker的个数。
155 | - `controller_address(Optional)`: 用于指定controller的地址，默认http://localhost:5000/api/
156 | 


--------------------------------------------------------------------------------
/docs/Entrance_cn.md:
--------------------------------------------------------------------------------
 1 | # 框架入口
 2 | 
 3 | [🌏English](Entrance_en.md)
 4 | 
 5 | 框架主要的入口是：
 6 | 
 7 | - `src.server.task_controller`: 用于手动启动task_controller。
 8 | - `src.start_task`: 用于启动task_worker。
 9 | - `src.assigner`: 用于启动评测。
10 | - `src.server.task_worker`: 用于手动启动task_worker。
11 | 
12 | ## src.server.task_controller
13 | 
14 | task_controller是task server的核心，用于管理所有的task_worker。
15 | task_controller应该是最先启动的，且推荐常开，如无必要也建议全局唯一。
16 | task_controller默认运行在5000端口，也可以通过`--port`（或`-p`）参数指定。
17 | 所有接口有统一的前缀`/api/`。
18 | 
19 | 一个启动task_controller并指定其运行在3000端口的示例：
20 | 
21 | ```bash
22 | python -m src.server.task_controller -p 3000
23 | ```
24 | 
25 | task_controller有以下几个用于监控的接口：
26 | 
27 | | 接口             | 方法   | 参数 | 说明                                                   |
28 | |----------------|------|----|------------------------------------------------------|
29 | | /list_workers  | GET  | 无  | 返回所有的task_worker                                     |
30 | | /list_sessions | GET  | 无  | 返回所有的session                                         |
31 | | /sync_all      | POST | 无  | 同步所有的task_worker上正在运行的session，如controller意外重启应先调用此接口 |
32 | | /cancel_all    | POST | 无  | 取消所有的task_worker上正在运行的session                        |
33 | 
34 | ## src.start_task
35 | 
36 | start_task是用于启动task_worker的脚本，其主要功能是读取配置文件并启动task_worker。
37 | start_task的配置文件是`configs/start_task.yaml`，具体详见配置文件介绍。
38 | 
39 | start_task的参数如下：
40 | 
41 | - `[--config CONFIG]`: 指定要读取的配置文件，默认为`configs/start_task.yaml`，通常没有必要更改。
42 | - `[--start | -s [TASK_NAME NUM [TASK_NAME NUM ...]]]`: 指定要启动的任务，格式为`TASK_NAME NUM`，其中`TASK_NAME`
43 |   是任务名称，`NUM`是需要启动的worker的个数，如此参数被指定则将覆盖**所有**配置文件中的设置。
44 | - `[--auto-controller | -a]`: 指定是否自动启动task_controller，默认为否。
45 | - `[--base-port | -p PORT]`:
46 |   指定task_worker的基础端口，默认为5001。各task_worker将从PORT开始依次分配端口：如若共有N个task_worker，那么它们的端口将依次为PORT到PORT+N-1。
47 | 
48 | ## src.assigner
49 | 
50 | assigner是用于启动评测的脚本，其主要功能是读取配置文件并启动评测，并将结果实时保存在指定的输出文件夹中。
51 | 
52 | assigner的参数如下：
53 | 
54 | - `[--config CONFIG]`: 指定要读取的配置文件，默认为`configs/assignments/default.yaml`。
55 | - `[--auto-retry]`: 自动重新测试失败的样例
56 | 
57 | 如配置文件中的`output`字段的值中含有`{TIMESTAMP}`，则此处将会被替换为当前时间并继续后续的操作（即相同的配置文件可能会有不同的输出文件夹）。
58 | 
59 | 如果配置中`output`字段指定的目录已经存在，则assigner将会尝试从此文件夹中读取已有的评测结果，在此基础上继续评测。
60 | 
61 | assigner**每次**启动都会将读取的配置文件解析并存储到`output`字段指定的目录中，**如目录中已有配置文件，该文件将被覆盖**。
62 | 
63 | ## src.server.task_worker
64 | 
65 | 一个task_worker对应了一个任务进程，同样的任务可以有多个task_worker。
66 | 如无必要，**不推荐**手动启动task_worker，而是通过`src.start_task`启动。
67 | 
68 | task_worker的参数如下：
69 | 
70 | - `NAME` 任务名称，用于指定要启动的任务。
71 | - `[--config | -c CONFIG]` 指定要读取的配置文件，默认为`configs/tasks/task_assembly.yaml`。
72 | - `[--port | -p PORT]` 指定task_worker的端口，默认为5001。
73 | - `[--controller | -C ADDRESS]` 指定task_controller的地址，默认为http://localhost:5000/api 。
74 | - `[--self ADDRESS]` 指定task_worker的地址，默认为http://localhost:5001/api
75 |   ，此地址将会被task_controller用于与task_worker通信，所以需要确保task_controller能够访问到此地址。
76 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy~=1.23.5
 2 | pydantic~=1.10.12
 3 | requests~=2.28.1
 4 | tqdm~=4.65.0
 5 | pyyaml~=6.0
 6 | jsonlines~=3.1.0
 7 | aiohttp~=3.8.4
 8 | uvicorn~=0.22.0
 9 | fastapi~=0.101.1
10 | urllib3~=1.26.15
11 | mysql-connector-python==8.0.33
12 | docker==6.1.2
13 | SPARQLWrapper
14 | networkx~=2.8.4
15 | anthropic~=0.4.1
16 | fschat~=0.2.31
17 | accelerate~=0.23.0
18 | transformers~=4.34.0


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/__init__.py


--------------------------------------------------------------------------------
/src/client/__init__.py:
--------------------------------------------------------------------------------
1 | from .agent import AgentClient
2 | from .agents import *
3 | from .task import TaskClient
4 | 


--------------------------------------------------------------------------------
/src/client/agent.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | 
 3 | 
 4 | class AgentClient:
 5 |     def __init__(self, *args, **kwargs):
 6 |         pass
 7 | 
 8 |     def inference(self, history: List[dict]) -> str:
 9 |         raise NotImplementedError()
10 | 


--------------------------------------------------------------------------------
/src/client/agent_test.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | from src.configs import ConfigLoader
 4 | from src.typings import InstanceFactory
 5 | from .agent import AgentClient
 6 | 
 7 | 
 8 | def parse_args():
 9 |     parser = argparse.ArgumentParser()
10 |     parser.add_argument('--config', type=str, default='configs/agents/api_agents.yaml')
11 |     parser.add_argument('--agent', type=str, default='gpt-3.5-turbo-0613')
12 |     return parser.parse_args()
13 | 
14 | 
15 | def interaction(agent: AgentClient):
16 |     try:
17 |         history = []
18 |         while True:
19 |             print("================= USER  ===================")
20 |             user = input(">>> ")
21 |             history.append({"role": "user", "content": user})
22 |             try:
23 |                 agent_response = agent.inference(history)
24 |                 print("================ AGENT ====================")
25 |                 print(agent_response)
26 |                 history.append({"role": "agent", "content": agent_response})
27 |             except Exception as e:
28 |                 print(e)
29 |                 exit(0)
30 |     except KeyboardInterrupt:
31 |         print("\n[Exit] KeyboardInterrupt")
32 |         exit(0)
33 | 
34 | 
35 | if __name__ == '__main__':
36 |     args = parse_args()
37 |     loader = ConfigLoader()
38 |     config = loader.load_from(args.config)
39 |     assert args.agent in config, f"Agent {args.agent} not found in {args.config}"
40 |     agent_config = config[args.agent]
41 |     factory = InstanceFactory(**agent_config)
42 |     agent_client: AgentClient = factory.create()
43 |     interaction(agent_client)
44 | 


--------------------------------------------------------------------------------
/src/client/agents/__init__.py:
--------------------------------------------------------------------------------
1 | from .fastchat_client import FastChatAgent
2 | from .http_agent import HTTPAgent
3 | 


--------------------------------------------------------------------------------
/src/client/agents/claude_agent.py:
--------------------------------------------------------------------------------
 1 | import anthropic
 2 | import os
 3 | from copy import deepcopy
 4 | from typing import List
 5 | 
 6 | from ..agent import AgentClient
 7 | 
 8 | 
 9 | class Claude(AgentClient):
10 |     def __init__(self, api_args=None, *args, **config):
11 |         super().__init__(*args, **config)
12 |         if not api_args:
13 |             api_args = {}
14 |         api_args = deepcopy(api_args)
15 |         self.key = api_args.pop("key", None) or os.getenv('Claude_API_KEY')
16 |         api_args["model"] = api_args.pop("model", None)
17 |         if not self.key:
18 |             raise ValueError("Claude API KEY is required, please assign api_args.key or set OPENAI_API_KEY "
19 |                              "environment variable.")
20 |         if not api_args["model"]:
21 |             raise ValueError("Claude model is required, please assign api_args.model.")
22 |         self.api_args = api_args
23 |         if not self.api_args.get("stop_sequences"):
24 |             self.api_args["stop_sequences"] = [anthropic.HUMAN_PROMPT]
25 | 
26 |     def inference(self, history: List[dict]) -> str:
27 |         prompt = ""
28 |         for message in history:
29 |             if message["role"] == "user":
30 |                 prompt += anthropic.HUMAN_PROMPT + message["content"]
31 |             else:
32 |                 prompt += anthropic.AI_PROMPT + message["content"]
33 |         prompt += anthropic.AI_PROMPT
34 |         c = anthropic.Client(api_key=self.key)
35 |         resp = c.completions.create(prompt=prompt, **self.api_args)
36 |         return str(resp.completion)
37 | 


--------------------------------------------------------------------------------
/src/client/agents/test_agent.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | 
 3 | from src.client import AgentClient
 4 | 
 5 | 
 6 | class CountHistoryAgent(AgentClient):
 7 |     def __init__(self, *args, **kwargs):
 8 |         super().__init__(*args, **kwargs)
 9 | 
10 |     def inference(self, history: List[dict]) -> str:
11 |         return "I received {} items in history.".format(len(history))
12 | 


--------------------------------------------------------------------------------
/src/server/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/__init__.py


--------------------------------------------------------------------------------
/src/server/tasks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/__init__.py


--------------------------------------------------------------------------------
/src/server/tasks/alfworld/__init__.py:
--------------------------------------------------------------------------------
1 | from .task import ALFWorld


--------------------------------------------------------------------------------
/src/server/tasks/alfworld/environment.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import json
 4 | import glob
 5 | import random
 6 | import numpy as np 
 7 | 
 8 | import textworld
 9 | import textworld.agents
10 | import textworld.gym
11 | import gym
12 | 
13 | from alfworld.agents.utils.misc import Demangler, get_templated_task_desc, add_task_to_grammar
14 | import alfworld.agents.modules.generic as generic
15 | from alfworld.agents.environment.alfred_tw_env import AlfredTWEnv
16 | 
class SingleAlfredTWEnv(AlfredTWEnv):
    '''
    Interface for Textworld Env
    Contains only one game_file per environment

    ``__init__`` does not invoke ``AlfredTWEnv.__init__``; it assigns the
    attributes below directly and exposes exactly one game file.
    '''

    def __init__(self, config, name, train_eval="eval_out_of_distribution"):
        # config: nested mapping; config['env']['goal_desc_human_anns_prob'] is read here.
        # name: the single game file served by this environment.
        # train_eval: split label, stored unchanged.
        print("Initializing AlfredTWEnv...")
        self.config = config
        self.train_eval = train_eval

        self.goal_desc_human_anns_prob = self.config['env']['goal_desc_human_anns_prob']
        # Inherited helper, called only after self.config is set
        # (presumably it reads the config -- confirm in AlfredTWEnv).
        self.get_game_logic()
        # self.gen_game_files(regen_game_files=self.config['env']['regen_game_files'])

        # Fixed seed value stored on the instance.
        self.random_seed = 42

        # Restrict the environment to the one requested game.
        self.game_files = [name]
        self.num_games = 1
36 | 
def get_all_game_files(config, split="eval_out_of_distribution"):
    """Return the ``game_files`` of a temporary ``AlfredTWEnv`` built for ``split``.

    The environment is created only to read its ``game_files`` attribute and
    is not kept afterwards.
    """
    return AlfredTWEnv(config, train_eval=split).game_files
42 | 
43 | # if __name__=="__main__":
44 | #     os.environ["ALFWORLD_DATA"] = "/data/share/leixy/ReAct/alfworld/data"
45 | #     config=load_config("/data/share/leixy/AgentBench/src/tasks/alfworld/configs/base_config.yaml")
46 | #     game_files = get_all_game_files(config, "train")
47 | #     game_files = [game.split("data/")[-1] for game in game_files]
48 | #     with open("train.json", "w") as f:
49 | #         f.write(json.dumps(game_files, indent=2))
50 | #         f.close()
51 | #     print(len(game_files))
52 | #     print(game_files[0])
53 | 


--------------------------------------------------------------------------------
/src/server/tasks/alfworld/prompts/alfworld_multiturn_cot.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "naive_example": [
 3 |     "You are in the middle of a room. Looking quickly around you, you see a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a countertop 1, a garbagecan 1, a handtowelholder 2, a handtowelholder 1, a sinkbasin 2, a sinkbasin 1, a toilet 1, a toiletpaperhanger 1, and a towelholder 1.\nYour task is to: put some spraybottle on toilet.\n",
 4 |     "THOUGHT: The task is to put some spraybottle on toilet, so first i need to search the room for some spraybottle.\n ACTION: go to cabinet 1\n",
 5 |     "On the cabinet 1, you see a cloth 1, a soapbar 1, a soapbottle 1.\n",
 6 |     "THOUGHT: On the cabinet 1, there is no spraybottle, so i need to search somewhere else.\n ACTION: go to cabinet 2\n",
 7 |     "The cabinet 2 is closed.\n",
 8 |     "THOUGHT: since the cabinet 2 is closed, i need to open it.\n ACTION: open cabinet 2\n",
 9 |     "You open the cabinet 2. The cabinet 2 is open. In it, you see a candle 1, and a spraybottle 2.\n",
10 |     "THOUGHT: I have found a spraybottle 2, now i need to take it with me.\n ACTION: take spraybottle 2 from cabinet 2\n",
11 |     "You pick up the spraybottle 2 from the cabinet 2.\n",
12 |     "THOUGHT: I have taken the spraybottle 2 and the final target is to put it on toilet, so i need to go to toilet 1.\n ACTION: go to toilet 1\n",
13 |     "On the toilet 1, you see a soapbottle 2.\n",
14 |     "THOUGHT: The final target is to put it on toilet, now i have the spraybottle and i need to put it in/on toilet 1.\n ACTION: put spraybottle 2 in/on toilet 1\n",
15 |     "You put the spraybottle 2 in/on the toilet 1.\n",
16 |     "THOUGHT: I have completed the task.\n ACTION: Task succeeded.\n"
17 |   ]
18 | }


--------------------------------------------------------------------------------
/src/server/tasks/alfworld/utils.py:
--------------------------------------------------------------------------------
 1 | from tqdm import tqdm
 2 | from typing import List
 3 | import re
 4 | import threading
 5 | import jsonlines
 6 | import yaml
 7 | import json
 8 | import numpy as np
 9 | from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
10 | 
def bleu_score(reference, candidate):
    """Return the smoothed sentence-level BLEU of ``candidate`` against ``reference``.

    Both strings are split on whitespace; ``reference`` is the only reference
    sentence and smoothing uses ``SmoothingFunction().method4``.
    """
    return sentence_bleu(
        [reference.split()],
        candidate.split(),
        smoothing_function=SmoothingFunction().method4,
    )
18 | 
def process_ob(ob):
    """Drop the leading "You arrive at loc ..." sentence from an observation.

    Args:
        ob: Observation text.

    Returns:
        The text after the first ``'. '`` when ``ob`` starts with
        ``'You arrive at loc '``; otherwise ``ob`` unchanged.
    """
    if ob.startswith('You arrive at loc '):
        _, separator, remainder = ob.partition('. ')
        # Strip only when the sentence terminator is present. The previous
        # ``ob[ob.find('. ')+2:]`` sliced from index 1 when it was missing,
        # silently dropping the first character.
        if separator:
            return remainder
    return ob
23 | 
def process_action(action, choices, limit=0.01, to_print=False):
    """Extract the command after ``ACTION:`` and map it onto ``choices``.

    Args:
        action: Raw text expected to contain ``ACTION:``.
        choices: Admissible commands; when empty or ``None`` the extracted
            command is returned as-is.
        limit: A choice is substituted only if its BLEU score against the
            extracted command exceeds this value.
        to_print: Print the raw and the matched action when true.

    Returns:
        ``False`` when no ``ACTION:`` marker is found. Otherwise the extracted
        command (stripped, lower-cased, first line only), or the
        highest-scoring choice when its score exceeds ``limit``.
    """
    if to_print:
        print("preprocess action: ", action)

    found = re.search("ACTION:(.*)", action)
    if found is None:
        return False

    # Keep only the first line after the marker, normalised to lower case.
    action = found.group(1).strip().lower().split("\n")[0]

    if not choices or action in choices:
        return action

    try:
        scores = [bleu_score(option, action) for option in choices]
        best = np.argmax(np.array(scores))
        best_score = scores[best]
        if best_score > limit:
            if to_print:
                print("processed action: ", choices[best], " score: ", best_score)
            return choices[best]
    except Exception as e:
        # Scoring failures are reported and the unmatched command is returned.
        print("encounter exception: ", e)
        print("choices: ", choices)
        print("action: ", action)
    return action
51 | 
def load_prompts(prompts_file):
    """Read ``prompts_file`` as JSON and return the decoded object."""
    with open(prompts_file, 'r') as handle:
        # The ``with`` block closes the file on exit.
        return json.load(handle)
57 | 
def load_config(config_file):
    """Parse the YAML file at ``config_file`` with ``yaml.safe_load`` and return the result."""
    with open(config_file) as stream:
        return yaml.safe_load(stream)


--------------------------------------------------------------------------------
/src/server/tasks/avalon/README.md:
--------------------------------------------------------------------------------
 1 | # AvalonBench
 2 | 
 3 | ## Quick Start 
 4 | 
 5 | ### Start the task server and the assigner
 6 | 
 7 | Start the game (3 is the number of workers)
 8 | ```bash
 9 | python -m src.start_task -a --start avalon-dev-single 3
10 | ```
11 | Start the assigner
12 | ```bash
13 | python -m src.assigner --config ./configs/assignments/test_avalon.yaml
14 | ```
15 | 
16 | ### Customize configurations and data
17 | 
18 | 1. You can modify the file `configs/tasks/avalon.yaml` to configure the agent list. A config file looks like this:
19 | ```yaml
20 | default:
21 |   module: "src.server.tasks.avalon.AvalonBench"
22 |   parameters:
23 |     num_players: 5
24 |     discussion: False
25 | 
26 | avalon-dev-naive:
27 |   parameters:
28 |     name: "AvalonBench-dev-naive"
29 |     data_file: "data/avalon/dev.json"
30 |     agent_list: ["naive", "naive", "naive", "naive", "naive"]
31 | 
32 | avalon-dev-single:
33 |   parameters:
34 |     name: "AvalonBench-dev-single"
35 |     data_file: "data/avalon/dev.json"
36 |     agent_list: ["llm", "naive", "naive", "naive", "naive"]
37 | ```
38 | where `naive` stands for the naive bots. Each agent plays the role at the same index in the data file (see below).
39 | ```plaintext
40 | Note: There should only be one "llm" in the `agent_list`
41 | ```
42 | 
43 | 2. You can also add data in `data/avalon/dev.json` (Note: Currently we only support the 5-player game setting, which includes 1 Merlin, 2 Servants, 1 Minion and 1 Assassin). A data item looks like this:
44 | 
45 | ```json
46 |  {
47 |      "num_players": 5,
48 |      "quest_leader": 0,
49 |      "role_names": ["Assassin", "Servant", "Servant", "Merlin", "Minion"]
50 |  }
51 | ```
52 | where `quest_leader` is the id of the initial quest leader in this game. You can change the game setup by setting `quest_leader` to a number from 0 to 4, and by permuting `role_names`.
53 | 
54 | ### Naive experiment
55 | 
56 | You can also start a naive experiment using:
57 | ```bash
58 | python -m src.start_task -a --start avalon-dev-naive 3
59 | ```
60 | where all the agents are naive bots. For details of the naive strategies, please refer to the [paper](https://arxiv.org/pdf/2310.05036.pdf).
61 | 
62 | ## Prompts
63 | 
64 | All the prompts are maintained in `src/server/tasks/avalon/prompt.py`. You can find the respective prompts used in `src/server/tasks/avalon/agents/llm_with_discussion.py` and `src/server/tasks/avalon/wrapper.py`.


--------------------------------------------------------------------------------
/src/server/tasks/avalon/__init__.py:
--------------------------------------------------------------------------------
1 | from .task import AvalonBench


--------------------------------------------------------------------------------
/src/server/tasks/avalon/agents/agent.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | from ..engine import AvalonBasicConfig
 3 | class Agent:
 4 |     r"""The base class for all agents.
 5 | 
 6 |     Args:
 7 |         id (int): The Player id of the agent.
 8 |         role (int): The role (id) of the agent.
 9 |         config (AvalonBasicConfig): The config of the agent.
10 | 
11 |     To implement your own agent, subclass this class and implement the following methods:
12 |         - :method:`Agent.propose_team`
13 |         - :method:`Agent.vote_on_team`
14 |         - :method:`Agent.vote_on_mission`
15 |     """
16 |     def __init__(self, id: int, role: int, config: AvalonBasicConfig) -> None:
17 |         self.id = id
18 |         self.name = f"Player {id}"
19 |         self.role = role
20 |         self.role_name = config.ROLES[role]
21 |         self.config = config
22 | 
23 | 
24 |     def propose_team(self, mission_id: int) -> frozenset[int]:
25 |         r"""Propose a team of given size.
26 | 
27 |         Args:
28 |             mission_id (int): The id of the mission. team_size = config.num_players_for_quest[mission_id]
29 | 
30 |         Returns:
31 |             frozenset[int]: The list of player ids to be included in the team.
32 |         """
33 |         raise NotImplementedError
34 |     
35 | 
36 |     def vote_on_team(self, mission_id: int, team: frozenset[int]) -> bool:
37 |         r"""Vote on a given team.
38 | 
39 |         Args:
40 |             mission_id (int): The id of the mission. num_fails = self.config.num_fails_for_quest[mission_id]
41 |             team (frozenset[int]): The list of player ids included in the team.
42 | 
43 |         Returns:
44 |             bool: The vote result.
45 |         """
46 |         raise NotImplementedError
47 |     
48 |     def vote_on_mission(self, mission_id: int, quest_team: frozenset[int]) -> bool:
49 |         r"""Vote on a quest (team).
50 | 
51 |         Args:
52 |             mission_id (int): The id of the mission. num_fails = self.config.num_fails_for_quest[mission_id]
53 |             quest_team (frozenset[int]): The list of player ids included in the quest.
54 |         
55 |         Returns:
56 |             bool: The vote result.
57 |         """
58 |         raise NotImplementedError
59 |     
60 |     
61 |     def assassinate(self, num_players: int) -> int:
62 |         r"""Assassinate a player.
63 | 
64 |         Args:
65 |             num_players (int): The number of players in the game.
66 | 
67 |         Returns:
68 |             int: The id of the player to assassinate. The id is in the range [0, num_players).
69 |         """
70 |         raise NotImplementedError
71 |     
72 | 
73 |     def get_believed_sides(self, num_players: int) -> List[float]:
74 |         r"""Get the believed sides of all players.
75 | 
76 |         Args:
77 |             num_players (int): The number of players in the game.
78 |         
79 |         Returns:
80 |             List[float]: The list of believed sides (probability) of all players.
81 |         """
82 |         raise NotImplementedError


--------------------------------------------------------------------------------
/src/server/tasks/avalon/avalon_exception.py:
--------------------------------------------------------------------------------
 1 | from typing import Union
 2 | from src.typings import AgentBenchException
 3 | 
 4 | class AvalonException(AgentBenchException):
 5 |     def __init__(self, reason: str, detail: Union[str, None] = None) -> None:
 6 |         super().__init__()
 7 |         self.reason = reason
 8 |         self.detail = detail
 9 | 
10 |     def __str__(self) -> str:
11 |         if not self.detail:
12 |             return "{CLASS_NAME}[{REASON}]".format(
13 |                 CLASS_NAME=self.__class__.__name__, REASON=self.reason
14 |             )
15 |         else:
16 |             return "{CLASS_NAME}[{REASON}]: {DETAIL}".format(
17 |                 CLASS_NAME=self.__class__.__name__,
18 |                 REASON=self.reason,
19 |                 DETAIL=self.detail,
20 |             )
21 | 
class AvalonEnvException(AvalonException):
    """``AvalonException`` whose reason is fixed to "Avalon Environment Exception"."""

    def __init__(self, detail: Union[str, None] = None) -> None:
        super().__init__(reason="Avalon Environment Exception", detail=detail)
25 | 
class AvalonAgentActionException(AvalonException):
    """``AvalonException`` whose reason is fixed to "Invalid action (result) with retry"."""

    def __init__(self, detail: Union[str, None] = None) -> None:
        super().__init__(reason="Invalid action (result) with retry", detail=detail)


--------------------------------------------------------------------------------
/src/server/tasks/avalon/utils.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | def get_vote_result(answer: str):
 4 |     match_vote = "Yes|No"
 5 |     vote_result = []
 6 |     
 7 |     vote_result = re.findall(match_vote, answer)
 8 | 
 9 |     result = '' if len(vote_result) == 0 else vote_result[-1]
10 | 
11 |     return result
12 | 
def get_team_result(answer: str):
    """Return every run of digits in ``answer`` as an ``int``, in order of appearance."""
    return [int(digits) for digits in re.findall(r"\d+", answer)]
22 | 
def get_assassination_result(message: str, answer: str):
    """Return the last integer appearing in ``str(message) + str(answer)``.

    Raises:
        IndexError: If the combined text contains no digits.
    """
    combined = str(message) + str(answer)
    numbers = re.findall(r"\d+", combined)
    return int(numbers[-1])
32 | 
def get_believed_player_sides(answer):
    """Evaluate the text after the last ``"Answer: "`` in ``answer`` and return the result.

    When ``answer`` contains no ``"Answer: "`` marker, the whole string is
    evaluated.
    """
    # NOTE(review): ``eval`` executes arbitrary Python. ``answer`` is presumably
    # model-generated text (confirm against callers); if so this is a code
    # execution risk, and ``ast.literal_eval`` would be safer if only literals
    # are expected.
    scores = eval(answer.split("Answer: ")[-1])

    return scores
37 | 
def verbalize_team_result(team: frozenset, votes, outcome: bool):
    """Describe a team vote in English.

    Args:
        team: Player ids on the proposed team.
        votes: One vote per player, indexed by player id; each value must
            equal 0 (reject) or 1 (approve).
        outcome: Whether the team was approved.

    Returns:
        The verdict sentence followed by one sentence per vote.

    Raises:
        ValueError: If ``outcome`` equals neither ``True`` nor ``False``.
    """
    vote_words = {0: "reject", 1: "approve"}

    # Equality (not identity) is deliberate: 1 and 0 count as True and False.
    if outcome == True:
        verdict = "approved"
    elif outcome == False:
        verdict = "rejected"
    else:
        raise ValueError("Invalid outcome %s" % outcome)

    pieces = [f"The team {str(list(team))} is {verdict}."]
    pieces.extend(
        " Player %d voted %s." % (seat, vote_words[ballot])
        for seat, ballot in enumerate(votes)
    )
    return "".join(pieces)
55 | 
def verbalize_mission_result(team: frozenset, outcome: bool):
    """Describe a mission result in English.

    Args:
        team: Player ids that went on the mission.
        outcome: Whether the mission succeeded.

    Returns:
        The outcome sentence, followed by the team list and one
        " Player <id>" fragment per member.

    Raises:
        ValueError: If ``outcome`` equals neither ``True`` nor ``False``.
    """
    # Equality (not identity) is deliberate: 1 and 0 count as True and False.
    if outcome == True:
        headline = "The mission succeeded."
    elif outcome == False:
        headline = "The mission failed."
    else:
        raise ValueError("Invalid outcome %s" % outcome)

    team_clause = " The team is %s, which contains" % str(list(team))
    roster = "".join(" Player %s" % str(member) for member in team)
    return headline + team_clause + roster


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/__init__.py


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/action1.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Tuple
 2 | import random
 3 | 
 4 | from sdk.ai_client import Action, AIClient, Game
 5 | from prompt.en import enemy_fish, your_fish
 6 | 
 7 | class AI(AIClient):
 8 |     def __init__(self, stage) -> None:
 9 |         super().__init__()
10 |         self.stage = stage
11 |         self.name_to_id = {"spray": 1, "flame": 2, "eel": 3, "sunfish": 4, "barracuda": 5, "mobula": 6, "octopus": 8, "whiteshark": 9, "hammerhead": 10}
12 |         self.id_to_name = {}
13 |         for name, id in self.name_to_id.items():
14 |             self.id_to_name[id] = name
15 |         self.id_to_name[-1] = "unknown"
16 |         self.ai_fish = []
17 |         random.seed(42)
18 | 
19 |     def Pick(self, game: Game) -> List[int]:
20 |         pick_list = []     
21 |         self.pos_to_name = enemy_fish
22 |         
23 |         for i in range(4):
24 |             pick_list.append(self.name_to_id[self.pos_to_name[i]])
25 |             self.ai_fish.append(self.name_to_id[your_fish[i]])
26 |             
27 |         #random.shuffle(pick_list)
28 |         return pick_list
29 | 
30 |     def Assert(self, game: Game) -> Tuple[int, int]:
31 |         if self.stage == 1:
32 |             return (-1, -1)
33 |         else:
34 |             return (self.get_enemy_living_fishes()[0], random.choice(self.ai_fish))
35 | 
36 |     def Act(self, game: Game) -> Action:
37 |         action = Action(game)
38 |         my_pos: int = self.get_my_living_fishes()[0]
39 |         action.set_action_fish(my_pos)
40 |         return self.auto_valid_action(my_pos, action)
41 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/basline1.py:
--------------------------------------------------------------------------------
1 | from action1 import AI
2 | import sys
3 | 
if __name__ == "__main__":
    # Usage: <script> <stage>; the stage is parsed as an integer and the AI is run.
    AI(int(sys.argv[1])).run()
9 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/basline2.py:
--------------------------------------------------------------------------------
1 | from action2 import AI
2 | import sys
3 | 
if __name__ == "__main__":
    # Usage: <script> <stage>; the stage is parsed as an integer and the AI is run.
    AI(int(sys.argv[1])).run()
9 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/client.py:
--------------------------------------------------------------------------------
 1 | import socket
 2 | import threading
 3 | import json
 4 | import time
 5 | 
 6 | class Client:
 7 |     def __init__(self, port, host='localhost'):
 8 |         print(333)
 9 |         self.host = host
10 |         self.port = port
11 |         self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
12 |         self.socket.connect((self.host, self.port))
13 |         self.stop_flag = threading.Event()
14 | 
15 |     def quit(self):
16 |         self.send_message("")
17 |         self.stop()
18 | 
19 |     def llm_call(self, history, prompt, system):
20 |         message = []
21 |     
22 |         if system:
23 |             history.append((system, "Okay, I will play the game with you according to the rules."))
24 |         
25 |         for ix, chat in enumerate(history):
26 |             message.append({
27 |                 "role": "user",
28 |                 "content": chat[0]
29 |             })
30 |             message.append({
31 |                 "role": "agent",
32 |                 "content": chat[1]
33 |             })
34 |         
35 |         message.append({
36 |             "role": "user",
37 |             "content": prompt
38 |         })
39 |         #with open("client.txt", "a") as f:
40 |         #    f.write(json.dumps(message) + "\n")
41 |         self.send_message(json.dumps(message))
42 |         output = self.receive_messages()
43 |         
44 |         #with open("client.txt", "a") as f:
45 |         #    f.write(json.dumps(output) + "\n######################\n")
46 |         return output
47 |     
48 |     def receive_messages(self):
49 |         while not self.stop_flag.is_set():
50 |             data = self.socket.recv(1000000).decode()
51 |             if data:
52 |                 return data
53 | 
54 |     def send_message(self, message):
55 |         self.socket.sendall(message.encode())
56 | 
57 |     def stop(self):
58 |         self.stop_flag.set()
59 |         self.socket.close()


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/main.py:
--------------------------------------------------------------------------------
 1 | print(777)
 2 | import sys
 3 | from client import Client
 4 | 
 5 | print(666)
 6 | if __name__ == "__main__":
 7 |     print(555)
 8 |     language = sys.argv[1]
 9 |     stage = int(sys.argv[2])
10 |     order = int(sys.argv[3])
11 |     save_dir = sys.argv[4]
12 |     port = int(sys.argv[5])
13 |     client = Client(port=port)
14 |     if language == 'en':
15 |         from AI_En import Agent
16 |         myAI = Agent(client, stage, order, save_dir)
17 |     else:
18 |         from AI_Cn import Agent
19 |         myAI = Agent(client, stage, order, save_dir)
20 |     
21 |     myAI.run()
22 |     client.quit()


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/prompt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/prompt/__init__.py


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/prompt/cn.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
# Reply schema shown to the model for an action turn (Chinese runtime text).
# Keys: "选择的鱼" = chosen fish, "动作" = action, "目标位置" = target position.
# Values are the instructions for each field; positions are limited to [0, 3].
action_format = {
    "选择的鱼": "选择我方仍活着的一条鱼",
    "动作": "选择主动技能或普通攻击",
    "目标位置": "目标位置，必须在 [0, 3] 范围内"
}

# Reply schema shown to the model for a guess turn (Chinese runtime text).
# Keys: "猜测种类" = guessed fish type, "目标位置" = target position.
guess_format = {
    "猜测种类": "选择你想要猜测的种类",
    "目标位置": "目标位置，必须在 [0, 3] 范围内"
}
13 | 
# Skill table for the four fish described to the model (Chinese runtime text).
# Outer keys are fish names (literally: archerfish, fire-breathing fish,
# electric eel, ocean sunfish); inner keys are "被动技能" (passive skill) and
# "主动技能" (active skill).
# The r"" prefix has no effect here because the strings contain no backslashes.
fish_description = {
    "射水鱼": {
        "被动技能": r"反弹：队友被直接攻击后，若其生命值少于30%，对来源造成60的伤害",
        "主动技能": r"范围伤害：对敌方所有目标发动攻击，每个造成35%攻击力的伤害"
    },
    "喷火鱼": {
        "被动技能": r"反弹：队友被直接攻击后，若其生命值少于30%，对来源造成60的伤害",
        "主动技能": r"内斗：对一名队友发动攻击造成75伤害，并使自身攻击力永久提升140"
    },
    "电鳗": {
        "被动技能": r"伤害转移：自身受到敌方直接攻击而受到伤害时，如果还有队友存活，会让队友帮忙承受伤害（自身承受70%，其余队友平摊30%）；每累计受到的实际伤害值达到200，自身攻击值永久提升40",
        "主动技能": r"范围伤害：对敌方所有目标发动攻击，每个造成35%攻击力的伤害"
    },
    "翻车鱼": {
        "被动技能": r"伤害转移：自身受到敌方直接攻击而受到伤害时，如果还有队友存活，会让队友帮忙承受伤害（自身承受70%，其余队友平摊30%）；每累计受到的实际伤害值达到200，自身攻击值永久提升40",
        "主动技能": r"内斗：对一名队友发动攻击造成75伤害，并使自身攻击力永久提升140"
    }
}
32 | 
# Action-turn prompt template (Chinese runtime text) with three %s slots, in
# order: an optional extra rule, the fish description table, the reply format.
ACTION_PROMPT = """这是一个双人对战游戏，每队拥有四条宠物鱼。
每条鱼都有400初始生命值、200攻击力，以及自己的主动技能和被动技能。
每个回合，你可以选择一条活着的鱼，并且使用主动技能，或用普通攻击敌方鱼。
鱼的被动技能都会自动触发，只要被动技能条件满足。
%s
获胜的条件是在比赛结束时有更多的鱼活着。

以下是四种宠物鱼:
%s

与我一起进行对战。在每一轮中，你都应该输出你的思考过程，并用以下JSON格式返回你的动作:
%s

注意！你必须在每一轮中返回你的动作。否则，你将被直接判负。"""

# Extra rule text explaining that the opponent may guess the player's hidden
# fish types; a correct guess reveals the fish and deals 50 damage to all of
# the player's fish.
GUESS_RULE = """你拥有的鱼的初始信息是隐藏的，敌方可以在每回合内猜测你的一条鱼的种类，如果敌方猜对了，你的那条鱼的种类将被揭示，并且你的所有鱼会受到50的伤害"""

# Guess-turn prompt template (Chinese runtime text) with two %s slots, in
# order: the fish description table, the reply format.
GUESS_PROMPT = """这是一个双人对战游戏，每队拥有四条宠物鱼。
每条鱼都有初始生命值、攻击力，以及自己的主动技能和被动技能。
所有的鱼的初始信息是隐藏的，你可以在每回合内猜测敌方的一条鱼的种类，如果你猜对了，敌方那条鱼的种类将被揭示，并且敌方的所有鱼会受到50的伤害。
获胜的条件是在比赛结束时有更多的鱼活着。

以下是四种宠物鱼:
%s

与我一起进行对战。在每一轮中，你都应该输出你的思考过程，并用以下JSON格式返回你的动作:
%s

注意！你必须在每一轮中返回你的动作。否则，你将被直接判负。"""
62 | 
# Fully rendered action prompts, keyed by an integer (presumably the game
# stage -- confirm against the caller). Key 1 leaves the extra-rule slot
# empty; key 2 inserts GUESS_RULE.
# NOTE: the dicts are substituted with %s, so they appear in the prompt as
# Python dict reprs (single-quoted), not as json.dumps output.
ACTION_DESCRIPTION = {
    1: ACTION_PROMPT % ('', fish_description, action_format),
    2: ACTION_PROMPT % (GUESS_RULE, fish_description, action_format)
}

# Fully rendered guess prompt; only key 2 is defined.
GUESS_DESCRIPTION = {
    2: GUESS_PROMPT % (fish_description, guess_format),
}


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/prompt/cn_en.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
# Reply schema shown to the model for an action turn. In this variant the
# field names and instructions are English while the prompt templates in
# this module are Chinese.
action_format = {
    "pick_fish": "pick an alive fish",
    "action": "choose from [normal, active]",
    "target_position": "target's position, you must choose from [0,3]"
}

# Reply schema shown to the model for a guess turn (English field names).
guess_format = {
    "guess_type": "the enemy's fish type you may guess",
    "target_position": "guess target's position, you must choose from [0,3]"
}
13 | 
# Skill table for the four fish described to the model (Chinese runtime text).
# Outer keys are fish names (literally: archerfish, fire-breathing fish,
# electric eel, ocean sunfish); inner keys are "被动技能" (passive skill) and
# "主动技能" (active skill).
# NOTE(review): the "内斗" (infight) active skill is described as 100 damage
# here, while prompt/cn.py describes the same skill as 75 damage -- confirm
# which value matches the game engine.
fish_description = {
    "射水鱼": {
        "被动技能": r"反弹：队友被直接攻击后，若其生命值少于30%，对来源造成60的伤害",
        "主动技能": r"范围伤害：对敌方所有目标发动攻击，每个造成35%攻击力的伤害"
    },
    "喷火鱼": {
        "被动技能": r"反弹：队友被直接攻击后，若其生命值少于30%，对来源造成60的伤害",
        "主动技能": r"内斗：对一名队友发动攻击造成100伤害，并使自身攻击力永久提升140"
    },
    "电鳗": {
        "被动技能": r"伤害转移：自身受到敌方直接攻击而受到伤害时，如果还有队友存活，会让队友帮忙承受伤害（自身承受70%，其余队友平摊30%）；每累计受到的实际伤害值达到200，自身攻击值永久提升40",
        "主动技能": r"范围伤害：对敌方所有目标发动攻击，每个造成35%攻击力的伤害"
    },
    "翻车鱼": {
        "被动技能": r"伤害转移：自身受到敌方直接攻击而受到伤害时，如果还有队友存活，会让队友帮忙承受伤害（自身承受70%，其余队友平摊30%）；每累计受到的实际伤害值达到200，自身攻击值永久提升40",
        "主动技能": r"内斗：对一名队友发动攻击造成100伤害，并使自身攻击力永久提升140"
    }
}
32 | 
# Action-turn prompt template (Chinese runtime text) with three %s slots, in
# order: an optional extra rule, the fish description table, the reply format.
ACTION_PROMPT = """这是一个双人对战游戏，每队拥有四条宠物鱼。
每条鱼都有400初始生命值、200攻击力，以及自己的主动技能和被动技能。
每个回合，你可以选择一条活着的鱼，并且使用主动技能，或用普通攻击敌方鱼。
鱼的被动技能都会自动触发，只要被动技能条件满足。
%s
获胜的条件是在比赛结束时有更多的鱼活着。

以下是四种宠物鱼:
%s

与我一起进行对战。在每一轮中，你都应该输出你的思考过程，并用以下JSON格式返回你的动作:
%s

注意！你必须在每一轮中返回你的动作。否则，你将被直接判负。"""

# Extra rule text explaining that the opponent may guess the player's hidden
# fish types; a correct guess reveals the fish and deals 50 damage to all of
# the player's fish.
GUESS_RULE = """你拥有的鱼的初始信息是隐藏的，敌方可以在每回合内猜测你的一条鱼的种类，如果敌方猜对了，你的那条鱼的种类将被揭示，并且你的所有鱼会受到50的伤害"""

# Guess-turn prompt template (Chinese runtime text) with two %s slots, in
# order: the fish description table, the reply format.
GUESS_PROMPT = """这是一个双人对战游戏，每队拥有四条宠物鱼。
每条鱼都有初始生命值、攻击力，以及自己的主动技能和被动技能。
所有的鱼的初始信息是隐藏的，你可以在每回合内猜测敌方的一条鱼的种类，如果你猜对了，敌方那条鱼的种类将被揭示，并且敌方的所有鱼会受到50的伤害。
获胜的条件是在比赛结束时有更多的鱼活着。

以下是四种宠物鱼:
%s

与我一起进行对战。在每一轮中，你都应该输出你的思考过程，并用以下JSON格式返回你的动作:
%s

注意！你必须在每一轮中返回你的动作。否则，你将被直接判负。"""
62 | 
# Fully rendered action prompts, keyed by an integer (presumably the game
# stage -- confirm against the caller). Key 1 leaves the extra-rule slot
# empty; key 2 inserts GUESS_RULE.
# NOTE: the dicts are substituted with %s, so they appear in the prompt as
# Python dict reprs (single-quoted), not as json.dumps output.
ACTION_DESCRIPTION = {
    1: ACTION_PROMPT % ('', fish_description, action_format),
    2: ACTION_PROMPT % (GUESS_RULE, fish_description, action_format)
}

# Fully rendered guess prompt; only key 2 is defined.
GUESS_DESCRIPTION = {
    2: GUESS_PROMPT % (fish_description, guess_format),
}


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.15)
 2 | 
 3 | project(py_ai_sdk CXX)
 4 | 
 5 | set(CMAKE_BUILD_TYPE Release)
 6 | if(MSVC)
 7 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /utf-8 /std:c++17")
 8 | else(GCC or Clang)
 9 |     set(CMAKE_CXX_STANDARD 17)
10 | endif(MSVC)
11 | set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
12 | 
13 | # set(CMAKE_PREFIX_PATH "<Python-path>")
14 | # find_package(Python <Python-version> EXACT COMPONENTS Interpreter Development REQUIRED)
15 | set(pybind11_DIR
16 |     "/home/piano/mambaforge/lib/python3.10/site-packages/pybind11/share/cmake/pybind11")
17 | find_package(pybind11 REQUIRED)
18 | 
19 | file(GLOB_RECURSE jsoncpp "jsoncpp/*.cpp")
20 | pybind11_add_module(ai_client py_ai_sdk.cpp ${jsoncpp})
21 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/ai_client.cpython-310-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/ai_client.cpython-310-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/ai_client.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/ai_client.cpython-38-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/3.25.0/CMakeDetermineCompilerABI_CXX.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/3.25.0/CMakeDetermineCompilerABI_CXX.bin


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/3.25.0/CMakeSystem.cmake:
--------------------------------------------------------------------------------
 1 | set(CMAKE_HOST_SYSTEM "Linux-5.10.16.3-microsoft-standard-WSL2")
 2 | set(CMAKE_HOST_SYSTEM_NAME "Linux")
 3 | set(CMAKE_HOST_SYSTEM_VERSION "5.10.16.3-microsoft-standard-WSL2")
 4 | set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64")
 5 | 
 6 | 
 7 | 
 8 | set(CMAKE_SYSTEM "Linux-5.10.16.3-microsoft-standard-WSL2")
 9 | set(CMAKE_SYSTEM_NAME "Linux")
10 | set(CMAKE_SYSTEM_VERSION "5.10.16.3-microsoft-standard-WSL2")
11 | set(CMAKE_SYSTEM_PROCESSOR "x86_64")
12 | 
13 | set(CMAKE_CROSSCOMPILING "FALSE")
14 | 
15 | set(CMAKE_SYSTEM_LOADED 1)
16 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/3.25.0/CompilerIdCXX/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/3.25.0/CompilerIdCXX/a.out


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/CMakeDirectoryInformation.cmake:
--------------------------------------------------------------------------------
 1 | # CMAKE generated file: DO NOT EDIT!
 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.25
 3 | 
 4 | # Relative path conversion top directories.
 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk")
 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/build")
 7 | 
 8 | # Force unix paths in dependencies.
 9 | set(CMAKE_FORCE_UNIX_PATHS 1)
10 | 
11 | 
12 | # The C and CXX include file regular expressions for this directory.
13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$")
14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$")
15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN})
16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN})
17 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/TargetDirectories.txt:
--------------------------------------------------------------------------------
1 | /mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/build/CMakeFiles/ai_client.dir
2 | /mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/build/CMakeFiles/edit_cache.dir
3 | /mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/build/CMakeFiles/rebuild_cache.dir
4 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/DependInfo.cmake:
--------------------------------------------------------------------------------
 1 | 
 2 | # Consider dependencies only in project.
 3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF)
 4 | 
 5 | # The set of languages for which implicit dependencies are needed:
 6 | set(CMAKE_DEPENDS_LANGUAGES
 7 |   )
 8 | 
 9 | # The set of dependency files which are needed:
10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES
11 |   "/mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/jsoncpp/jsoncpp.cpp" "CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o" "gcc" "CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o.d"
12 |   "/mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/py_ai_sdk.cpp" "CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o" "gcc" "CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o.d"
13 |   )
14 | 
15 | # Targets to which this target links.
16 | set(CMAKE_TARGET_LINKED_INFO_FILES
17 |   )
18 | 
19 | # Fortran module output directory.
20 | set(CMAKE_Fortran_TARGET_MODULE_DIR "")
21 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/cmake_clean.cmake:
--------------------------------------------------------------------------------
 1 | file(REMOVE_RECURSE
 2 |   "CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o"
 3 |   "CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o.d"
 4 |   "CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o"
 5 |   "CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o.d"
 6 |   "bin/ai_client.cpython-310-x86_64-linux-gnu.so"
 7 |   "bin/ai_client.pdb"
 8 | )
 9 | 
10 | # Per-language clean rules from dependency scanning.
11 | foreach(lang CXX)
12 |   include(CMakeFiles/ai_client.dir/cmake_clean_${lang}.cmake OPTIONAL)
13 | endforeach()
14 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/compiler_depend.make:
--------------------------------------------------------------------------------
1 | # Empty compiler generated dependencies file for ai_client.
2 | # This may be replaced when dependencies are built.
3 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/compiler_depend.ts:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Timestamp file for compiler generated dependencies management for ai_client.
3 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/depend.make:
--------------------------------------------------------------------------------
1 | # Empty dependencies file for ai_client.
2 | # This may be replaced when dependencies are built.
3 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/flags.make:
--------------------------------------------------------------------------------
 1 | # CMAKE generated file: DO NOT EDIT!
 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.25
 3 | 
 4 | # compile CXX with /usr/bin/c++
 5 | CXX_DEFINES = -Dai_client_EXPORTS
 6 | 
 7 | CXX_INCLUDES = -isystem /home/piano/mambaforge/lib/python3.10/site-packages/pybind11/include -isystem /mnt/e/conda/envs/ml/include/python3.10
 8 | 
 9 | CXX_FLAGS = -O3 -DNDEBUG -fPIC -fvisibility=hidden -flto -fno-fat-lto-objects -std=gnu++17
10 | 
11 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/link.txt:
--------------------------------------------------------------------------------
1 | /usr/bin/c++ -fPIC -O3 -DNDEBUG -flto -shared  -o bin/ai_client.cpython-310-x86_64-linux-gnu.so CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o CMakeFiles/ai_client.dir/jsoncpp/jsoncpp.cpp.o 
2 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/progress.make:
--------------------------------------------------------------------------------
1 | CMAKE_PROGRESS_1 = 1
2 | CMAKE_PROGRESS_2 = 2
3 | CMAKE_PROGRESS_3 = 3
4 | 
5 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/ai_client.dir/py_ai_sdk.cpp.o


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/cmake.check_cache:
--------------------------------------------------------------------------------
1 | # This file is generated by cmake for dependency checking of the CMakeCache.txt file
2 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/CMakeFiles/progress.marks:
--------------------------------------------------------------------------------
1 | 3
2 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/bin/ai_client.cpython-310-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/build/bin/ai_client.cpython-310-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/build/cmake_install.cmake:
--------------------------------------------------------------------------------
 1 | # Install script for directory: /mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk
 2 | 
 3 | # Set the install prefix
 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX)
 5 |   set(CMAKE_INSTALL_PREFIX "/usr/local")
 6 | endif()
 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
 8 | 
 9 | # Set the install configuration name.
10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
11 |   if(BUILD_TYPE)
12 |     string(REGEX REPLACE "^[^A-Za-z0-9_]+" ""
13 |            CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}")
14 |   else()
15 |     set(CMAKE_INSTALL_CONFIG_NAME "Release")
16 |   endif()
17 |   message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"")
18 | endif()
19 | 
20 | # Set the component getting installed.
21 | if(NOT CMAKE_INSTALL_COMPONENT)
22 |   if(COMPONENT)
23 |     message(STATUS "Install component: \"${COMPONENT}\"")
24 |     set(CMAKE_INSTALL_COMPONENT "${COMPONENT}")
25 |   else()
26 |     set(CMAKE_INSTALL_COMPONENT)
27 |   endif()
28 | endif()
29 | 
30 | # Install shared libraries without execute permission?
31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
32 |   set(CMAKE_INSTALL_SO_NO_EXE "1")
33 | endif()
34 | 
35 | # Is this installation the result of a crosscompile?
36 | if(NOT DEFINED CMAKE_CROSSCOMPILING)
37 |   set(CMAKE_CROSSCOMPILING "FALSE")
38 | endif()
39 | 
40 | # Set default install directory permissions.
41 | if(NOT DEFINED CMAKE_OBJDUMP)
42 |   set(CMAKE_OBJDUMP "/usr/bin/objdump")
43 | endif()
44 | 
45 | if(CMAKE_INSTALL_COMPONENT)
46 |   set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt")
47 | else()
48 |   set(CMAKE_INSTALL_MANIFEST "install_manifest.txt")
49 | endif()
50 | 
51 | string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT
52 |        "${CMAKE_INSTALL_MANIFEST_FILES}")
53 | file(WRITE "/mnt/c/Users/piano/Downloads/AquaWarAI/AI_SDK/Python/sdk/build/${CMAKE_INSTALL_MANIFEST}"
54 |      "${CMAKE_INSTALL_MANIFEST_CONTENT}")
55 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/py_json_cast.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #ifndef PY_JSON_CAST_HPP_INCLUDED
 4 | #define PY_JSON_CAST_HPP_INCLUDED
 5 | 
 6 | #include <pybind11/pybind11.h>
 7 | 
 8 | #include "jsoncpp/json/json.h"
 9 | 
10 | namespace pybind11::detail {
11 | template <>
12 | class type_caster<Json::Value> {
13 |    public:
14 |     PYBIND11_TYPE_CASTER(Json::Value, _("Json"));
15 | 
16 |     bool load(handle src, bool) try {
17 |         value = handle_to_json(src);
18 |         return true;
19 |     } catch (type_error) {
20 |         return false;
21 |     }
22 | 
23 |     static handle cast(Json::Value src, return_value_policy, handle) {
24 |         return json_to_handle(src).release();
25 |     }
26 | 
27 |    private:
28 |     static Json::Value handle_to_json(const handle &hdl) {
29 |         if (hdl.ptr() == nullptr || hdl.is_none()) return Json::nullValue;
30 |         if (isinstance<bool_>(hdl)) return hdl.cast<bool>();
31 |         if (isinstance<int_>(hdl)) return hdl.cast<Json::LargestInt>();
32 |         if (isinstance<float_>(hdl)) return hdl.cast<double>();
33 |         if (isinstance<str>(hdl)) return hdl.cast<std::string>();
34 |         if (isinstance<tuple>(hdl) || isinstance<list>(hdl) ||
35 |             isinstance<set>(hdl)) {
36 |             Json::Value ret = Json::arrayValue;
37 |             for (const handle &h : hdl) ret.append(handle_to_json(h));
38 |             return ret;
39 |         }
40 |         if (isinstance<dict>(hdl)) {
41 |             Json::Value ret = Json::objectValue;
42 |             for (const handle &key : hdl)
43 |                 ret[str(key)] = handle_to_json(hdl[key]);
44 |             return ret;
45 |         }
46 |         throw type_error("Bad cast from Python to C++: " +
47 |                          repr(hdl).cast<std::string>());
48 |     }
49 |     static object json_to_handle(const Json::Value &json) {
50 |         if (json.isNull()) return none();
51 |         if (json.isBool()) return bool_(json.asBool());
52 |         if (json.isIntegral()) return int_(json.asLargestInt());
53 |         if (json.isNumeric()) return float_(json.asDouble());
54 |         if (json.isString()) return str(json.asCString());
55 |         if (json.isArray()) {
56 |             list ret;
57 |             for (const Json::Value &j : json) ret.append(json_to_handle(j));
58 |             return ret;
59 |         }
60 |         if (json.isObject()) {
61 |             dict ret;
62 |             for (Json::ValueConstIterator iter = json.begin();
63 |                  iter != json.end(); ++iter)
64 |                 ret[str(iter.key().asCString())] = json_to_handle(*iter);
65 |             return ret;
66 |         }
67 |         throw type_error("Bad cast from C++ to Python: " + json.asString());
68 |     }
69 | };
70 | }  // namespace pybind11::detail
71 | 
72 | #endif  // PY_JSON_CAST_HPP_INCLUDED


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/pyd/ai_client.cp38-win_amd64.pyd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/pyd/ai_client.cp38-win_amd64.pyd


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/pyd/ai_client.cpython-310-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/pyd/ai_client.cpython-310-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/pyd/ai_client.cpython-36m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/pyd/ai_client.cpython-36m-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/pyd/ai_client.cpython-38-darwin.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/pyd/ai_client.cpython-38-darwin.so


--------------------------------------------------------------------------------
/src/server/tasks/card_game/AI/sdk/pyd/ai_client.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/AI/sdk/pyd/ai_client.cpython-38-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/src/server/tasks/card_game/__init__.py:
--------------------------------------------------------------------------------
1 | from .task import CardGame
2 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/judger/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/judger/__init__.py


--------------------------------------------------------------------------------
/src/server/tasks/card_game/logic/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | cmake-build-debug


--------------------------------------------------------------------------------
/src/server/tasks/card_game/logic/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | 	g++ src/aqua_sdk.cpp src/fish.cpp src/fishset.cpp src/game.cpp src/main.cpp src/player.cpp src/jsoncpp/jsoncpp.cpp -o bin/main -std=c++17


--------------------------------------------------------------------------------
/src/server/tasks/card_game/logic/bin/main:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/logic/bin/main


--------------------------------------------------------------------------------
/src/server/tasks/card_game/logic/src/fishset.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "fish.h"
 3 | #include <vector>
 4 | #include <assert.h>
 5 | #include <iostream>
 6 | #include <fstream>
 7 | #include <algorithm>
 8 | 
 9 | extern std::ofstream debuggerfs;
10 | void debugmsg(std::string str);
11 | 
// A set of Fish pointers tagged with a player id and a timestamp.
// Only the declaration is visible here; the semantics of `flag`, `another`
// and the hooks at the bottom are defined in the implementation file.
class FishSet
{
private:
  std::vector<Fish *> fishs;  // the fish held by this set

  public:
    int player_id;
    int timestamp;
    FishSet* another = nullptr;
    FishSet(int flag = 0);
    FishSet(const FishSet& base);  //  Build from an existing fish set; the new set holds the same fish ids as the given one
    Json::Value to_json() const;
    std::vector<Fish*> get_fishs() const;
    int get_size();
    bool empty();
    void clear();
    void add(Fish* fish);
    void add(int id);  //  Add the fish with the given id
    void remove(int id);  //  Remove the fish with the given id
    void to_fight();  //  Switch every fish to the fighting state
    void to_dead();  //  Switch every fish to the dead state
    void update_state();  //  Update every fish from the fighting state to its latest state (possibly dead)
    // void hp_debuff(double rate);
    void hp_debuff(int dec);
    int count_live_fish();
    int living_fish_count() const; // Number of living fish
    int hp_sum() const; // Sum of the fish hp values
    int hp_max() const; // Maximum fish hp value
    bool is_all_dead();
    bool count(Fish* tar);
    int update_timestamp();

  std::optional<Json::Value> on_damaged(Fish *src, Fish *target, int dmg = -1);
  void set_fishset(FishSet *_ally, FishSet *_hostile);
  void start_turn();
  // void debug_msg(std::string str);
};


--------------------------------------------------------------------------------
/src/server/tasks/card_game/logic/src/game.cpp:
--------------------------------------------------------------------------------
 1 | #include "game.h"
 2 | 
 3 | //  将 Game::State 转为对应阶段名称
 4 | std::string Game::state_info(Game::State state){
 5 |     switch(state){
 6 |         case READY: return "准备阶段";
 7 |         case PICK: return "选择阶段";
 8 |         case ASSERT: return "断言阶段";
 9 |         case ACTION: return "行动阶段";
10 |         case END: return "游戏结束";
11 |     }
12 |     return "";
13 | }
14 | 
// Construct a game holding exactly two players, with ids 0 and 1.
Game::Game(){
    players.clear();  // the member vector starts out empty, so this is a no-op
    players.push_back(Player(0));
    players.push_back(Player(1));
}
20 | 
21 | /*
22 |     按照成员变量定义顺序转成 JSON
23 |     样例：
24 |         {
25 |             "winner": winner,
26 |             "players": [
27 |                 player1,
28 |                 player2,
29 |                 ...
30 |             ],
31 |             "state": state,
32 |             "gamestate": gamestate,
33 |             "cur_turn": cur_turn,
34 |             "over": over,
35 |             "cnt": cnt,
36 |             "score": score,
37 |             "rounds": rounds
38 |         }
39 | */
40 | Json::Value Game::to_json() const{
41 |     Json::Value json;
42 |     json["winner"] = winner;
43 |     json["players"].resize(0);
44 |     for(const auto& player : players)
45 |         json["players"].append(player.to_json());
46 |     json["state"] = state;
47 |     json["gamestate"] = gamestate;
48 |     json["cur_turn"] = cur_turn;
49 |     json["over"] = over;
50 |     json["cnt"] = cnt;
51 |     json["score"] = score;
52 |     json["rounds"] = rounds;
53 |     return json;
54 | }


--------------------------------------------------------------------------------
/src/server/tasks/card_game/logic/src/game.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <limits>
 3 | #include "player.h"
 4 | 
 5 | class Game {
 6 |   public:
 7 |     enum State {
 8 |         READY = 0,
 9 |         PICK = 2,
10 |         ASSERT = 3,
11 |         ACTION = 4,
12 |         END = 5
13 |     };
14 |     static constexpr int STATE_LIMIT = 128;
15 |     static std::string state_info(State state);
16 |     int winner = -1;  //  获胜玩家id
17 |     int last_winner = -1;  //  上一轮获胜玩家id
18 |     std::vector<Player> players;
19 |     int state = 1;  //  当前回合(传递给Judger的)
20 |     int last_round_state = 0;
21 |     int gamestate = READY;  //  当前阶段
22 |     int cur_turn = 0;  //  当前操作者
23 |     int first_mover; // 当前轮的先手
24 |     bool over = false;  //  游戏是否结束
25 |     int cnt = 0;
26 |     int imiid[2] = {-1, -1};
27 |     int errorai = 0;
28 |     int score = 0;  //  记录比分
29 |     int rounds = 0;  //  记录战斗轮数
30 |     Game();
31 |     Json::Value to_json() const;
32 | };


--------------------------------------------------------------------------------
/src/server/tasks/card_game/logic/src/main:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/card_game/logic/src/main


--------------------------------------------------------------------------------
/src/server/tasks/card_game/logic/src/main.cpp:
--------------------------------------------------------------------------------
1 | #include "aqua_sdk.cpp"
2 | 
3 | int main(int argc, char* argv[]){
4 |     AquaWarSDK aw;
5 |     aw.start();
6 |     aw.run();
7 | 
8 |     return 0;
9 | }


--------------------------------------------------------------------------------
/src/server/tasks/card_game/logic/src/player.cpp:
--------------------------------------------------------------------------------
 1 | #include "player.h"
 2 | 
// Build the player with id `_id`: my_fish is assigned from FishSet(1) and
// fight_fish from a default FishSet(); both sets are tagged with `_id`.
// NOTE(review): the meaning of the flag value 1 is defined by the FishSet
// constructor, which is not visible here.
Player::Player(int _id):id(_id){
    my_fish = FishSet(1);
    my_fish.player_id = _id;
    fight_fish = FishSet();
    fight_fish.player_id = _id;
}
 9 | 
10 | Player::Player(int _id, FishSet _my_fish):id(_id),my_fish(_my_fish){
11 |     my_fish = FishSet(1);
12 |     fight_fish = FishSet();
13 | }
14 | 
// Copy constructor: copies id, type and both fish sets from `p`.
// The members are default-initialised first and then assigned in the body, so
// the fish sets are copied with FishSet's assignment operator rather than its
// copy constructor.
// NOTE(review): fishset.h declares a custom copy constructor but no
// operator=, so this is presumably a member-wise copy — confirm it is intended.
Player::Player(const Player &p){
    id = p.id;
    type = p.type;
    my_fish = p.my_fish;
    fight_fish = p.fight_fish;
}
21 | 
22 | /*
23 |     按照成员变量定义顺序转成 JSON
24 |     样例：
25 |         {
26 |             "id": id,
27 |             "type": type,
28 |             "my_fish": my_fish,
29 |             "fight_fish": fight_fish
30 |         }
31 | */
32 | Json::Value Player::to_json() const{
33 |     Json::Value json;
34 |     json["id"] = id;
35 |     json["type"] = type;
36 |     json["my_fish"] = my_fish.to_json();
37 |     json["fight_fish"] = fight_fish.to_json();
38 |     return json;
39 | }
40 | 
// Return this player's id.
int Player::get_id(){
    return id;
}

// Return the Fish pointers currently held in my_fish (vector returned by value).
std::vector<Fish*> Player::get_fishs() const{
    return my_fish.get_fishs();
}

// Return the Fish pointers currently held in fight_fish (vector returned by value).
std::vector<Fish*> Player::get_fight_fishs() const{
    return fight_fish.get_fishs();
}

// Forward to my_fish.get_size().
int Player::get_size(){
    return my_fish.get_size();
}

// Forward to my_fish.empty().
bool Player::empty(){
    return my_fish.empty();
}

// Forward to my_fish.clear(); fight_fish is not touched.
void Player::clear(){
    my_fish.clear();
}

// Add `fish` to my_fish.
void Player::add(Fish* fish){
    my_fish.add(fish);
}

// Add the fish with the given id to my_fish.
void Player::add(int id){
    my_fish.add(id);
}

// Remove the fish with the given id from my_fish.
void Player::remove(int id){
    my_fish.remove(id);
}

// Send fish `id` into battle: remove it from my_fish, then add it (by id) to
// fight_fish.
void Player::to_war(int id){
    my_fish.remove(id);
    fight_fish.add(id);
}


--------------------------------------------------------------------------------
/src/server/tasks/card_game/logic/src/player.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "fishset.h"
 3 | #include <vector>
 4 | 
// One participant of the game: an id, a type, and two fish sets (the
// remaining roster and the fish currently fighting).
class Player {
  private:
    int id;  //  player id

  public:
    int type = 1;  //  1 => AI, 2 => player (replay/viewer client)
    FishSet my_fish;  //  all fish remaining in the team
    FishSet fight_fish;  //  fish currently on the field fighting
    Player(int _id);
    Player(int _id, FishSet _my_fish);
    Player(const Player &p);
    Json::Value to_json() const;
    int get_id();
    std::vector<Fish*> get_fishs() const;
    std::vector<Fish*> get_fight_fishs() const;
    int get_size();
    bool empty();
    void clear();
    void add(Fish* fish);
    void add(int id);  //  add the fish with the given id
    void remove(int id);  //  remove the fish with the given id
    void to_war(int id);  //  send the fish with the given id into battle
};


--------------------------------------------------------------------------------
/src/server/tasks/card_game/logic/src/timer.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <chrono>
 4 | 
 5 | class Timer {
 6 |    public:
 7 |     using BaseClock = std::chrono::steady_clock;
 8 |     using TimePoint = BaseClock::time_point;
 9 |     using Duration = BaseClock::duration;
10 | 
11 |     Timer() : m_start(Timer::now()) {}
12 |     int runtime() const {
13 |         Duration time = Timer::now() - m_start;
14 |         return std::chrono::duration_cast<std::chrono::seconds>(time).count();
15 |     }
16 |     static TimePoint now() { return BaseClock::now(); }
17 | 
18 |    private:
19 |     TimePoint m_start;
20 | };


--------------------------------------------------------------------------------
/src/server/tasks/card_game/server.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import socket
 3 | 
 4 | from src.typings.general import ChatHistoryItem
 5 | 
 6 | 
 7 | class Server:
 8 |     def __init__(self, port, workers, host="localhost"):
 9 |         self.host = host
10 |         self.port = port
11 |         self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
12 | 
13 |         while True:
14 |             try:
15 |                 self.socket.bind((self.host, self.port))
16 |                 break
17 |             except OSError as e:
18 |                 self.port += 1
19 |         self.socket.listen(workers + 2)
20 |         self.log = {}
21 |         self.status = {}
22 | 
23 |     async def start(self, folder, session):
24 |         log_file = []
25 |         print(111)
26 |         client_socket, client_address = self.socket.accept()
27 |         print(222)
28 |         while True:
29 |             data = client_socket.recv(1000000).decode()
30 |             if data == "":
31 |                 self.stop(client_socket)
32 |                 break
33 |             elif data.startswith("#[ERROR]"):
34 |                 status = int(data[-1])
35 |                 self.status[folder] = status
36 |             else:
37 |                 try:
38 |                     session.history = json.loads(data)
39 |                     session.history = [ChatHistoryItem(**item) for item in session.history]
40 |                     log_file.append({"role": "user", "content": data})
41 |                     ret = await session.action()
42 |                     if ret.content is None:
43 |                         self.status[folder] = 3
44 |                         self.send_message(client_socket, "### LLM ERROR EXIT ###")
45 |                         break
46 |                     else:
47 |                         ret = ret.content
48 |                     print("\n######\n")
49 |                     print(ret)
50 |                     log_file.append({"role": "agent", "content": ret})
51 |                     print("sending message")
52 |                     self.send_message(client_socket, ret)
53 |                     print("message sent")
54 |                 except json.decoder.JSONDecodeError:
55 |                     log_file.append({"role": "agent", "content": ""})
56 |                     print("except sending")
57 |                     self.send_message(client_socket, "")
58 |                     print("except message sent")
59 |         self.log[folder] = log_file
60 | 
61 |     def send_message(self, client_socket, message):
62 |         client_socket.sendall(message.encode())
63 | 
64 |     def stop(self, client_socket):
65 |         client_socket.close()
66 | 


--------------------------------------------------------------------------------
/src/server/tasks/card_game/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import platform
 3 | import signal
 4 | import subprocess
 5 | 
 6 | 
 7 | def run_cmd(cmd_string, timeout=600):
 8 |     print("命令为：" + cmd_string)
 9 |     p = subprocess.Popen(
10 |         cmd_string,
11 |         stderr=subprocess.PIPE,
12 |         stdout=subprocess.PIPE,
13 |         shell=True,
14 |         close_fds=True,
15 |         start_new_session=True,
16 |     )
17 |     print("created")
18 |     encoding = "utf-8"
19 |     if platform.system() == "Windows":
20 |         encoding = "gbk"
21 | 
22 |     try:
23 |         print("trying")
24 |         (msg, errs) = p.communicate(timeout=timeout)
25 |         print("comed")
26 |         ret_code = p.poll()
27 |         print("polled")
28 |         if ret_code:
29 |             code = 1
30 |             msg = "[Error]Called Error ： " + str(msg.decode(encoding))
31 |         else:
32 |             code = 0
33 |             msg = str(msg.decode(encoding))
34 |         print(ret_code)
35 |     except subprocess.TimeoutExpired:
36 |         p.kill()
37 |         p.terminate()
38 |         os.killpg(p.pid, signal.SIGTERM)
39 | 
40 |         code = 1
41 |         msg = (
42 |             "[ERROR]Timeout Error : Command '"
43 |             + cmd_string
44 |             + "' timed out after "
45 |             + str(timeout)
46 |             + " seconds"
47 |         )
48 |     except Exception as e:
49 |         code = 1
50 |         msg = "[ERROR]Unknown Error : " + str(e)
51 | 
52 |     print("returning")
53 | 
54 |     return code, msg
55 | 


--------------------------------------------------------------------------------
/src/server/tasks/dbbench/requirements.txt:
--------------------------------------------------------------------------------
1 | mysql-connector-python==8.0.33
2 | docker==6.1.2


--------------------------------------------------------------------------------
/src/server/tasks/knowledgegraph/__init__.py:
--------------------------------------------------------------------------------
1 | from .task import KnowledgeGraph


--------------------------------------------------------------------------------
/src/server/tasks/knowledgegraph/requirements.txt:
--------------------------------------------------------------------------------
1 | SPARQLWrapper
2 | networkx


--------------------------------------------------------------------------------
/src/server/tasks/knowledgegraph/utils/semparse_util.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | 
 3 | 
 4 | def lisp_to_nested_expression(lisp_string: str) -> List:
 5 |     """
 6 |     Takes a logical form as a lisp string and returns a nested list representation of the lisp.
 7 |     For example, "(count (division first))" would get mapped to ['count', ['division', 'first']].
 8 |     """
 9 |     stack: List = []
10 |     current_expression: List = []
11 |     tokens = lisp_string.split()
12 |     for token in tokens:
13 |         while token[0] == '(':
14 |             nested_expression: List = []
15 |             current_expression.append(nested_expression)
16 |             stack.append(current_expression)
17 |             current_expression = nested_expression
18 |             token = token[1:]
19 |         current_expression.append(token.replace(')', ''))
20 |         while token[-1] == ')':
21 |             current_expression = stack.pop()
22 |             token = token[:-1]
23 |     return current_expression[0]
24 | 
def expression_to_lisp(expression) -> str:
    """Render a nested list expression back into a lisp string.

    Nested lists are rendered recursively; all other items must be strings and
    are emitted as-is. Items are separated by single spaces.
    """
    rendered = [
        expression_to_lisp(item) if isinstance(item, list) else item
        for item in expression
    ]
    return '(' + ' '.join(rendered) + ')'
37 | 
38 | 
def get_nesting_level(expression) -> int:
    """Return the nesting depth of a nested list expression.

    A list with no nested lists has level 1; each layer of nested lists adds 1.
    """
    child_levels = [
        get_nesting_level(child)
        for child in expression
        if isinstance(child, list)
    ]
    return 1 + max(child_levels, default=0)
48 | 
49 | 
50 | 
if __name__ == '__main__':
    # Manual smoke test: parse a sample logical form, then print its nesting
    # depth followed by its re-serialised lisp string.
    lisp = '(AND common.topic (AND (JOIN common.topic.notable_types Comic Strip) (JOIN common.topic.notable_types Comic Strip)))'
    parsed = lisp_to_nested_expression(lisp)
    print(get_nesting_level(parsed))

    print(expression_to_lisp(parsed))
56 | 


--------------------------------------------------------------------------------
/src/server/tasks/ltp/__init__.py:
--------------------------------------------------------------------------------
1 | from .task import LateralThinkingPuzzle


--------------------------------------------------------------------------------
/src/server/tasks/mind2web/__init__.py:
--------------------------------------------------------------------------------
1 | from .task import Mind2Web
2 | 


--------------------------------------------------------------------------------
/src/server/tasks/mind2web/data_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/mind2web/data_utils/__init__.py


--------------------------------------------------------------------------------
/src/server/tasks/os_interaction/__init__.py:
--------------------------------------------------------------------------------
1 | from .task import OSInteraction


--------------------------------------------------------------------------------
/src/server/tasks/os_interaction/requirements.txt:
--------------------------------------------------------------------------------
1 | docker
2 | PyYAML
3 | PyYAML
4 | Requests
5 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | ## I'm submitting a ...
 2 | 
 3 | - [ ] bug report
 4 | - [ ] feature request
 5 | 
 6 | ## What is the current behavior?
 7 | 
 8 | Please describe the current behavior of the WebShop app or agent
 9 | 
10 | ### Steps to Reproduce
11 | 
12 | If the current behavior is a bug, please provide the steps to reproduce and if possible a minimal demo of the problem
13 | 
14 | ## What is the expected behavior?
15 | 
16 | Please describe the desired behavior of the WebShop app or agent
17 | 
18 | ## Motivation for Change
19 | 
20 | What is the motivation / use case for changing the behavior?
21 | 
22 | ## Please tell us about your environment:
23 | 
24 | * Version: 2.0.0-beta.X
25 | * Browser:
26 | * Language:
27 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | # Overview
 2 | 
 3 | Provide a general summary of your changes
 4 | 
 5 | ## Description of Changes
 6 | 
 7 | Describe your changes + testing (if appropriate) in technical detail
 8 | 
 9 | ## Screenshots
10 | 
11 | Include visuals such as screenshots or recordings if necessary to show changes in effect
12 | 
13 | ## Checklist
14 | - [ ] My code follows the style guidelines of this project
15 | - [ ] I have performed a self-review of my code
16 | - [ ] I have commented my code + updated documentation (if necessary)
17 | - [ ] I have added tests to define the behavior of the feature(s) and verify it is working
18 | - [ ] New + existing unit tests pass


--------------------------------------------------------------------------------
/src/server/tasks/webshop/.github/workflows/pytest.yml:
--------------------------------------------------------------------------------
 1 | # .github/workflows/pytest.yml
 2 | name: PyTest
 3 | on:
 4 |   push:
 5 |     branches: [ "master" ]
 6 |   pull_request:
 7 |     branches: [ "master" ]
 8 | 
 9 | permissions:
10 |   contents: read
11 | 
12 | jobs:
13 |   test:
14 |     runs-on: ubuntu-latest
15 |     timeout-minutes: 10
16 | 
17 |     steps:
18 |       - name: Check out repository code
19 |         uses: actions/checkout@v3
20 | 
21 |       # Setup Python (faster than using Python container)
22 |       - name: Setup Python
23 |         uses: actions/setup-python@v3
24 |         with:
25 |           python-version: "3.8"
26 | 
27 |       # Install pip dependencies + setup for testing
28 |       - name: Install dependencies
29 |         run: |
30 |           python -m pip install --upgrade pip
31 |           pip install -r requirements.txt
32 |           python -m spacy download en_core_web_lg
33 | 
34 |       # Run testing suite
35 |       - name: Run test suite
36 |         run: |
37 |           pytest -v


--------------------------------------------------------------------------------
/src/server/tasks/webshop/.gitignore:
--------------------------------------------------------------------------------
 1 | *.ipynb*
 2 | *.pyc
 3 | *.swp
 4 | 
 5 | .DS_Store
 6 | .idea/
 7 | .pytest_cache/
 8 | .vscode/
 9 | 
10 | __pycache__/
11 | data/
12 | search_engine/indexes*
13 | search_engine/resources*
14 | transfer/flagged
15 | user_session_logs/
16 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/LICENSE.md:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Princeton Natural Language Processing
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/assets/diagram.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/webshop/assets/diagram.gif


--------------------------------------------------------------------------------
/src/server/tasks/webshop/assets/model_ckpts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/webshop/assets/model_ckpts.png


--------------------------------------------------------------------------------
/src/server/tasks/webshop/assets/transfer-logic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/webshop/assets/transfer-logic.png


--------------------------------------------------------------------------------
/src/server/tasks/webshop/baseline_models/.gitignore:
--------------------------------------------------------------------------------
  1 | logs/
  2 | wandb/
  3 | ckpts/
  4 | __pycache__/
  5 | data/il_trajs_finalized_images.jsonl
  6 | *.ipynb
  7 | 
  8 | 
  9 | *.txt
 10 | !requirements.txt
 11 | scripts/
 12 | *.out
 13 | wandb/
 14 | *.swp
 15 | logs/
 16 | .DS_Store
 17 | .idea/
 18 | nbs/
 19 | 
 20 | crawl-*
 21 | 
 22 | # Byte-compiled / optimized / DLL files
 23 | __pycache__/
 24 | *.py[cod]
 25 | *$py.class
 26 | 
 27 | # C extensions
 28 | *.so
 29 | 
 30 | # Distribution / packaging
 31 | .Python
 32 | build/
 33 | develop-eggs/
 34 | dist/
 35 | downloads/
 36 | eggs/
 37 | .eggs/
 38 | lib/
 39 | lib64/
 40 | parts/
 41 | sdist/
 42 | var/
 43 | wheels/
 44 | *.egg-info/
 45 | .installed.cfg
 46 | *.egg
 47 | MANIFEST
 48 | 
 49 | # PyInstaller
 50 | #  Usually these files are written by a python script from a template
 51 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 52 | *.manifest
 53 | *.spec
 54 | 
 55 | # Installer logs
 56 | pip-log.txt
 57 | pip-delete-this-directory.txt
 58 | 
 59 | # Unit test / coverage reports
 60 | htmlcov/
 61 | .tox/
 62 | .coverage
 63 | .coverage.*
 64 | .cache
 65 | nosetests.xml
 66 | coverage.xml
 67 | *.cover
 68 | .hypothesis/
 69 | .pytest_cache/
 70 | 
 71 | # Translations
 72 | *.mo
 73 | *.pot
 74 | 
 75 | # Django stuff:
 76 | *.log
 77 | local_settings.py
 78 | db.sqlite3
 79 | 
 80 | # Flask stuff:
 81 | instance/
 82 | .webassets-cache
 83 | 
 84 | # Scrapy stuff:
 85 | .scrapy
 86 | 
 87 | # Sphinx documentation
 88 | docs/_build/
 89 | 
 90 | # PyBuilder
 91 | target/
 92 | 
 93 | # Jupyter Notebook
 94 | .ipynb_checkpoints
 95 | 
 96 | # pyenv
 97 | .python-version
 98 | 
 99 | # celery beat schedule file
100 | celerybeat-schedule
101 | 
102 | # SageMath parsed files
103 | *.sage.py
104 | 
105 | # Environments
106 | .env
107 | .venv
108 | env/
109 | venv/
110 | ENV/
111 | env.bak/
112 | venv.bak/
113 | 
114 | # Spyder project settings
115 | .spyderproject
116 | .spyproject
117 | 
118 | # Rope project settings
119 | .ropeproject
120 | 
121 | # mkdocs documentation
122 | /site
123 | 
124 | # mypy
125 | .mypy_cache/
126 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/baseline_models/generate_search.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import time
 3 | 
 4 | import torch
 5 | from tqdm import tqdm
 6 | from transformers import BartForConditionalGeneration
 7 | 
 8 | from train_search import get_data, get_dataset, tokenizer
 9 | 
if __name__ == "__main__":
    # Generate, for every goal, the 10 best beam-search query candidates from
    # the fine-tuned BART checkpoint and dump them as {goal: [queries]} JSON.
    beams = 10
    device = 'cuda'

    model = BartForConditionalGeneration.from_pretrained(
        './ckpts/web_search/checkpoint-800')
    model.eval()
    model = model.to(device)

    dataset = get_dataset("web_search")
    dataloader = torch.utils.data.DataLoader(dataset["all"], batch_size=32)
    _, all_goals = get_data("all")

    all_dec = []
    for batch in tqdm(dataloader):
        output = model.generate(
            input_ids=batch["input_ids"].to(device),
            attention_mask=batch["attention_mask"].to(device),
            num_beams=beams, num_return_sequences=beams,
            max_length=512, early_stopping=True
        )
        dec = tokenizer.batch_decode(
            output, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        # generate() returns `beams` sequences per input; regroup them so each
        # entry of all_dec holds the candidates of one input.
        assert len(dec) % beams == 0
        all_dec.extend(
            dec[start:start + beams] for start in range(0, len(dec), beams))

    assert len(all_goals) == len(all_dec)
    d = dict(zip(all_goals, all_dec))
    with open('./data/goal_query_predict.json', 'w') as f:
        json.dump(d, f)
35 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/baseline_models/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | datasets
3 | faiss-gpu
4 | transformers
5 | wandb


--------------------------------------------------------------------------------
/src/server/tasks/webshop/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/webshop/conftest.py


--------------------------------------------------------------------------------
/src/server/tasks/webshop/requirements.txt:
--------------------------------------------------------------------------------
 1 | beautifulsoup4==4.11.1
 2 | cleantext==1.1.4
 3 | env==0.1.0
 4 | Flask==2.1.2
 5 | gdown
 6 | gradio
 7 | gym==0.24.0
 8 | numpy==1.22.4
 9 | pandas==1.4.2
10 | pyserini==0.17.0
11 | pytest
12 | PyYAML==6.0
13 | rank_bm25==0.2.2
14 | requests==2.27.1
15 | requests_mock
16 | rich==12.4.4
17 | scikit_learn==1.1.1
18 | selenium==4.2.0
19 | spacy
20 | thefuzz==0.19.0
21 | torch==1.11.0
22 | tqdm==4.64.0
23 | train==0.0.5
24 | transformers==4.19.2
25 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/run_dev.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export FLASK_ENV=development
3 | python -m web_agent_site.app --log --attrs
4 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/run_envs/run_web_agent_site_env.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Test the site gym environment.
 3 | 
 4 | TODO: move to testing dir for more rigorous tests
 5 | """
 6 | import gym
 7 | from rich import print
 8 | from rich.markup import escape
 9 | 
10 | from web_agent_site.envs import WebAgentSiteEnv
11 | from web_agent_site.models import *
12 | from web_agent_site.utils import DEBUG_PROD_SIZE
13 | 
14 | 
if __name__ == '__main__':
    # Drive a single episode of the site environment with a policy, printing
    # every observation, the available actions and the reward of each step.
    # Alternative environment setups kept for reference:
    #env = gym.make('WebAgentSite-v0')
    #env = WebAgentSiteEnv(render=True, pause=2.0)
    #env = WebAgentSiteEnv(observation_mode='html', render=False)
    env = WebAgentSiteEnv(
        observation_mode='text',
        render=False,
        num_products=DEBUG_PROD_SIZE,
    )
    global_step = 0

    try:
        #policy = HumanPolicy()
        policy = GPTPolicy()

        observation = env.observation
        while True:
            print(observation)
            available_actions = env.get_available_actions()
            print('Available actions:', available_actions)
            action = policy.forward(observation, available_actions)
            observation, reward, done, info = env.step(action)
            print(f'Taking action "{escape(action)}" -> Reward = {reward}')
            if done:
                break
            global_step += 1
    finally:
        # Always release the environment, even if the policy or a step raised.
        env.close()


--------------------------------------------------------------------------------
/src/server/tasks/webshop/run_envs/run_web_agent_text_env.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Test the text gym environment.
 3 | 
 4 | TODO: move to testing dir for more rigorous tests
 5 | """
 6 | import datetime
 7 | import os
 8 | import sys
 9 | 
10 | from rich import print
11 | from rich.markup import escape
12 | 
13 | from web_agent_site.envs import WebAgentTextEnv
14 | from web_agent_site.models import *
15 | 
16 | model_exec = ""
17 | 
18 | 
class InteractionLog:
    """Context manager that redirects ``sys.stdout`` into a fresh log file.

    The log is written to ``<file>-<n>.log``, where ``n`` is the smallest
    non-negative integer for which no such file existed when the object was
    constructed. On exit the previous ``sys.stdout`` is restored and the log
    file is closed.
    """

    def __init__(self, file, name) -> None:
        """
        Args:
            file: path prefix of the log file (without the ``-<n>.log`` suffix).
            name: identifier stored on the instance; not used by this class itself.
        """
        self.file = file
        self.name = name
        self.suffix_index = 0
        # Never overwrite an earlier log: bump the suffix until the name is free.
        while os.path.exists(self.file_name):
            self.suffix_index += 1
        self.stdout = None
        self.logfile = None

    @property
    def file_name(self):
        """Full path of the log file for the current suffix index."""
        return self.file + "-" + str(self.suffix_index) + ".log"

    def __enter__(self):
        # Create the target directory on demand so a missing "logs/" folder
        # does not abort the run before it has started.
        parent = os.path.dirname(self.file_name)
        if parent:
            os.makedirs(parent, exist_ok=True)
        self.logfile = open(self.file_name, 'w', encoding="utf-8")
        self.stdout = sys.stdout
        sys.stdout = self.logfile
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Restore stdout first so later output is never sent to a closed file,
        # and reset the bookkeeping even if closing the file fails.
        sys.stdout = self.stdout
        try:
            self.logfile.close()
        finally:
            self.logfile = None
            self.stdout = None
44 | 
45 | 
def worker(log_file, idx, rnge):
    """Run the episodes in ``range(*rnge)`` and log every interaction.

    All output printed while the episodes run is redirected into an
    ``InteractionLog`` built from ``log_file`` and ``idx``. Each episode gets
    at most 100 steps; an episode that runs out of steps, or whose policy
    returns an empty action, is scored 0. The per-episode rewards and their
    average are printed at the end.

    Args:
        log_file: path prefix handed to ``InteractionLog``.
        idx: identifier handed to ``InteractionLog``.
        rnge: arguments for ``range`` selecting the episode indices.
    """
    with InteractionLog(log_file, idx):
        env = WebAgentTextEnv(observation_mode="text", human_goals=True)
        print("total goals:", len(env.server.goals))
        print("ranging:", rnge)
        scores = []
        for i in range(*rnge):
            env.reset(i)
            print(f"=== Episode #{i} ===")

            # SECURITY: model_exec is evaluated as arbitrary Python. It comes
            # from the operator's own prompt input; never feed it untrusted text.
            policy = eval(model_exec)

            observation = env.observation
            for _ in range(100):
                print(observation)
                available_actions = env.get_available_actions()
                print('Available actions:', available_actions)
                action = policy.forward(observation, available_actions)
                if not action:
                    reward = 0
                    break
                observation, reward, done, info = env.step(action)
                print(f'Taking action "{escape(action)}" -> Reward = {reward}')
                if done:
                    break
            else:
                # Step budget exhausted without finishing the episode.
                reward = 0
            print(f"#{i} {reward}")
            scores.append(reward)

        # An empty episode range yields no scores; report NaN instead of
        # crashing with ZeroDivisionError.
        average = sum(scores) / len(scores) if scores else float("nan")
        print(f"#Average: {average}")
77 | 
78 | 
if __name__ == '__main__':
    # Episode range from the command line:
    #   no args      -> all episodes (0, 12087)
    #   one arg  N   -> only episode N
    #   two args A B -> episodes A (inclusive) to B (exclusive)
    #   anything else falls back to all episodes
    cli_args = sys.argv[1:]
    if len(cli_args) == 1:
        ranging = (int(cli_args[0]), int(cli_args[0]) + 1)
    elif len(cli_args) == 2:
        ranging = (int(cli_args[0]), int(cli_args[1]))
    else:
        ranging = (0, 12087)

    # The policy constructor expression is read interactively and later
    # evaluated by worker() for every episode.
    model_exec = input(">>> ")
    print("got EXEC", model_exec)

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d=%H-%M-%S")
    log_file = "logs/%s" % (timestamp)
    worker(log_file, 0, ranging)
94 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/run_prod.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python -m web_agent_site.app --log
3 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/run_web_agent_site_env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python -m run_envs.run_web_agent_site_env
3 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/run_web_agent_text_env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python -m run_envs.run_web_agent_text_env
3 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/search_engine/convert_product_file_format.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import json
 3 | from tqdm import tqdm
 4 | sys.path.insert(0, '../')
 5 | 
 6 | from web_agent_site.utils import DEFAULT_FILE_PATH
 7 | from web_agent_site.engine.engine import load_products
 8 | 
 9 | all_products, *_ = load_products(filepath=DEFAULT_FILE_PATH)
10 | 
11 | 
12 | docs = []
13 | for p in tqdm(all_products, total=len(all_products)):
14 |     option_texts = []
15 |     options = p.get('options', {})
16 |     for option_name, option_contents in options.items():
17 |         option_contents_text = ', '.join(option_contents)
18 |         option_texts.append(f'{option_name}: {option_contents_text}')
19 |     option_text = ', and '.join(option_texts)
20 | 
21 |     doc = dict()
22 |     doc['id'] = p['asin']
23 |     doc['contents'] = ' '.join([
24 |         p['Title'],
25 |         p['Description'],
26 |         p['BulletPoints'][0],
27 |         option_text,
28 |     ]).lower()
29 |     doc['product'] = p
30 |     docs.append(doc)
31 | 
32 | 
33 | with open('./resources_100/documents.jsonl', 'w+') as f:
34 |     for doc in docs[:100]:
35 |         f.write(json.dumps(doc) + '\n')
36 | 
37 | with open('./resources/documents.jsonl', 'w+') as f:
38 |     for doc in docs:
39 |         f.write(json.dumps(doc) + '\n')
40 | 
41 | with open('./resources_1k/documents.jsonl', 'w+') as f:
42 |     for doc in docs[:1000]:
43 |         f.write(json.dumps(doc) + '\n')
44 | 
45 | with open('./resources_100k/documents.jsonl', 'w+') as f:
46 |     for doc in docs[:100000]:
47 |         f.write(json.dumps(doc) + '\n')
48 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/search_engine/lucene_searcher.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from pyserini.search.lucene import LuceneSearcher
 3 | from rich import print
 4 | 
 5 | 
 6 | searcher = LuceneSearcher('indexes')
 7 | hits = searcher.search('rubber sole shoes', k=20)
 8 | 
 9 | for hit in hits:
10 |     doc = searcher.doc(hit.docid)
11 |     print(doc)
12 |     obj = json.loads(doc.raw())['product']['Title']
13 |     print(obj)
14 | 
15 | print(len(hits))
16 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/search_engine/run_indexing.sh:
--------------------------------------------------------------------------------
 1 | python -m pyserini.index.lucene \
 2 |   --collection JsonCollection \
 3 |   --input resources_100 \
 4 |   --index indexes_100 \
 5 |   --generator DefaultLuceneDocumentGenerator \
 6 |   --threads 1 \
 7 |   --storePositions --storeDocvectors --storeRaw
 8 | 
 9 | python -m pyserini.index.lucene \
10 |   --collection JsonCollection \
11 |   --input resources \
12 |   --index indexes \
13 |   --generator DefaultLuceneDocumentGenerator \
14 |   --threads 1 \
15 |   --storePositions --storeDocvectors --storeRaw
16 | 
17 | python -m pyserini.index.lucene \
18 |   --collection JsonCollection \
19 |   --input resources_1k \
20 |   --index indexes_1k \
21 |   --generator DefaultLuceneDocumentGenerator \
22 |   --threads 1 \
23 |   --storePositions --storeDocvectors --storeRaw
24 | 
25 | python -m pyserini.index.lucene \
26 |   --collection JsonCollection \
27 |   --input resources_100k \
28 |   --index indexes_100k \
29 |   --generator DefaultLuceneDocumentGenerator \
30 |   --threads 1 \
31 |   --storePositions --storeDocvectors --storeRaw
32 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/setup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Displays information on how to use script
 4 | helpFunction()
 5 | {
 6 |   echo "Usage: $0 [-d small|all]"
 7 |   echo -e "\t-d small|all - Specify whether to download entire dataset (all) or just 1000 (small)"
 8 |   exit 1 # Exit script after printing help
 9 | }
10 | 
11 | # Get values of command line flags
12 | while getopts d: flag
13 | do
14 |   case "${flag}" in
15 |     d) data=${OPTARG};;
16 |   esac
17 | done
18 | 
19 | if [ -z "$data" ]; then
20 |   echo "[ERROR]: Missing -d flag"
21 |   helpFunction
22 | fi
23 | 
24 | # Install Python Dependencies
25 | pip install -r requirements.txt;
26 | 
27 | # Install Environment Dependencies via `conda`
28 | conda install -c pytorch faiss-cpu==1.7.2;
29 | conda install -c conda-forge openjdk=11;
30 | 
31 | # Download dataset into `data` folder via `gdown` command
32 | mkdir -p data;
33 | cd data;
34 | if [ "$data" == "small" ]; then
35 |   gdown https://drive.google.com/uc?id=1EgHdxQ_YxqIQlvvq5iKlCrkEKR6-j0Ib; # items_shuffle_1000 - product scraped info
36 |   gdown https://drive.google.com/uc?id=1IduG0xl544V_A_jv3tHXC0kyFi7PnyBu; # items_ins_v2_1000 - product attributes
37 | elif [ "$data" == "all" ]; then
38 |   gdown https://drive.google.com/uc?id=1A2whVgOO0euk5O13n2iYDM0bQRkkRduB; # items_shuffle
39 |   gdown https://drive.google.com/uc?id=1s2j6NgHljiZzQNL3veZaAiyW_qDEgBNi; # items_ins_v2
40 | else
41 |   echo "[ERROR]: argument for `-d` flag not recognized"
42 |   helpFunction
43 | fi
44 | gdown https://drive.google.com/uc?id=14Kb5SPBk_jfdLZ_CDBNitW98QLDlKR5O # items_human_ins
45 | cd ..
46 | 
47 | # Download spaCy large NLP model
48 | python -m spacy download en_core_web_lg
49 | 
50 | # Build search engine index
51 | cd search_engine
52 | mkdir -p resources resources_100 resources_1k resources_100k
53 | python convert_product_file_format.py # convert items.json => required doc format
54 | mkdir -p indexes
55 | ./run_indexing.sh
56 | cd ..
57 | 
58 | # Create logging folder + samples of log data
59 | get_human_trajs () {
60 |   PYCMD=$(cat <<EOF
61 | import gdown
62 | url="https://drive.google.com/drive/u/1/folders/16H7LZe2otq4qGnKw_Ic1dkt-o3U9Zsto"
63 | gdown.download_folder(url, quiet=True, remaining_ok=True)
64 | EOF
65 |   )
66 |   python -c "$PYCMD"
67 | }
68 | mkdir -p user_session_logs/
69 | cd user_session_logs/
70 | echo "Downloading 50 example human trajectories..."
71 | get_human_trajs
72 | echo "Downloading example trajectories complete"
73 | cd ..


--------------------------------------------------------------------------------
/src/server/tasks/webshop/tests/transfer/mocks/mock_parse_item_page_ws_desc:
--------------------------------------------------------------------------------
 1 | 
 2 | <!DOCTYPE html>
 3 | <html>
 4 |   <head>
 5 |     <link rel="stylesheet" href="/static/style.css">
 6 |     <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
 7 |     <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.0.3/css/font-awesome.css	">
 8 |     <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
 9 |     <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.bundle.min.js"></script>
10 |     <link rel="icon" href="data:,">
11 |   </head>
12 |   <body>
13 |     <div class="container py-5">
14 |       <div class="row top-buffer">
15 |         <div class="col-sm-6">
16 |           <div id="instruction-text" class="text-center">
17 |             <h4>Instruction:<br>i want an xx-small sized slim fit button down shirt with long sleeves. pick something in white, and price lower than 50.00 dollars</h4>
18 |           </div>
19 |         </div>
20 |       </div>
21 |       <div class="row top-buffer">
22 |         <form method="post" action="/abc">
23 |           <button type="submit" class="btn btn-success">Back to Search</button>
24 |         </form>
25 |       </div>
26 |       <div class="row top-buffer">
27 |         <form method="post" action="/item_page/abc/B09P87V3LZ/%5B%27red%27%2C%20%27basketball%27%2C%20%27shoes%27%5D/1/%7B%7D">
28 |           <button type="submit" class="btn btn-primary">&lt; Prev</button>
29 |         </form>
30 |       </div>
31 |       <div class="row top-buffer">
32 |         <div class="col-md-12">
33 |           <div class="row top-buffer">
34 |             <div class="col-sm-6" name="description">
35 |               <div class="card card-body">
36 |                 <p class="product-info">Here Are The Things You Want To Knowa─=≡Σ(((つ̀ώ)つSTORE INTRODUCTION:&gt;&gt;&gt;&gt;Our store helps our customers improve their quality of life~As a distributor, we value quality and service.Focus on the high quality and durability of the product.Committed to creating a store that satisfies and reassures our customers.TIPS:&gt;&gt;&gt;&gt;1. Please allow minor errors in the data due to manual measurements.2. Due to the color settings of the display, the actual color may be slightly different from the online image.QUALITY PROMISE:&gt;&gt;&gt;&gt;Our goal is to continuously provide a range of quality products.We place a huge emphasis on the values of quality and reliability.We have always insisted on fulfilling this commitment.In short, we want our customers to have the same great product experience every time and be trusted to deliver on this commitment.Please give us a chance to serve you.OTHER:&gt;&gt;&gt;&gt;athletic sneaker laces athletic sneakers white athletic sneakers for women clearance leather Sneaker leather sneakers women leather sneakers for menleather sneaker laces leather sneaker platform basketball shoes basketball shoes for men basketball shoe laces basketball shoe grip basketball shoes for women fitness shoes for men fitness shoes women workout fitness shoes women fitness shoes women size 5 fitness shoes men workout fitness shoes for men high top sneakers for women walking shoes sneakers with arch support for women</p>
37 |               </div>
38 |             </div>
39 |           </div>
40 |         </div>
41 |       </div>
42 |     </div>
43 |   </body>
44 | </html>


--------------------------------------------------------------------------------
/src/server/tasks/webshop/tests/web-agent-site/engine/test_normalize.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from web_agent_site.engine.normalize import *
 3 | 
 4 | def test_normalize_color():
 5 |     suite = [
 6 |         ("", ""),
 7 |         ("black forest", "black"),
 8 |         ("violet lavender", "lavender"),
 9 |         ("steelivy fuchsia", "fuchsia"),
10 |         ("123alabaster", "alabaster"),
11 |         ("webshop", "webshop")
12 |     ]
13 |     for color_string, expected in suite:
14 |         output = normalize_color(color_string)
15 |         assert type(output) is str
16 |         assert output == expected
17 | 
def test_normalize_color_size():
    """normalize_color_size returns the expected color and size mappings."""
    # Keys are (id, color, size) tuples; values are prices.
    product_prices = {
        (1, "black forest", "3 meter"): 10.29,
        (2, "violet lavender", "xx-large"): 23.42,
        (3, "steelivy fuchsia", "random value"): 193.87,
        (4, "123alabaster", "40cm plus"): 67.23,
        (5, "webshop", "142"): 1.02,
        (6, "webshopsteel", "2 petite"): 57.99,
        (7, "leather black", "91ft walnut feet"): 6.20,
    }
    expected_colors = dict([
        ('N.A.', 'not_matched'),
        ("black forest", "black"),
        ("violet lavender", "lavender"),
        ("steelivy fuchsia", "fuchsia"),
        ("123alabaster", "alabaster"),
        ("webshop", "not_matched"),
        ("webshopsteel", "steel"),
        ("leather black", "black"),
    ])
    expected_sizes = dict([
        ('N.A.', 'not_matched'),
        ("3 meter", '(.*)meter'),
        ("xx-large", 'xx-large'),
        ("random value", "not_matched"),
        ("40cm plus", '(.*)plus'),
        ("142", "numeric_size"),
        ("2 petite", "(.*)petite"),
        ("91ft walnut feet", '(.*)ft'),
    ])

    mappings = normalize_color_size(product_prices)
    color_mapping, size_mapping = mappings
    # Both results must be plain dicts, not subclasses.
    assert type(color_mapping) == dict
    assert type(size_mapping) == dict
    assert color_mapping == expected_colors
    assert size_mapping == expected_sizes
54 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/tests/web-agent-site/test_utils.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import pytest
 3 | import random
 4 | import shutil
 5 | from pathlib import Path
 6 | from web_agent_site.utils import *
 7 | 
 8 | def test_random_idx():
 9 |     random.seed(24)
10 |     weights = [random.randint(0, 10) for _ in range(0, 50)]
11 |     cml_weights = [0]
12 |     for w in weights:
13 |         cml_weights.append(cml_weights[-1] + w)
14 |     idx_1, expected_1 = random_idx(cml_weights), 44
15 |     idx_2, expected_2 = random_idx(cml_weights), 15
16 |     idx_3, expected_3 = random_idx(cml_weights), 36
17 |     assert idx_1 == expected_1
18 |     assert idx_2 == expected_2
19 |     assert idx_3 == expected_3
20 | 
def test_setup_logger():
    """`setup_logger` creates `<dir>/<session_id>.jsonl` and logs bare messages at INFO.

    The temporary log directory is removed and the logger's handlers are
    closed in a `finally` block, so a failing assertion does not leave files
    or open file handles behind.
    """
    user_log_dir = Path('user_session_logs_test')
    user_log_dir.mkdir(parents=True, exist_ok=True)
    session_id = "ABC"

    logger = None
    try:
        logger = setup_logger(session_id, user_log_dir)
        log_file = user_log_dir / f"{session_id}.jsonl"
        assert log_file.is_file()
        assert logger.level == logging.INFO

        content = "Hello there"
        logger.info(content)
        assert log_file.read_text().strip("\n") == content
    finally:
        if logger is not None:
            # Close the file before deleting the directory (required on
            # platforms that refuse to delete open files).
            for handler in list(logger.handlers):
                handler.close()
                logger.removeHandler(handler)
        shutil.rmtree(user_log_dir)
37 | 
def test_generate_mturk_code():
    """`generate_mturk_code` returns the expected 10-character code as a `str`."""
    suite = [
        ('', 'DA39A3EE5E'),
        ('ABC', '3C01BDBB26'),
        ('123', '40BD001563'),
        ('1A1', '10E7DB0A44'),
        ('$%^ABC', '5D5607D24E'),
    ]
    for session_id, expected in suite:
        output = generate_mturk_code(session_id)
        # Check the type of the function's result, not of the fixture value.
        assert isinstance(output, str)
        assert output == expected


--------------------------------------------------------------------------------
/src/server/tasks/webshop/transfer/README.md:
--------------------------------------------------------------------------------
 1 | # Sim-to-real Transfer
 2 | This folder contains code for transferring agents trained on WebShop to perform on third party websites, specifically [Amazon](http://amazon.com) and [eBay](http://ebay.com). The imitation learning and reinforcement learning agents exercised by the transfer code can be found on WebShop's Hugging Face [page](https://huggingface.co/webshop).
 3 | 
 4 | Interact with a demo of the transfer code, deployed as a 🤗 Hugging Face space [here](https://huggingface.co/spaces/webshop/amazon_shop)!
 5 | 
 6 | ## 🛠️ Usage
 7 | The Gradio app deployed as the aforementioned Hugging Face space can be started locally by running `python app.py` in this folder. The initial `setup.sh` script should have installed all the required dependencies.
 8 | 
 9 | ## ➡️ Transfer Logic
10 | The Sim-to-real transfer code follows this general logical flow:
11 | 
12 | <img src="../assets/transfer-logic.png" width="100%">
13 | 
14 | The contents of this directory each serve the following purposes:
15 | * `app.py`: Run to launch interactive [Gradio](https://gradio.app/) demo of app
16 | * `predict_help.py`: Amazon, eBay web scraping code
17 | * `webshop_lite.py`: A condensed version of WebShop's templating engine
18 | 
19 | If you are interested in *transferring an agent's functionality to a new website or platform*, you will need to...
20 | 1. implement two new functions:  `parse_results_<platform>.py` and `parse_item_page_<platform>.py`. The corresponding interfaces and working examples for Amazon can be found [here](https://github.com/princeton-nlp/webshop/tree/master/transfer/predict_help.py#L262) and [here](https://github.com/princeton-nlp/webshop/tree/master/transfer/predict_help.py#L296).
21 | 2. Invoke these functions in the [`run_episode`](https://github.com/princeton-nlp/webshop/tree/master/transfer/app.py#L105) function in the `app.py` file. Specifically, you should add a single call to...
22 |      * `parse_results...` in the [conditional](https://github.com/princeton-nlp/webshop/tree/master/transfer/predict_help.py#L220) handling `Page.RESULTS` page types
23 |      * `parse_item_page...` in the [conditional](https://github.com/princeton-nlp/webshop/tree/master/transfer/predict_help.py#L240) handling `Page.ITEMS` page types


--------------------------------------------------------------------------------
/src/server/tasks/webshop/transfer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/webshop/transfer/__init__.py


--------------------------------------------------------------------------------
/src/server/tasks/webshop/web_agent_site/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/webshop/web_agent_site/__init__.py


--------------------------------------------------------------------------------
/src/server/tasks/webshop/web_agent_site/attributes/annotate.py:
--------------------------------------------------------------------------------
 1 | import yaml
 2 | from pathlib import Path
 3 | from rich import print
 4 | 
 5 | ATTR_DIR = './data/attributes'
 6 | 
 7 | ATTR_PATHS = [
 8 |     'narrow_2-gram.yaml',
 9 |     'narrow_1-gram.yaml',
10 |     'broad_2-gram.yaml',
11 |     'broad_1-gram.yaml',
12 | ]
13 | ATTR_PATHS = [Path(ATTR_DIR) / af for af in ATTR_PATHS]
14 | 
15 | 
def annotate(attr_path):
    """Interactively tag the attributes listed in one YAML attribute file.

    The file is loaded as a mapping of category -> list of strings of the
    form ``"<attribute> | <score>"``. Each attribute is printed and the user
    is prompted for tags. Entering a tag string containing ``q`` ends the
    current category; answering ``n`` at the category prompt ends the session.

    Args:
        attr_path: Path of the YAML file to annotate.

    Returns:
        dict mapping every visited category to its list of
        ``"<attribute> | <score> | <tags>"`` strings. (Previously these
        results were built and then thrown away.)
    """
    with open(attr_path) as f:
        attrs_by_cat = yaml.safe_load(f)

    # First pass: overall statistics only.
    all_attrs = [
        a.split('|')[0].strip()
        for attrs in attrs_by_cat.values()
        for a in attrs
    ]
    print(f'Total unique attributes: {len(set(all_attrs))}')
    total = len(all_attrs)
    num_left = total

    annotated_attrs_by_cat = dict()
    for category, attrs in attrs_by_cat.items():
        print(
            f'Category: [ {category} ] | '
            f'Number of attributes: {len(attrs)}\n'
        )
        annotated_attrs = []
        for i, attr in enumerate(attrs):
            # Split on the first '|' only and strip both parts, matching the
            # tolerant parsing used for the statistics above.
            attr, score = (part.strip() for part in attr.split('|', 1))
            print(
                f'{"[" + str(i) + "]":<5} '
                f'[bold green]{attr:<30}[/bold green] | '
                f'[red]{category}[/red] | '
                f'{score}'
            )
            tags = input(
                'Annotate [1: ITEM, 2: PROP, 3: USE, '
                '⎵: next example, q: next category] > '
            )
            print('\n')
            tags = tags.strip()
            annotated_attrs.append(f'{attr} | {score} | {tags}')
            if 'q' in tags:
                break

        # Keep what was collected for this category, even if cut short by 'q'.
        annotated_attrs_by_cat[category] = annotated_attrs

        # The whole category counts as done, including any skipped entries.
        num_left -= len(attrs)
        print(f'{num_left} / {total} total attributes left.')

        ans = input('Starting the next category... [y/n] > ')
        if ans == 'n':
            break

    return annotated_attrs_by_cat
61 | 
def main():
    """Run the interactive annotation over every file in ATTR_PATHS, in order."""
    for path in ATTR_PATHS:
        annotate(path)
65 | 
# Usage:
#     python -m web_agent_site.attributes.annotate
if __name__ == '__main__':
    main()
71 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/web_agent_site/engine/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/webshop/web_agent_site/engine/__init__.py


--------------------------------------------------------------------------------
/src/server/tasks/webshop/web_agent_site/envs/__init__.py:
--------------------------------------------------------------------------------
 1 | from gym.envs.registration import register
 2 | 
 3 | from web_agent_site.envs.web_agent_site_env import WebAgentSiteEnv
 4 | from web_agent_site.envs.web_agent_text_env import WebAgentTextEnv
 5 | 
 6 | register(
 7 |   id='WebAgentSiteEnv-v0',
 8 |   entry_point='web_agent_site.envs:WebAgentSiteEnv',
 9 | )
10 | 
11 | register(
12 |   id='WebAgentTextEnv-v0',
13 |   entry_point='web_agent_site.envs:WebAgentTextEnv',
14 | )


--------------------------------------------------------------------------------
/src/server/tasks/webshop/web_agent_site/models/__init__.py:
--------------------------------------------------------------------------------
1 | from web_agent_site.models.models import *
2 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/web_agent_site/models/models.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Model implementations. The model interface should be suitable for both
 3 | the ``site env'' and the ``text env''.
 4 | """
 5 | import json
 6 | import random
 7 | import re
 8 | 
 9 | import requests
10 | 
11 | random.seed(4)
12 | 
13 | 
class BasePolicy:
    """Abstract policy: subclasses map an observation to an action string."""

    def __init__(self):
        """No state is needed by the base class."""

    def forward(self, observation, available_actions):
        """Choose the next action. Must be overridden by subclasses.

        Args:
            observation (`str`):
                HTML string.
            available_actions:
                Description of what can currently be done. The policies in
                this module read the keys ``has_search_bar`` and
                ``clickables`` from it.

        Returns:
            action (`str`):
                String of the form ``action_name[action_arg]``, for example
                ``search[white shoes]``, ``click[button=Reviews]`` or
                ``click[button=Buy Now]``.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError
35 | 
36 | 
class HumanPolicy(BasePolicy):
    """Policy that asks a person on standard input for each action."""

    def __init__(self):
        super().__init__()

    def forward(self, observation, available_actions):
        """Prompt with ``> `` and return the typed line unchanged.

        Both arguments are ignored; the typed text is returned as-is.
        """
        return input('> ')
44 | 
45 | 
class RandomPolicy(BasePolicy):
    """Baseline policy: fixed search query, otherwise a random click."""

    def __init__(self):
        super().__init__()

    def forward(self, observation, available_actions):
        """Return ``search[shoes]`` when a search bar is available, else a random click.

        The click target is drawn uniformly from
        ``available_actions['clickables']`` with the module-level `random`
        generator. `observation` is not used.
        """
        if not available_actions['has_search_bar']:
            target = random.choice(available_actions['clickables'])
            return f'click[{target}]'
        return 'search[shoes]'
57 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/web_agent_site/static/images/no-image-available.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/AgentBench/41e6807349f741e06428fae3e9e4d334e7002362/src/server/tasks/webshop/web_agent_site/static/images/no-image-available.png


--------------------------------------------------------------------------------
/src/server/tasks/webshop/web_agent_site/templates/attributes_page.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 |   <head>
 4 |     <link rel="stylesheet" href="{{url_for('static', filename='style.css')}}">
 5 |     <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
 6 |     <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.0.3/css/font-awesome.css">
 7 |     <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
 8 |     <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.bundle.min.js"></script>
 9 |     <link rel="icon" href="data:,">
10 |   </head>
11 |   <body>
12 |     <div class="container py-5">
13 |       <div class="row top-buffer">
14 |         <div class="col-sm-6">
15 |           <div id="instruction-text" class="text-center">
16 |             <h4>Instruction:<br>{{ instruction_text }}</h4>
17 |           </div>
18 |         </div>
19 |       </div>
20 |       <div class="row top-buffer">
21 |         <form method="post" action="{{url_for('index', session_id=session_id)}}">
22 |           <button type="submit" class="btn btn-success">Back to Search</button>
23 |         </form>
24 |       </div>
25 |       <div class="row top-buffer">
26 |         <form method="post" action="{{url_for('item_page', session_id=session_id, asin=asin, keywords=keywords, page=page, options=options)}}">
27 |           <button type="submit" class="btn btn-primary">&lt; Prev</button>
28 |         </form>
29 |       </div>
30 |       <div class="row top-buffer">
31 |         <div class="col-md-12">
32 |           <div class="row top-buffer">
33 |             <div class="col-sm-6" name="description">
34 |               <div class="card card-body">
35 |                 <ul>
36 |                   {% for attribute in product_info.Attributes %}
37 |                   <li><p class="attribute"> {{attribute}}</p></li>
38 |                   {% endfor %}
39 |                 </ul>
40 |               </div>
41 |             </div>
42 |             <div class="col-sm-6" name="description">
43 |               <div class="d-flex align-items-center justify-content-between mt-1">
44 |                 <h5 class="font-weight-bold my-2 product-category">{{product_info.category}}</h5>
45 |               </div>
46 |               <div class="d-flex align-items-center justify-content-between mt-1">
47 |                 <h5 class="font-weight-bold my-2 product-query">{{product_info.query}}</h5>
48 |               </div>
49 |               <div class="d-flex align-items-center justify-content-between mt-1">
50 |                 <h5 class="font-weight-bold my-2 product-product_category">{{product_info.product_category}}</h5>
51 |               </div>
52 |             </div>
53 |           </div>
54 |         </div>
55 |       </div>
56 |     </div>
57 |   </body>
58 | </html>


--------------------------------------------------------------------------------
/src/server/tasks/webshop/web_agent_site/templates/description_page.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 |   <head>
 4 |     <link rel="stylesheet" href="{{url_for('static', filename='style.css')}}">
 5 |     <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
 6 |     <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.0.3/css/font-awesome.css">
 7 |     <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
 8 |     <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.bundle.min.js"></script>
 9 |     <link rel="icon" href="data:,">
10 |   </head>
11 |   <body>
12 |     <div class="container py-5">
13 |       <div class="row top-buffer">
14 |         <div class="col-sm-6">
15 |           <div id="instruction-text" class="text-center">
16 |             <h4>Instruction:<br>{{ instruction_text }}</h4>
17 |           </div>
18 |         </div>
19 |       </div>
20 |       <div class="row top-buffer">
21 |         <form method="post" action="{{url_for('index', session_id=session_id)}}">
22 |           <button type="submit" class="btn btn-success">Back to Search</button>
23 |         </form>
24 |       </div>
25 |       <div class="row top-buffer">
26 |         <form method="post" action="{{url_for('item_page', session_id=session_id, asin=asin, keywords=keywords, page=page, options=options)}}">
27 |           <button type="submit" class="btn btn-primary">&lt; Prev</button>
28 |         </form>
29 |       </div>
30 |       <div class="row top-buffer">
31 |         <div class="col-md-12">
32 |           <div class="row top-buffer">
33 |             <div class="col-sm-6" name="description">
34 |               <div class="card card-body">
35 |                 <p class="product-info">{{product_info.Description}}</p>
36 |               </div>
37 |             </div>
38 |           </div>
39 |         </div>
40 |       </div>
41 |     </div>
42 |   </body>
43 | </html>


--------------------------------------------------------------------------------
/src/server/tasks/webshop/web_agent_site/templates/done_page.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 |   <head>
 4 |     <link rel="stylesheet" href="{{url_for('static', filename='style.css')}}">
 5 |     <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
 6 |     <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
 7 |     <link rel="icon" href="data:,">
 8 |   </head>
 9 |   <body>
10 |     <!-- Code reference: https://bootsnipp.com/snippets/m13mN -->
11 |     <div class="container" style="margin-top: 8%;">
12 |       <div class="col-md-6 col-md-offset-3">     
13 |         <div class="row">
14 |           <div id="thankyou" class="text-center">
15 |             <h1>Thank you for shopping with us!</h1>
16 |           </div>
17 |           <div id="stats" class="text-center">
18 |             <h3 align="mturk_code">Your code: </h3>
19 |             <p><pre>{{ mturk_code }}</pre> (Paste it in your MTurk interface.)</p>
20 |             <div style="display:none">
21 |               <h2 align="left">Purchased</h2>
22 |               <hr class="solid">
23 |               <h4 id="asin">asin<pre>{{ asin }}</pre></h4>
24 |               <h4 id="options">options<pre>{{ options | tojson }}</pre></h4>
25 |               <h4 id="purchased_attrs">attrs<pre>{{ purchased_attrs }}</pre></h4>
26 |               <h4 id="purchased-category">category<pre>{{ category }}</pre></h4>
27 |               <h4 id="purchased-query">query<pre>{{ query }}</pre></h4>
28 |               <h4 id="purchased-pc">product category<pre>{{ product_category }}</pre></h4>
29 |               <h2 align="left">Target</h2>
30 |               <hr class="solid">
31 |               <h4 id="goal-asin">asin<pre>{{ goal.asin }}</pre></h4>
32 |               <h4 id="goal-options">options<pre>{{ goal.goal_options }}</pre></h4>
33 |               <h4 id="goal-attrs">attrs<pre>{{ goal.attributes }}</pre></h4>
34 |               <h4 id="goal-price">price upper<pre>{{ goal.price_upper }}</pre></h4>
35 |               <h4 id="goal-instruction-text">instuction text<pre>{{ goal.instruction_text }}</pre></h4>
36 |               <h4 id="goal-category">category<pre>{{ goal.category }}</pre></h4>
37 |               <h4 id="goal-pc">product category<pre>{{ goal.product_category }}</pre></h4>
38 |               <h4 id="goal-query">query<pre>{{ goal.query }}</pre></h4>
39 |               <h4>Goal <div id="goal"><pre>{{ goal | pprint }}</pre></div></h4>
40 |               <h2 align="left">Reward</h2>
41 |             </div>
42 |             <hr class="solid">
43 |       			<h3   id="reward">Your score (min 0.0, max 1.0)<pre>{{ reward }}</pre></h3>
44 |             <h4 hidden>Reward Details <div id="reward_info"><pre>{{ reward_info | pprint }}</pre></div></h4>
45 |           </div>
46 |         </div>            
47 |       </div>
48 |     </div>
49 |   </body>
50 | </html>
51 | 


--------------------------------------------------------------------------------
/src/server/tasks/webshop/web_agent_site/templates/features_page.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 |   <head>
 4 |     <link rel="stylesheet" href="{{url_for('static', filename='style.css')}}">
 5 |     <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
 6 |     <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.0.3/css/font-awesome.css">
 7 |     <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
 8 |     <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.bundle.min.js"></script>
 9 |     <link rel="icon" href="data:,">
10 |   </head>
11 |   <body>
12 |     <div class="container py-5">
13 |       <div class="row top-buffer">
14 |         <div class="col-sm-6">
15 |           <div id="instruction-text" class="text-center">
16 |             <h4>Instruction:<br>{{ instruction_text }}</h4>
17 |           </div>
18 |         </div>
19 |       </div>
20 |       <div class="row top-buffer">
21 |         <form method="post" action="{{url_for('index', session_id=session_id)}}">
22 |           <button type="submit" class="btn btn-success">Back to Search</button>
23 |         </form>
24 |       </div>
25 |       <div class="row top-buffer">
26 |         <form method="post" action="{{url_for('item_page', session_id=session_id, asin=asin, keywords=keywords, page=page, options=options)}}">
27 |           <button type="submit" class="btn btn-primary">&lt; Prev</button>
28 |         </form>
29 |       </div>
30 |       <div class="row top-buffer">
31 |         <div class="col-md-12">
32 |           <div class="row top-buffer">
33 |             <div class="col-sm-6" name="bulletpoints">
34 |               <div class="card card-body">
35 |                 <ul>
36 |                   {% for bulletpoint in product_info.BulletPoints %}
37 |                   <li><p class="product-info"> {{bulletpoint}}</p></li>
38 |                   {% endfor %}
39 |                 </ul>
40 |               </div>
41 |             </div>
42 |           </div>
43 |         </div>
44 |       </div>
45 |     </div>
46 |   </body>
47 | </html>


--------------------------------------------------------------------------------
/src/server/tasks/webshop/web_agent_site/templates/review_page.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 |   <head>
 4 |     <link rel="stylesheet" href="{{url_for('static', filename='style.css')}}">
 5 |     <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
 6 |     <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.0.3/css/font-awesome.css">
 7 |     <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
 8 |     <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.bundle.min.js"></script>
 9 |     <link rel="icon" href="data:,">
10 |   </head>
11 |   <body>
12 |     <div class="container py-5">
13 |       <div class="row top-buffer">
14 |         <div class="col-sm-6">
15 |           <div id="instruction-text" class="text-center">
16 |             <h4>Instruction:<br>{{ instruction_text }}</h4>
17 |           </div>
18 |         </div>
19 |       </div>
20 |       <div class="row top-buffer">
21 |         <form method="post" action="{{url_for('index', session_id=session_id)}}">
22 |           <button type="submit" class="btn btn-success">Back to Search</button>
23 |         </form>
24 |       </div>
25 |       <div class="row top-buffer">
26 |         <form method="post" action="{{url_for('item_page', session_id=session_id, asin=asin, keywords=keywords, page=page, options=options)}}">
27 |           <button type="submit" class="btn btn-primary">&lt; Prev</button>
28 |         </form>
29 |       </div>
30 |       <div class="row top-buffer">
31 |         <div class="col-md-12">
32 |           <div class="row top-buffer">
33 |             <div class="col-sm-6" name="reviews">
34 |               <div class="card card-body">
35 |                 {% for review in product_info.Reviews %}
36 |                   <div class="card">
37 |                     <div class="row text-left">
38 |                       <h4 class="blue-text mt-3">"{{review.title}}"</h4>
39 |                       <p class="text-left">
40 |                         <span>{{review.score}}</span>
41 |                         {% for i in range(review.score | int) %}
42 |                             <span class="fa fa-star star-active"></span>
43 |                         {% endfor %}
44 |                         {% for i in range(5 - review.score | int) %}
45 |                             <span class="fa fa-star star-inactive"></span>
46 |                         {% endfor %}
47 |                       </p>
48 |                       <p class="content">{{review.body}}</p>
49 |                     </div>
50 |                   </div>
51 |                 {% endfor %}
52 |               </div>
53 |             </div>
54 |           </div>
55 |         </div>
56 |       </div>
57 |     </div>
58 |   </body>
59 | </html>


--------------------------------------------------------------------------------
/src/server/tasks/webshop/web_agent_site/templates/search_page.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 |   <head>
 4 |     <link rel="stylesheet" href="{{url_for('static', filename='style.css')}}">
 5 |     <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
 6 |     <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
 7 |     <link rel="icon" href="data:,">
 8 |   </head>
 9 |   <body>
10 |     <!-- Code reference: https://bootsnipp.com/snippets/m13mN -->
11 |     <div class="container" style="margin-top: 8%;">
12 |       <div class="col-md-6 col-md-offset-3">     
13 |         <div class="row">
14 |           <div id="logo" class="text-center">
15 |             <h2>WebShop</h2>
16 |           </div>
17 |           <div id="instruction-text" class="text-center">
18 |             <h4>Instruction: <br>{{ instruction_text }}</h4>
19 |           </div>
20 |           <form role="form" id="form-buscar" method="post" action="{{url_for('index', session_id=session_id)}}">
21 |             <div class="form-group">
22 |               <div class="input-group">
23 |                 <input id="search_input" class="form-control" type="text" name="search_query" placeholder="Search..." required/>
24 |                 <span class="input-group-btn">
25 |                   <button class="btn btn-success" type="submit"><i class="glyphicon glyphicon-search" aria-hidden="true"></i>Search</button>
26 |                 </span>
27 |               </div>
28 |             </div>
29 |           </form>
30 |         </div>            
31 |       </div>
32 |     </div>
33 |   </body>
34 | </html>


--------------------------------------------------------------------------------
/src/server/tasks/webshop/web_agent_site/utils.py:
--------------------------------------------------------------------------------
 1 | import bisect
 2 | import hashlib
 3 | import logging
 4 | import random
 5 | from os.path import dirname, abspath, join
 6 | 
 7 | BASE_DIR = dirname(abspath(__file__))
 8 | DEBUG_PROD_SIZE = None  # set to `None` to disable
 9 | 
10 | DEFAULT_ATTR_PATH = join(BASE_DIR, '../data/items_ins_v2_1000.json')
11 | # DEFAULT_ATTR_PATH = join(BASE_DIR, '../data/items_ins_v2.json')
12 | DEFAULT_FILE_PATH = join(BASE_DIR, '../data/items_shuffle_1000.json')
13 | # DEFAULT_FILE_PATH = join(BASE_DIR, '../data/items_shuffle.json')
14 | DEFAULT_REVIEW_PATH = join(BASE_DIR, '../data/reviews.json')
15 | 
16 | FEAT_CONV = join(BASE_DIR, '../data/feat_conv.pt')
17 | FEAT_IDS = join(BASE_DIR, '../data/feat_ids.pt')
18 | 
19 | HUMAN_ATTR_PATH = join(BASE_DIR, '../data/items_human_ins.json')
20 | HUMAN_ATTR_PATH = join(BASE_DIR, '../data/items_human_ins.json')
21 | 
def random_idx(cum_weights):
    """Draw a random index from a list of cumulative weights.

    A value is sampled uniformly from ``[0, cum_weights[-1]]`` and located in
    `cum_weights` with a right bisection. The resulting insertion point is
    capped at ``len(cum_weights) - 2``.

    NOTE(review): when `cum_weights` starts with 0, the insertion point is
    never 0 unless the cap forces it -- confirm this matches the intended
    indexing at the call sites before changing anything; seeded tests pin the
    current behavior.
    """
    draw = random.uniform(0, cum_weights[-1])
    insertion_point = bisect.bisect_right(cum_weights, draw)
    cap = len(cum_weights) - 2
    return insertion_point if insertion_point < cap else cap
31 | 
def setup_logger(session_id, user_log_dir):
    """Create the logger for a session, writing to ``<user_log_dir>/<session_id>.jsonl``.

    Args:
        session_id: Logger name and base name of the log file.
        user_log_dir: Existing directory for the log file; a `str` or any
            path-like object (e.g. `pathlib.Path`).

    Returns:
        The `logging.Logger` named `session_id`, set to INFO, with a single
        file handler that truncates the file and writes bare messages.
    """
    logger = logging.getLogger(session_id)

    # `getLogger` hands back the same object for the same name. Without this
    # cleanup a second call for one session would stack another file handler
    # on it, duplicating every line and leaking the earlier file handle.
    stale_handlers = [
        h for h in logger.handlers if isinstance(h, logging.FileHandler)
    ]
    for stale in stale_handlers:
        logger.removeHandler(stale)
        stale.close()

    file_handler = logging.FileHandler(
        join(user_log_dir, f'{session_id}.jsonl'),
        mode='w'
    )
    file_handler.setFormatter(logging.Formatter('%(message)s'))
    logger.setLevel(logging.INFO)
    logger.addHandler(file_handler)
    return logger
44 | 
def generate_mturk_code(session_id: str) -> str:
    """Derive the MTurk redeem code for a session.

    The code is the first ten hexadecimal digits of the SHA-1 digest of
    `session_id` (encoded with the default `str.encode()` encoding), in upper
    case.
    """
    digest_hex = hashlib.sha1(session_id.encode()).hexdigest()
    code = digest_hex.upper()
    return code[:10]


--------------------------------------------------------------------------------
/src/typings/__init__.py:
--------------------------------------------------------------------------------
1 | from .config import *
2 | from .exception import *
3 | from .general import *
4 | from .output import *
5 | from .request import *
6 | from .status import *
7 | 


--------------------------------------------------------------------------------
/src/typings/exception.py:
--------------------------------------------------------------------------------
 1 | from typing import Union
 2 | 
 3 | 
 4 | class AgentBenchException(Exception):
 5 |     pass
 6 | 
 7 | 
 8 | class ClientException(AgentBenchException):
 9 |     def __init__(self, reason: str, detail: Union[str, None] = None) -> None:
10 |         super().__init__()
11 |         self.reason = reason
12 |         self.detail = detail
13 | 
14 |     def __str__(self) -> str:
15 |         if not self.detail:
16 |             return "{CLASS_NAME}[{REASON}]".format(
17 |                 CLASS_NAME=self.__class__.__name__, REASON=self.reason
18 |             )
19 |         else:
20 |             return "{CLASS_NAME}[{REASON}]: {DETAIL}".format(
21 |                 CLASS_NAME=self.__class__.__name__,
22 |                 REASON=self.reason,
23 |                 DETAIL=self.detail,
24 |             )
25 | 
26 | 
class ServerException(AgentBenchException):
    """Server-side counterpart of `ClientException`; adds no behavior of its own."""
29 | 
30 | 
class AgentClientException(ClientException):
    """Base class for the ``agent_*`` client exceptions below."""
33 | 
34 | 
class TaskClientException(ClientException):
    """Base class for the ``task_*`` client exceptions below."""
37 | 
38 | 
class AgentContextLimitException(AgentClientException):
    """Agent client exception with the fixed reason ``agent_context_limit``."""

    def __init__(self, detail: Union[str, None] = None) -> None:
        super().__init__(reason="agent_context_limit", detail=detail)
42 | 
43 | 
class AgentTimeoutException(AgentClientException):
    """Agent client exception with the fixed reason ``agent_timeout``."""

    def __init__(self, detail: Union[str, None] = None) -> None:
        super().__init__(reason="agent_timeout", detail=detail)
47 | 
48 | 
class AgentNetworkException(AgentClientException):
    """Agent client exception with the fixed reason ``agent_network``."""

    def __init__(self, detail: Union[str, None] = None) -> None:
        super().__init__(reason="agent_network", detail=detail)
52 | 
53 | 
class TaskTimeoutException(TaskClientException):
    """Task client exception with the fixed reason ``task_timeout``."""

    def __init__(self, detail: Union[str, None] = None) -> None:
        super().__init__(reason="task_timeout", detail=detail)
57 | 
58 | 
class TaskNetworkException(TaskClientException):
    """Task client exception with the fixed reason ``task_network``."""

    def __init__(self, detail: Union[str, None] = None) -> None:
        super().__init__(reason="task_network", detail=detail)
62 | 


--------------------------------------------------------------------------------
/src/typings/general.py:
--------------------------------------------------------------------------------
 1 | import builtins
 2 | from typing import List, Dict, Union, Any, Literal
 3 | 
 4 | from pydantic import BaseModel, validator
 5 | 
# Alias for values of the listed JSON-like shapes. List items and dict values
# are typed `Any`, so nested contents are not constrained by this alias.
JSONSerializable = Union[None, bool, int, float, str, List[Any], Dict[str, Any]]
# A sample index may be either an int or a str.
SampleIndex = Union[int, str]
 8 | 
 9 | 
class InstanceFactory(BaseModel):
    """Description of an object to build: a callable's name plus keyword arguments.

    Fields:
        module: Either a bare name (looked up in this module's globals, then
            in `builtins`) or a dotted path ``package.module.Name``.
        parameters: Keyword arguments passed to the resolved callable.
    """

    module: str
    parameters: Dict[str, Any] = {}

    @validator("parameters", pre=True)
    def _ensure_dict(cls, v):
        """Treat an explicit ``None`` for `parameters` as an empty dict."""
        if v is None:
            return {}
        return v

    def create(self):
        """Resolve `module` to a callable and call it with `parameters`.

        Returns:
            Whatever the resolved callable returns.

        Raises:
            Exception: if `module` is empty or ends with a dot, i.e. there is
                no final name to resolve.
            AttributeError: if the final name is not found in `builtins` or in
                the imported module.
            ImportError: if the dotted module path cannot be imported.
        """
        # print('>>>>>>>> ', self.module, self.parameters)
        module_path, dot, attr_name = self.module.rpartition(".")
        if not attr_name:
            # `str.split` never returns an empty list, so the original
            # `len(splits) == 0` check could not fire; test the name itself.
            raise Exception("Invalid module name: {}".format(self.module))

        if not dot:
            # Bare name: this module's globals take precedence over builtins.
            g = globals()
            if attr_name in g:
                class_type = g[attr_name]
            else:
                class_type = getattr(builtins, attr_name)
        else:
            # A non-empty `fromlist` makes `__import__` return the leaf module
            # rather than the top-level package.
            mod = __import__(module_path, fromlist=[attr_name])
            class_type = getattr(mod, attr_name)
        return class_type(**self.parameters)
36 | 
37 | 
class Assignment(BaseModel):
    """Pydantic model pairing an agent with a task, both given as strings."""

    # Agent identifier (presumably a name defined in the agent configs -- confirm).
    agent: str
    # Task identifier (presumably a name defined in the task configs -- confirm).
    task: str
41 | 
42 | 
class ChatHistoryItem(BaseModel):
    """One message of a chat history: who spoke and what was said."""

    # Speaker of the message; only the literal values "user" and "agent" validate.
    role: Literal["user", "agent"]
    # Text of the message.
    content: str
46 | 


--------------------------------------------------------------------------------
/src/typings/output.py:
--------------------------------------------------------------------------------
 1 | from typing import Union, List
 2 | 
 3 | from pydantic import BaseModel, root_validator
 4 | 
 5 | from . import ChatHistoryItem
 6 | from .general import JSONSerializable, SampleIndex
 7 | from .status import SampleStatus, AgentOutputStatus
 8 | 
 9 | 
class TaskOutput(BaseModel):
    """Pydantic model describing the state/result of one task sample.

    Every field has a default, so an empty ``TaskOutput()`` is valid and has
    status ``SampleStatus.RUNNING``.
    """

    # Index of the sample this output belongs to, or None when not set.
    index: Union[None, SampleIndex] = None
    # Current status of the sample; defaults to RUNNING.
    status: SampleStatus = SampleStatus.RUNNING
    # JSON-compatible result payload; None by default.
    result: JSONSerializable = None
    # Chat history associated with the sample, or None when not provided.
    history: Union[None, List[ChatHistoryItem]] = None
15 | 
16 | 
class TaskSampleExecutionResult(BaseModel):
    """Pydantic model holding a sample's final status and its result payload."""

    # Status of the sample; defaults to COMPLETED.
    status: SampleStatus = SampleStatus.COMPLETED
    # JSON-compatible result payload; None by default.
    result: JSONSerializable = None
20 | 
21 | 
class AgentOutput(BaseModel):
    """Pydantic model for a single agent reply.

    ``content`` may be omitted only when ``status`` is something other than
    ``AgentOutputStatus.NORMAL``; a NORMAL reply must carry content.
    """

    status: AgentOutputStatus = AgentOutputStatus.NORMAL
    content: Union[str, None] = None

    @root_validator(pre=False, skip_on_failure=True)
    def post_validate(cls, instance: dict):
        """Reject a NORMAL-status output that has no content.

        Args:
            instance: the already field-validated values of the model.

        Returns:
            ``instance`` unchanged when the combination is valid.

        Raises:
            ValueError: if status is NORMAL and content is None. An explicit
                raise is used instead of ``assert`` so the check still runs
                when Python is started with ``-O``.
        """
        if (
            instance.get("status") is AgentOutputStatus.NORMAL
            and instance.get("content") is None
        ):
            raise ValueError("If status is NORMAL, content should not be None")
        return instance
34 | 
35 | 
class TaskClientOutput(BaseModel):
    """Pydantic model bundling an optional error, info string and task output.

    All three fields default to None.
    """

    # Error text, or None (presumably None means no error occurred -- confirm).
    error: Union[str, None] = None
    # Additional informational text, or None.
    info: Union[str, None] = None
    # The task output itself, or None when there is none.
    output: Union[TaskOutput, None] = None
40 | 


--------------------------------------------------------------------------------
/src/typings/request.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | 
 3 | from pydantic import BaseModel
 4 | 
 5 | from .general import SampleIndex
 6 | from .output import AgentOutput, TaskOutput
 7 | 
 8 | 
class RegisterRequest(BaseModel):
    """Request body carrying a name, an address, a concurrency and an index list.

    NOTE(review): presumably sent by a task worker to register itself with a
    controller -- confirm against the endpoint that consumes it.
    """

    # Name being registered.
    name: str
    # Address string associated with the registration.
    address: str
    # Integer concurrency value.
    concurrency: int
    # Untyped list of indices (presumably sample indices -- confirm).
    indices: list
14 | 
15 | 
class StartSampleRequest(BaseModel):
    """Request body identifying a sample by a name and a sample index."""

    # Name the request refers to (presumably the task name -- confirm).
    name: str
    # Index of the sample; an int or a str.
    index: SampleIndex
19 | 
20 | 
class InteractRequest(BaseModel):
    """Request body carrying an agent's reply for a given session."""

    # Integer identifier of the session the reply belongs to.
    session_id: int
    # The agent's reply, validated as an ``AgentOutput``.
    agent_response: AgentOutput
24 | 
25 | 
class CancelRequest(BaseModel):
    """Request body that names a session by its integer id.

    NOTE(review): presumably asks for that session to be cancelled, judging
    by the class name -- confirm against the handler.
    """

    # Integer identifier of the session.
    session_id: int
28 | 
29 | 
class HeartbeatRequest(BaseModel):
    """Request body carrying a name and an address.

    NOTE(review): presumably a periodic liveness signal, judging by the class
    name -- confirm against the handler.
    """

    # Name of the sender.
    name: str
    # Address string of the sender.
    address: str
33 | 
34 | 
class CalculateOverallRequest(BaseModel):
    """Request body carrying a name and a list of ``TaskOutput`` results.

    NOTE(review): presumably used to compute an overall score for a task from
    its per-sample outputs -- confirm against the handler.
    """

    # Name the results belong to (presumably the task name -- confirm).
    name: str
    # Per-sample outputs to aggregate.
    results: List[TaskOutput]
38 | 
39 | 
class WorkerStartSampleRequest(BaseModel):
    """Request body pairing a sample index with a session id."""

    # Index of the sample; an int or a str.
    index: SampleIndex
    # Integer identifier of the session.
    session_id: int
43 | 
44 | 
class SampleStatusRequest(BaseModel):
    """Request body that names a session by its integer id.

    NOTE(review): presumably queries the status of that session's sample,
    judging by the class name -- confirm against the handler.
    """

    # Integer identifier of the session.
    session_id: int
47 | 


--------------------------------------------------------------------------------
/src/typings/status.py:
--------------------------------------------------------------------------------
 1 | from enum import IntEnum, Enum
 2 | 
 3 | 
class SampleStatus(str, Enum):
    """String-valued enumeration of the states a sample can be in.

    Because the class also derives from ``str``, each member compares equal
    to its plain string value.
    """

    RUNNING = "running"
    COMPLETED = "completed"
    AGENT_CONTEXT_LIMIT = "agent context limit"
    AGENT_VALIDATION_FAILED = "agent validation failed"
    AGENT_INVALID_ACTION = "agent invalid action"
    TASK_LIMIT_REACHED = "task limit reached"
    UNKNOWN = "unknown"
    TASK_ERROR = "task error"
13 | 
14 | 
class WorkerStatus(IntEnum):
    """Integer-valued enumeration of worker states.

    NOTE(review): the names suggest increasing unresponsiveness
    (ALIVE < COMA < DEAD); confirm the exact meaning of COMA with the code
    that assigns it.
    """

    ALIVE = 0
    COMA = 1
    DEAD = 2
19 | 
20 | 
class AgentOutputStatus(str, Enum):
    """String-valued enumeration of the statuses an agent output can carry.

    Because the class also derives from ``str``, each member compares equal
    to its plain string value.
    """

    NORMAL = "normal"
    CANCELLED = "cancelled"
    AGENT_CONTEXT_LIMIT = "agent context limit"
25 | 


--------------------------------------------------------------------------------
/src/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .max_flow import Graph, MaxFlow
2 | from .others import *
3 | from .rules import *
4 | 


--------------------------------------------------------------------------------
/src/utils/others.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | import numpy as np
 4 | 
 5 | 
 6 | class JsonEncoder(json.JSONEncoder):
 7 |     """Convert numpy classes to JSON serializable objects."""
 8 | 
 9 |     def default(self, obj):
10 |         if isinstance(obj, (np.integer, np.floating, np.bool_)):
11 |             return obj.item()
12 |         elif isinstance(obj, np.ndarray):
13 |             return obj.tolist()
14 |         else:
15 |             return super(JsonEncoder, self).default(obj)
16 | 
17 | 
def serialize(obj, max_depth=5, compress=False):
    """
    Reduce ``obj`` to a structure made only of basic types, lists and dicts.

    - Once ``max_depth`` is used up, the value is replaced by ``"..."``.
    - Lists and tuples become lists; dict keys are converted with ``str``.
    - With ``compress`` enabled, containers longer than 5 entries keep their
      first 5 entries plus a marker recording the total size.
    - Objects exposing ``__dict__`` are serialized through that dict, always
      in compressed mode and without consuming a depth level.
    - Any other value is turned into ``str(obj)``, shortened to its first and
      last 45 characters when longer than 100.
    """
    if max_depth <= 0:
        return "..."

    if isinstance(obj, (int, float, str, bool, type(None))):
        return obj

    if isinstance(obj, (list, tuple)):
        shorten = compress and len(obj) > 5
        head = obj[:5] if shorten else obj
        items = [serialize(entry, max_depth - 1, compress) for entry in head]
        if shorten:
            items.append("...(total: %d)" % len(obj))
        return items

    if isinstance(obj, dict):
        shorten = compress and len(obj) > 5
        chosen_keys = list(obj.keys())[:5] if shorten else obj
        mapping = {
            str(k): serialize(obj[k], max_depth - 1, compress) for k in chosen_keys
        }
        if shorten:
            mapping["...total..."] = len(obj)
        return mapping

    if hasattr(obj, "__dict__"):
        return serialize(obj.__dict__, max_depth, True)

    text = str(obj)
    if len(text) <= 100:
        return text
    return text[:45] + "   ...   " + text[-45:]
53 | 
54 | 
class ColorMessage:
    """Helpers that wrap a string in ANSI colour escape sequences."""

    @staticmethod
    def _paint(code, msg):
        """Prefix ``msg`` with the SGR colour ``code`` and append the reset code."""
        return "\033[" + code + "m" + msg + "\033[0m"

    @staticmethod
    def red(msg):
        """Return ``msg`` wrapped in colour code 91 (red)."""
        return ColorMessage._paint("91", msg)

    @staticmethod
    def green(msg):
        """Return ``msg`` wrapped in colour code 92 (green)."""
        return ColorMessage._paint("92", msg)

    @staticmethod
    def cyan(msg):
        """Return ``msg`` wrapped in colour code 96 (cyan)."""
        return ColorMessage._paint("96", msg)

    @staticmethod
    def yellow(msg):
        """Return ``msg`` wrapped in colour code 93 (yellow)."""
        return ColorMessage._paint("93", msg)
71 | 


--------------------------------------------------------------------------------
/src/utils/rules.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | 
 3 | 
 4 | class RuleBase:
 5 |     def check(self, obj) -> bool:
 6 |         raise NotImplementedError()
 7 | 
 8 | 
 9 | class ContainRule(RuleBase):
10 |     def __init__(self, target, reverse=False) -> None:
11 |         """
12 |          Check if target is in obj.
13 |         `reverse`: if True, check if obj is in target
14 |         """
15 |         self.target = target
16 |         self.reverse = reverse
17 | 
18 |     def check(self, obj) -> bool:
19 |         if self.reverse:
20 |             return obj in self.target
21 |         else:
22 |             return self.target in obj
23 | 
24 | 
class NotRule(RuleBase):
    """Rule that negates the outcome of another rule."""

    def __init__(self, rule: RuleBase) -> None:
        """Remember the rule whose result is to be inverted."""
        self.rule = rule

    def check(self, obj) -> bool:
        """Return True exactly when the wrapped rule's check is falsy for ``obj``."""
        inner_result = self.rule.check(obj)
        return not inner_result
31 | 
32 | 
class AndRule(RuleBase):
    """Rule that holds only when every one of its sub-rules holds."""

    def __init__(self, rules: List[RuleBase]) -> None:
        """Remember the sub-rules to combine."""
        self.rules = rules

    def check(self, obj) -> bool:
        """Return False at the first failing sub-rule, True if none fails."""
        for sub_rule in self.rules:
            if not sub_rule.check(obj):
                return False
        return True
39 | 
40 | 
class OrRule(RuleBase):
    """Rule that holds as soon as any one of its sub-rules holds."""

    def __init__(self, rules: List[RuleBase]) -> None:
        """Remember the sub-rules to combine."""
        self.rules = rules

    def check(self, obj) -> bool:
        """Return True at the first passing sub-rule, False if none passes."""
        for sub_rule in self.rules:
            if sub_rule.check(obj):
                return True
        return False
47 | 


--------------------------------------------------------------------------------