├── .gitattributes
├── .github
    ├── .codecov.yml
    ├── ISSUE_TEMPLATE
    │   ├── bug_template.yml
    │   ├── feature_request.md
    │   └── technical_proposal.md
    ├── dependabot.yml
    └── workflows
    │   ├── deploy-docs.yml
    │   ├── dummy-agent-test.yml
    │   ├── ghcr.yml
    │   ├── lint.yml
    │   ├── review-pr.yml
    │   ├── run-integration-tests.yml
    │   ├── run-unit-tests.yml
    │   ├── solve-issue.yml
    │   └── stale.yml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Development.md
├── LICENSE
├── LOG_VISUALIZER.md
├── Makefile
├── README.md
├── agenthub
    ├── __init__.py
    ├── browsing_agent
    │   ├── __init__.py
    │   ├── browsing_agent.py
    │   └── response_parser.py
    ├── dummy_web_agent
    │   ├── README.md
    │   ├── __init__.py
    │   ├── commands.txt
    │   ├── dummy_web_agent.py
    │   ├── prompt.py
    │   └── utils.py
    ├── reasoner_agent_fast
    │   ├── __init__.py
    │   └── reasoner_agent_fast.py
    └── reasoner_agent_full
    │   ├── __init__.py
    │   └── reasoner_agent_full.py
├── config.toml
├── dev_config
    └── python
    │   ├── .pre-commit-config.yaml
    │   ├── mypy.ini
    │   └── ruff.toml
├── easy-web-icon.png
├── easyweb
    ├── README.md
    ├── __init__.py
    ├── controller
    │   ├── __init__.py
    │   ├── action_parser.py
    │   ├── agent.py
    │   ├── agent_controller.py
    │   └── state
    │   │   ├── state.py
    │   │   └── task.py
    ├── core
    │   ├── config.py
    │   ├── const
    │   │   └── guide_url.py
    │   ├── download.py
    │   ├── exceptions.py
    │   ├── logger.py
    │   ├── main.py
    │   ├── metrics.py
    │   ├── schema
    │   │   ├── __init__.py
    │   │   ├── action.py
    │   │   ├── agent.py
    │   │   ├── config.py
    │   │   ├── observation.py
    │   │   └── stream.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── json.py
    │   │   └── singleton.py
    ├── events
    │   ├── __init__.py
    │   ├── action
    │   │   ├── __init__.py
    │   │   ├── action.py
    │   │   ├── agent.py
    │   │   ├── browse.py
    │   │   ├── commands.py
    │   │   ├── empty.py
    │   │   ├── files.py
    │   │   ├── message.py
    │   │   ├── planning.py
    │   │   └── tasks.py
    │   ├── event.py
    │   ├── observation
    │   │   ├── __init__.py
    │   │   ├── agent.py
    │   │   ├── browse.py
    │   │   ├── commands.py
    │   │   ├── delegate.py
    │   │   ├── empty.py
    │   │   ├── error.py
    │   │   ├── files.py
    │   │   ├── observation.py
    │   │   ├── recall.py
    │   │   └── success.py
    │   ├── serialization
    │   │   ├── __init__.py
    │   │   ├── action.py
    │   │   ├── event.py
    │   │   ├── observation.py
    │   │   └── utils.py
    │   └── stream.py
    ├── llm
    │   ├── bedrock.py
    │   └── llm.py
    ├── memory
    │   ├── __init__.py
    │   ├── condenser.py
    │   ├── history.py
    │   └── memory.py
    ├── runtime
    │   ├── __init__.py
    │   ├── browser
    │   │   ├── __init__.py
    │   │   └── browser_env.py
    │   ├── docker
    │   │   ├── __init__.py
    │   │   ├── exec_box.py
    │   │   ├── local_box.py
    │   │   ├── process.py
    │   │   └── ssh_box.py
    │   ├── e2b
    │   │   ├── README.md
    │   │   ├── filestore.py
    │   │   ├── process.py
    │   │   ├── runtime.py
    │   │   └── sandbox.py
    │   ├── plugins
    │   │   ├── __init__.py
    │   │   ├── agent_skills
    │   │   │   ├── README.md
    │   │   │   ├── __init__.py
    │   │   │   ├── agentskills.py
    │   │   │   └── setup.sh
    │   │   ├── jupyter
    │   │   │   ├── __init__.py
    │   │   │   ├── execute_cli
    │   │   │   ├── execute_cli.py
    │   │   │   ├── execute_server
    │   │   │   └── setup.sh
    │   │   ├── mixin.py
    │   │   ├── requirement.py
    │   │   └── swe_agent_commands
    │   │   │   ├── __init__.py
    │   │   │   ├── _setup_cursor_mode_env.sh
    │   │   │   ├── _setup_default_env.sh
    │   │   │   ├── _split_string
    │   │   │   ├── cursors_defaults.sh
    │   │   │   ├── cursors_edit_linting.sh
    │   │   │   ├── defaults.sh
    │   │   │   ├── edit_linting.sh
    │   │   │   ├── parse_commands.py
    │   │   │   ├── search.sh
    │   │   │   ├── setup_cursor_mode.sh
    │   │   │   └── setup_default.sh
    │   ├── process.py
    │   ├── runtime.py
    │   ├── sandbox.py
    │   ├── server
    │   │   ├── browse.py
    │   │   ├── files.py
    │   │   └── runtime.py
    │   ├── tools.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── singleton.py
    │   │   └── system.py
    ├── server
    │   ├── README.md
    │   ├── __init__.py
    │   ├── auth
    │   │   ├── __init__.py
    │   │   └── auth.py
    │   ├── data_models
    │   │   └── feedback.py
    │   ├── listen.py
    │   ├── mock
    │   │   ├── README.md
    │   │   └── listen.py
    │   └── session
    │   │   ├── __init__.py
    │   │   ├── agent.py
    │   │   ├── manager.py
    │   │   └── session.py
    └── storage
    │   ├── __init__.py
    │   ├── files.py
    │   ├── local.py
    │   ├── memory.py
    │   └── s3.py
├── frontend-icon.png
├── frontend.py
├── log_visualizer
    ├── controller.py
    ├── main.py
    └── session.py
├── model_port_config.json
├── my_log_visualizer.py
├── poetry.lock
├── pyproject.toml
├── pytest.ini
└── tests
    ├── integration
        ├── README.md
        ├── conftest.py
        ├── mock
        │   ├── BrowsingAgent
        │   │   └── test_browse_internet
        │   │   │   ├── prompt_001.log
        │   │   │   ├── prompt_002.log
        │   │   │   ├── prompt_003.log
        │   │   │   ├── response_001.log
        │   │   │   ├── response_002.log
        │   │   │   └── response_003.log
        │   ├── CodeActAgent
        │   │   ├── test_browse_internet
        │   │   │   ├── prompt_001.log
        │   │   │   ├── prompt_002.log
        │   │   │   ├── prompt_003.log
        │   │   │   ├── prompt_004.log
        │   │   │   ├── prompt_005.log
        │   │   │   ├── response_001.log
        │   │   │   ├── response_002.log
        │   │   │   ├── response_003.log
        │   │   │   ├── response_004.log
        │   │   │   └── response_005.log
        │   │   ├── test_edits
        │   │   │   ├── prompt_001.log
        │   │   │   ├── prompt_002.log
        │   │   │   ├── prompt_003.log
        │   │   │   ├── response_001.log
        │   │   │   ├── response_002.log
        │   │   │   └── response_003.log
        │   │   ├── test_ipython
        │   │   │   ├── prompt_001.log
        │   │   │   ├── prompt_002.log
        │   │   │   ├── response_001.log
        │   │   │   └── response_002.log
        │   │   ├── test_ipython_module
        │   │   │   ├── prompt_001.log
        │   │   │   ├── prompt_002.log
        │   │   │   ├── prompt_003.log
        │   │   │   ├── response_001.log
        │   │   │   ├── response_002.log
        │   │   │   └── response_003.log
        │   │   └── test_write_simple_script
        │   │   │   ├── prompt_001.log
        │   │   │   ├── prompt_002.log
        │   │   │   ├── prompt_003.log
        │   │   │   ├── response_001.log
        │   │   │   ├── response_002.log
        │   │   │   └── response_003.log
        │   ├── DelegatorAgent
        │   │   ├── test_edits
        │   │   │   ├── prompt_001.log
        │   │   │   ├── prompt_002.log
        │   │   │   ├── prompt_003.log
        │   │   │   ├── prompt_004.log
        │   │   │   ├── prompt_005.log
        │   │   │   ├── prompt_006.log
        │   │   │   ├── prompt_007.log
        │   │   │   ├── prompt_008.log
        │   │   │   ├── response_001.log
        │   │   │   ├── response_002.log
        │   │   │   ├── response_003.log
        │   │   │   ├── response_004.log
        │   │   │   ├── response_005.log
        │   │   │   ├── response_006.log
        │   │   │   ├── response_007.log
        │   │   │   └── response_008.log
        │   │   └── test_write_simple_script
        │   │   │   ├── prompt_001.log
        │   │   │   ├── prompt_002.log
        │   │   │   ├── prompt_003.log
        │   │   │   ├── prompt_004.log
        │   │   │   ├── prompt_005.log
        │   │   │   ├── prompt_006.log
        │   │   │   ├── prompt_007.log
        │   │   │   ├── prompt_008.log
        │   │   │   ├── response_001.log
        │   │   │   ├── response_002.log
        │   │   │   ├── response_003.log
        │   │   │   ├── response_004.log
        │   │   │   ├── response_005.log
        │   │   │   ├── response_006.log
        │   │   │   ├── response_007.log
        │   │   │   └── response_008.log
        │   ├── ManagerAgent
        │   │   ├── test_edits
        │   │   │   ├── prompt_001.log
        │   │   │   ├── prompt_002.log
        │   │   │   ├── prompt_003.log
        │   │   │   ├── prompt_004.log
        │   │   │   ├── prompt_005.log
        │   │   │   ├── prompt_006.log
        │   │   │   ├── response_001.log
        │   │   │   ├── response_002.log
        │   │   │   ├── response_003.log
        │   │   │   ├── response_004.log
        │   │   │   ├── response_005.log
        │   │   │   └── response_006.log
        │   │   ├── test_simple_task_rejection
        │   │   │   ├── prompt_001.log
        │   │   │   ├── prompt_002.log
        │   │   │   ├── prompt_003.log
        │   │   │   ├── prompt_004.log
        │   │   │   ├── prompt_005.log
        │   │   │   ├── prompt_006.log
        │   │   │   ├── prompt_007.log
        │   │   │   ├── prompt_008.log
        │   │   │   ├── response_001.log
        │   │   │   ├── response_002.log
        │   │   │   ├── response_003.log
        │   │   │   ├── response_004.log
        │   │   │   ├── response_005.log
        │   │   │   ├── response_006.log
        │   │   │   ├── response_007.log
        │   │   │   └── response_008.log
        │   │   └── test_write_simple_script
        │   │   │   ├── prompt_001.log
        │   │   │   ├── prompt_002.log
        │   │   │   ├── prompt_003.log
        │   │   │   ├── prompt_004.log
        │   │   │   ├── prompt_005.log
        │   │   │   ├── prompt_006.log
        │   │   │   ├── prompt_007.log
        │   │   │   ├── prompt_008.log
        │   │   │   ├── prompt_009.log
        │   │   │   ├── prompt_010.log
        │   │   │   ├── response_001.log
        │   │   │   ├── response_002.log
        │   │   │   ├── response_003.log
        │   │   │   ├── response_004.log
        │   │   │   ├── response_005.log
        │   │   │   ├── response_006.log
        │   │   │   ├── response_007.log
        │   │   │   ├── response_008.log
        │   │   │   ├── response_009.log
        │   │   │   └── response_010.log
        │   ├── MonologueAgent
        │   │   └── test_write_simple_script
        │   │   │   ├── prompt_001.log
        │   │   │   ├── prompt_002.log
        │   │   │   ├── prompt_003.log
        │   │   │   ├── prompt_004.log
        │   │   │   ├── response_001.log
        │   │   │   ├── response_002.log
        │   │   │   ├── response_003.log
        │   │   │   └── response_004.log
        │   └── PlannerAgent
        │   │   └── test_write_simple_script
        │   │       ├── prompt_001.log
        │   │       ├── prompt_002.log
        │   │       ├── prompt_003.log
        │   │       ├── prompt_004.log
        │   │       ├── prompt_005.log
        │   │       ├── response_001.log
        │   │       ├── response_002.log
        │   │       ├── response_003.log
        │   │       ├── response_004.log
        │   │       └── response_005.log
        ├── regenerate.sh
        ├── start_http_server.py
        ├── static
        │   └── index.html
        ├── test_agent.py
        └── workspace
        │   └── test_edits
        │       └── bad.txt
    ├── test_fileops.py
    └── unit
        ├── README.md
        ├── test_action_serialization.py
        ├── test_agent_skill.py
        ├── test_arg_parser.py
        ├── test_config.py
        ├── test_event_stream.py
        ├── test_ipython.py
        ├── test_is_stuck.py
        ├── test_json.py
        ├── test_logging.py
        ├── test_micro_agents.py
        ├── test_observation_serialization.py
        ├── test_response_parsing.py
        ├── test_sandbox.py
        └── test_storage.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ipynb linguist-vendored
2 | 


--------------------------------------------------------------------------------
/.github/.codecov.yml:
--------------------------------------------------------------------------------
 1 | codecov:
 2 |   notify:
 3 |     wait_for_ci: true
 4 | 
 5 | coverage:
 6 |   status:
 7 |     patch:
 8 |       default:
 9 |         threshold: 100% # allow patch coverage to be lower than project coverage by any amount
10 |     project:
11 |       default:
12 |         threshold: 5% # allow project coverage to drop at most 5%
13 | 
14 | comment: false
15 | github_checks:
16 |     annotations: false
17 | 
18 | ignore:
19 |   - "agenthub/SWE_agent/**" # SWE agent is deprecated
20 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_template.yml:
--------------------------------------------------------------------------------
 1 | name: Bug
 2 | description: Report a problem with OpenDevin
 3 | title: '[Bug]: '
 4 | labels: ['bug']
 5 | body:
 6 |   - type: markdown
 7 |     attributes:
 8 |       value: Thank you for taking the time to fill out this bug report. We greatly appreciate your effort to complete this template fully. Please provide as much information as possible to help us understand and address the issue effectively.
 9 | 
10 |   - type: checkboxes
11 |     attributes:
12 |       label: Is there an existing issue for the same bug?
13 |       description: Please check if an issue already exists for the bug you encountered.
14 |       options:
15 |       - label: I have checked the troubleshooting document at https://opendevin.github.io/OpenDevin/modules/usage/troubleshooting
16 |         required: true
17 |       - label: I have checked the existing issues.
18 |         required: true
19 | 
20 |   - type: textarea
21 |     id: bug-description
22 |     attributes:
23 |       label: Describe the bug
24 |       description: Provide a short description of the problem.
25 |     validations:
26 |       required: true
27 | 
28 |   - type: textarea
29 |     id: current-version
30 |     attributes:
31 |       label: Current OpenDevin version
32 |       description: What version of OpenDevin are you using? If you're running in docker, tell us the tag you're using (e.g. ghcr.io/opendevin/opendevin:0.3.1).
33 |       render: bash
34 |     validations:
35 |       required: true
36 | 
37 |   - type: textarea
38 |     id: config
39 |     attributes:
40 |       label: Installation and Configuration
41 |       description: Please provide any commands you ran and any configuration (redacting API keys)
42 |       render: bash
43 |     validations:
44 |       required: true
45 | 
46 |   - type: textarea
47 |     id: model-agent
48 |     attributes:
49 |       label: Model and Agent
50 |       description: What model and agent are you using? You can see these settings in the UI by clicking the settings wheel.
51 |       placeholder: |
52 |         - Model:
53 |         - Agent:
54 | 
55 |   - type: textarea
56 |     id: os-version
57 |     attributes:
58 |       label: Operating System
59 |       description: What Operating System are you using? Linux, Mac OS, WSL on Windows
60 | 
61 |   - type: textarea
62 |     id: repro-steps
63 |     attributes:
64 |       label: Reproduction Steps
65 |       description: Please list the steps to reproduce the issue.
66 |       placeholder: |
67 |         1.
68 |         2.
69 |         3.
70 | 
71 |   - type: textarea
72 |     id: additional-context
73 |     attributes:
74 |       label: Logs, Errors, Screenshots, and Additional Context
75 |       description: LLM logs will be stored in the `logs/llm/default` folder. Please add any additional context about the problem here.
76 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature Request
 3 | about: Suggest an idea for OpenDevin features
 4 | title: ''
 5 | labels: 'enhancement'
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **What problem or use case are you trying to solve?**
11 | 
12 | **Describe the UX of the solution you'd like**
13 | 
14 | **Do you have thoughts on the technical implementation?**
15 | 
16 | **Describe alternatives you've considered**
17 | 
18 | **Additional context**
19 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/technical_proposal.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Technical Proposal
 3 | about: Propose a new architecture or technology
 4 | title: ''
 5 | labels: 'proposal'
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Summary**
11 | 
12 | **Motivation**
13 | 
14 | **Technical Design**
15 | 
16 | **Alternatives to Consider**
17 | 
18 | **Additional context**
19 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for all configuration options:
 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
 5 | 
 6 | version: 2
 7 | updates:
 8 |   - package-ecosystem: "pip" # See documentation for possible values
 9 |     directory: "/" # Location of package manifests
10 |     schedule:
11 |       interval: "daily"
12 |   - package-ecosystem: "npm" # See documentation for possible values
13 |     directory: "/frontend" # Location of package manifests
14 |     schedule:
15 |       interval: "daily"
16 | 


--------------------------------------------------------------------------------
/.github/workflows/deploy-docs.yml:
--------------------------------------------------------------------------------
 1 | name: Deploy Docs to GitHub Pages
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |   pull_request:
 8 |     branches:
 9 |       - main
10 | 
11 | jobs:
12 |   build:
13 |     name: Build Docusaurus
14 |     runs-on: ubuntu-latest
15 |     if: github.repository == 'OpenDevin/OpenDevin'
16 |     steps:
17 |       - uses: actions/checkout@v4
18 |         with:
19 |           fetch-depth: 0
20 |       - uses: actions/setup-node@v4
21 |         with:
22 |           node-version: 18
23 |           cache: npm
24 |           cache-dependency-path: docs/package-lock.json
25 |       - name: Set up Python
26 |         uses: actions/setup-python@v5
27 |         with:
28 |           python-version: "3.11"
29 | 
30 |       - name: Generate Python Docs
31 |         run: rm -rf docs/modules/python && pip install pydoc-markdown && pydoc-markdown
32 |       - name: Install dependencies
33 |         run: cd docs && npm ci
34 |       - name: Build website
35 |         run: cd docs && npm run build
36 | 
37 |       - name: Upload Build Artifact
38 |         if: github.ref == 'refs/heads/main'
39 |         uses: actions/upload-pages-artifact@v3
40 |         with:
41 |           path: docs/build
42 | 
43 |   deploy:
44 |     name: Deploy to GitHub Pages
45 |     needs: build
46 |     if: github.ref == 'refs/heads/main' && github.repository == 'OpenDevin/OpenDevin'
47 |     # Grant GITHUB_TOKEN the permissions required to make a Pages deployment
48 |     permissions:
49 |       pages: write # to deploy to Pages
50 |       id-token: write # to verify the deployment originates from an appropriate source
51 |     # Deploy to the github-pages environment
52 |     environment:
53 |       name: github-pages
54 |       url: ${{ steps.deployment.outputs.page_url }}
55 |     runs-on: ubuntu-latest
56 |     steps:
57 |       - name: Deploy to GitHub Pages
58 |         id: deployment
59 |         uses: actions/deploy-pages@v4
60 | 


--------------------------------------------------------------------------------
/.github/workflows/dummy-agent-test.yml:
--------------------------------------------------------------------------------
 1 | name: Run e2e test with dummy agent
 2 | 
 3 | concurrency:
 4 |   group: ${{ github.workflow }}-${{ github.ref }}
 5 |   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
 6 | 
 7 | on:
 8 |   push:
 9 |     branches:
10 |     - main
11 |   pull_request:
12 | 
13 | env:
14 |   PERSIST_SANDBOX : "false"
15 | 
16 | jobs:
17 |   test:
18 |     runs-on: ubuntu-latest
19 |     steps:
20 |       - uses: actions/checkout@v4
21 |       - name: Set up Python
22 |         uses: actions/setup-python@v5
23 |         with:
24 |           python-version: '3.11'
25 |       - name: Set up environment
26 |         run: |
27 |           curl -sSL https://install.python-poetry.org | python3 -
28 |           poetry install --without evaluation
29 |           poetry run playwright install --with-deps chromium
30 |           wget https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json -P /tmp/llama_index/models--BAAI--bge-small-en-v1.5/snapshots/5c38ec7c405ec4b44b94cc5a9bb96e735b38267a/1_Pooling/
31 |       - name: Run tests
32 |         run: |
33 |           poetry run python opendevin/core/main.py -t "do a flip" -m ollama/not-a-model -d ./workspace/ -c DummyAgent
34 | 


--------------------------------------------------------------------------------
/.github/workflows/ghcr.yml:
--------------------------------------------------------------------------------
 1 | name: Publish Docker Image
 2 | 
 3 | concurrency:
 4 |   group: ${{ github.workflow }}-${{ github.ref }}
 5 |   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
 6 | 
 7 | on:
 8 |   push:
 9 |     branches:
10 |     - main
11 |     tags:
12 |       - '*'
13 |   pull_request:
14 |   workflow_dispatch:
15 |     inputs:
16 |       reason:
17 |         description: 'Reason for manual trigger'
18 |         required: true
19 |         default: ''
20 | 
21 | jobs:
22 |   ghcr_build_and_push:
23 |     runs-on: ubuntu-latest
24 | 
25 |     permissions:
26 |       contents: read
27 |       packages: write
28 | 
29 |     strategy:
30 |       matrix:
31 |         image: ["app", "sandbox"]
32 | 
33 |     steps:
34 |       - name: checkout
35 |         uses: actions/checkout@v4
36 | 
37 |       - name: Free Disk Space (Ubuntu)
38 |         uses: jlumbroso/free-disk-space@main
39 |         with:
 40 |           # setting this to "true" frees about 6 GB,
 41 |           # but might remove tools that are actually needed
42 |           tool-cache: true
43 | 
44 |           # all of these default to true, but feel free to set to
45 |           # "false" if necessary for your workflow
46 |           android: true
47 |           dotnet: true
48 |           haskell: true
49 |           large-packages: true
50 |           docker-images: false
51 |           swap-storage: true
52 | 
53 |       - name: Set up QEMU
54 |         uses: docker/setup-qemu-action@v3
55 | 
56 |       - name: Set up Docker Buildx
57 |         id: buildx
58 |         uses: docker/setup-buildx-action@v3
59 | 
60 |       - name: Login to ghcr
61 |         uses: docker/login-action@v1
62 |         with:
63 |           registry: ghcr.io
64 |           username: ${{ github.repository_owner }}
65 |           password: ${{ secrets.GITHUB_TOKEN }}
66 | 
67 |       - name: Build and push ${{ matrix.image }}
68 |         if: "!github.event.pull_request.head.repo.fork"
69 |         run: |
70 |           ./containers/build.sh ${{ matrix.image }} ${{ github.repository_owner }} --push
71 | 
72 |       - name: Build ${{ matrix.image }}
73 |         if: "github.event.pull_request.head.repo.fork"
74 |         run: |
75 |           ./containers/build.sh ${{ matrix.image }} ${{ github.repository_owner }}
76 | 
77 |   docker_build_success:
78 |     name: Docker Build Success
79 |     runs-on: ubuntu-latest
80 |     needs: ghcr_build_and_push
81 |     steps:
82 |     - run: echo Done!
83 | 


--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
 1 | name: Lint
 2 | 
 3 | concurrency:
 4 |   group: ${{ github.workflow }}-${{ github.ref }}
 5 |   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
 6 | 
 7 | on:
 8 |   push:
 9 |     branches:
10 |     - main
11 |   pull_request:
12 | 
13 | jobs:
14 |   lint-frontend:
15 |     name: Lint frontend
16 |     runs-on: ubuntu-latest
17 |     steps:
18 |       - uses: actions/checkout@v4
19 | 
20 |       - name: Install Node.js 20
21 |         uses: actions/setup-node@v4
22 |         with:
23 |           node-version: 20
24 | 
25 |       - name: Install dependencies
26 |         run: |
27 |           cd frontend
28 |           npm install --frozen-lockfile
29 | 
30 |       - name: Lint
31 |         run: |
32 |           cd frontend
33 |           npm run lint
34 | 
35 |   lint-python:
36 |     name: Lint python
37 |     runs-on: ubuntu-latest
38 |     steps:
39 |       - uses: actions/checkout@v4
40 |         with:
41 |           fetch-depth: 0
42 |       - name: Set up python
43 |         uses: actions/setup-python@v5
44 |         with:
45 |           python-version: 3.11
46 |           cache: 'pip'
47 |       - name: Install pre-commit
48 |         run: pip install pre-commit==3.7.0
49 |       - name: Run pre-commit hooks
50 |         run: pre-commit run --files opendevin/**/* agenthub/**/* evaluation/**/* --show-diff-on-failure --config ./dev_config/python/.pre-commit-config.yaml
51 | 


--------------------------------------------------------------------------------
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
 1 | name: 'Close stale issues'
 2 | on:
 3 |   schedule:
 4 |     - cron: '30 1 * * *'
 5 | 
 6 | jobs:
 7 |   stale:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - uses: actions/stale@v9
11 |         with:
12 |           # Aggressively close issues that have been explicitly labeled `age-out`
13 |           any-of-labels: age-out
14 |           stale-issue-message: 'This issue is stale because it has been open for 7 days with no activity. Remove stale label or comment or this will be closed in 1 day.'
15 |           close-issue-message: 'This issue was closed because it has been stalled for over 7 days with no activity.'
 16 |           stale-pr-message: 'This PR is stale because it has been open for 7 days with no activity. Remove stale label or comment or this will be closed in 1 day.'
17 |           close-pr-message: 'This PR was closed because it has been stalled for over 7 days with no activity.'
18 |           days-before-stale: 7
19 |           days-before-close: 1
20 | 
21 |       - uses: actions/stale@v9
22 |         with:
23 |           # Be more lenient with other issues
24 |           stale-issue-message: 'This issue is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
25 |           close-issue-message: 'This issue was closed because it has been stalled for over 30 days with no activity.'
26 |           stale-pr-message: 'This PR is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
27 |           close-pr-message: 'This PR was closed because it has been stalled for over 30 days with no activity.'
28 |           days-before-stale: 30
29 |           days-before-close: 7
30 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | =====================
 3 | 
 4 | Copyright © 2023
 5 | 
 6 | Permission is hereby granted, free of charge, to any person
 7 | obtaining a copy of this software and associated documentation
 8 | files (the “Software”), to deal in the Software without
 9 | restriction, including without limitation the rights to use,
10 | copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the
12 | Software is furnished to do so, subject to the following
13 | conditions:
14 | 
15 | The above copyright notice and this permission notice shall be
16 | included in all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 | OTHER DEALINGS IN THE SOFTWARE.
26 | 


--------------------------------------------------------------------------------
/LOG_VISUALIZER.md:
--------------------------------------------------------------------------------
1 | The log visualizer lets you inspect the history of each agent session. To produce a log, click "Clear" at the end of each session; the log of that session is then saved to the `frontend_log` folder.
2 | 
3 | After that, run `python my_log_visualizer.py` to start the Gradio frontend for the visualization, where you can select the log file to visualize.
4 | 
5 | The visualization will not only include the MCTS planning search tree, but also the state, active strategy, and action for steps where the agent does not replan. Feel free to take advantage of this to debug any reasoning errors (e.g., not recognizing the task is done).
6 | 


--------------------------------------------------------------------------------
/agenthub/__init__.py:
--------------------------------------------------------------------------------
 1 | from dotenv import load_dotenv
 2 | 
 3 | load_dotenv()
 4 | 
 5 | from . import (  # noqa: E402
 6 |     browsing_agent,
 7 |     dummy_web_agent,
 8 |     reasoner_agent_fast,
 9 |     reasoner_agent_full,
10 | )
11 | 
12 | __all__ = [
13 |     'browsing_agent',
14 |     'dummy_web_agent',
15 |     'reasoner_agent_full',
16 |     'reasoner_agent_fast',
17 | ]
18 | 


--------------------------------------------------------------------------------
/agenthub/browsing_agent/__init__.py:
--------------------------------------------------------------------------------
1 | from easyweb.controller.agent import Agent
2 | 
3 | from .browsing_agent import BrowsingAgent
4 | 
5 | Agent.register('BrowsingAgent', BrowsingAgent)
6 | 


--------------------------------------------------------------------------------
/agenthub/dummy_web_agent/README.md:
--------------------------------------------------------------------------------
 1 | # Browsing Agent Framework
 2 | 
 3 | This folder implements the basic BrowserGym [demo agent](https://github.com/ServiceNow/BrowserGym/tree/main/demo_agent) that enables full-featured web browsing.
 4 | 
 5 | 
 6 | ## Test run
 7 | 
 8 | Note that for browsing tasks, GPT-4 is usually a requirement to get reasonable results, due to the complexity of the web page structures.
 9 | 
10 | ```
 11 | poetry run python ./easyweb/core/main.py \
12 |            -i 10 \
13 |            -t "tell me the usa's president using google search" \
14 |            -c BrowsingAgent \
15 |            -m gpt-4o-2024-05-13
16 | ```
17 | 


--------------------------------------------------------------------------------
/agenthub/dummy_web_agent/__init__.py:
--------------------------------------------------------------------------------
1 | from easyweb.controller.agent import Agent
2 | 
3 | from .dummy_web_agent import DummyWebAgent
4 | 
5 | Agent.register('DummyWebAgent', DummyWebAgent)
6 | 


--------------------------------------------------------------------------------
/agenthub/dummy_web_agent/commands.txt:
--------------------------------------------------------------------------------
1 | poetry run python ./easyweb/core/main.py \
2 |            -i 10 \
3 |            -t "tell me the usa's president using google search" \
4 |            -c DummyWebAgent \
5 |            -m gpt-4o-2024-05-13
6 | 


--------------------------------------------------------------------------------
/agenthub/reasoner_agent_fast/__init__.py:
--------------------------------------------------------------------------------
1 | from easyweb.controller.agent import Agent
2 | 
3 | from .reasoner_agent_fast import ReasonerAgentFast
4 | 
5 | Agent.register('ReasonerAgentFast', ReasonerAgentFast)
6 | 


--------------------------------------------------------------------------------
/agenthub/reasoner_agent_fast/reasoner_agent_fast.py:
--------------------------------------------------------------------------------
 1 | from reasoners import ReasonerAgent
 2 | 
 3 | from easyweb.controller.agent import Agent
 4 | from easyweb.controller.state.state import State
 5 | from easyweb.core.logger import easyweb_logger as logger
 6 | from easyweb.events.action import Action
 7 | from easyweb.llm.llm import LLM
 8 | from easyweb.runtime.plugins import (
 9 |     PluginRequirement,
10 | )
11 | from easyweb.runtime.tools import RuntimeTool
12 | 
13 | 
14 | class ReasonerAgentFast(Agent):
15 |     VERSION = '0.1'
16 |     """
17 |     An agent that uses agent model abstractions to interact with the browser.
18 |     """
19 | 
20 |     sandbox_plugins: list[PluginRequirement] = []
21 |     runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
22 | 
23 |     def __init__(
24 |         self,
25 |         llm: LLM,
26 |     ) -> None:
27 |         """
28 |         Initializes a new instance of the AbstractBrowsingAgent class.
29 | 
30 |         Parameters:
31 |         - llm (Any): The llm to be used by this agent
32 |         """
33 |         super().__init__(llm)
34 | 
35 |         self.config_name = 'easyweb'
36 |         if 'gpt-4o-mini' in llm.model_name:
37 |             self.config_name = 'easyweb_mini'
38 | 
39 |         if 'o1' in llm.model_name or 'o3-mini' in llm.model_name:
40 |             llm = {
41 |                 'default': LLM(
42 |                     model='gpt-4o', api_key=llm.api_key, base_url=llm.base_url
43 |                 ),
44 |                 'policy': llm,
45 |             }
46 |         elif 'deepseek-reasoner' in llm.model_name:
47 |             llm = {
48 |                 'default': LLM(
49 |                     model='deepseek/deepseek-chat',
50 |                     api_key=llm.api_key,
51 |                     base_url=llm.base_url,
52 |                 ),
53 |                 'policy': llm,
54 |             }
55 | 
56 |         logger.info(f'Using {self.config_name}')
57 |         self.agent = ReasonerAgent(llm, config_name=self.config_name, logger=logger)
58 |         self.reset()
59 | 
60 |     def reset(self) -> None:
61 |         """
62 |         Resets the agent.
63 |         """
64 |         self.agent.reset()
65 | 
66 |     def step(self, env_state: State) -> Action:
67 |         return self.agent.step(env_state)
68 | 
69 |     def search_memory(self, query: str) -> list[str]:
70 |         raise NotImplementedError('Implement this abstract method')
71 | 


--------------------------------------------------------------------------------
/agenthub/reasoner_agent_full/__init__.py:
--------------------------------------------------------------------------------
1 | from easyweb.controller.agent import Agent
2 | 
3 | from .reasoner_agent_full import ReasonerAgentFull
4 | 
5 | Agent.register('ReasonerAgentFull', ReasonerAgentFull)
6 | 


--------------------------------------------------------------------------------
/agenthub/reasoner_agent_full/reasoner_agent_full.py:
--------------------------------------------------------------------------------
 1 | from reasoners import ReasonerAgent
 2 | 
 3 | from easyweb.controller.agent import Agent
 4 | from easyweb.controller.state.state import State
 5 | from easyweb.core.logger import easyweb_logger as logger
 6 | from easyweb.events.action import Action
 7 | from easyweb.llm.llm import LLM
 8 | from easyweb.runtime.plugins import (
 9 |     PluginRequirement,
10 | )
11 | from easyweb.runtime.tools import RuntimeTool
12 | 
13 | 
14 | class ReasonerAgentFull(Agent):
15 |     VERSION = '0.1'
16 |     """
17 |     An agent that uses agent model abstractions to interact with the browser.
18 |     """
19 | 
20 |     sandbox_plugins: list[PluginRequirement] = []
21 |     runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
22 | 
23 |     def __init__(
24 |         self,
25 |         llm: LLM,
26 |     ) -> None:
27 |         """
28 |         Initializes a new instance of the AbstractBrowsingAgent class.
29 | 
30 |         Parameters:
31 |         - llm (LLM): The llm to be used by this agent
32 |         """
33 |         super().__init__(llm)
34 |         if 'gpt-4o-mini' in llm.model_name:
35 |             self.config_name = 'easyweb_mini_world_model'
36 |         else:
37 |             self.config_name = 'easyweb_world_model'
38 | 
39 |         logger.info(f'Using {self.config_name}')
40 |         self.agent = ReasonerAgent(llm, config_name=self.config_name, logger=logger)
41 |         self.reset()
42 | 
43 |     def reset(self) -> None:
44 |         """
45 |         Resets the agent.
46 |         """
47 |         self.agent.reset()
48 | 
49 |     def step(self, env_state: State) -> Action:
50 |         return self.agent.step(env_state)
51 | 
52 |     def search_memory(self, query: str) -> list[str]:
53 |         raise NotImplementedError('Implement this abstract method')
54 | 


--------------------------------------------------------------------------------
/config.toml:
--------------------------------------------------------------------------------
1 | [core]
2 | workspace_base="../workspace"
3 | persist_sandbox=false
4 | 
5 | [llm]
6 | model_port_config_file="./model_port_config.json"
7 | 


--------------------------------------------------------------------------------
/dev_config/python/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 3 |     rev: v4.5.0
 4 |     hooks:
 5 |       - id: trailing-whitespace
 6 |         exclude: docs/modules/python
 7 |       - id: end-of-file-fixer
 8 |         exclude: docs/modules/python
 9 |       - id: check-yaml
10 |       - id: debug-statements
11 | 
12 |   - repo: https://github.com/tox-dev/pyproject-fmt
13 |     rev: 1.7.0
14 |     hooks:
15 |       - id: pyproject-fmt
16 |   - repo: https://github.com/abravalheri/validate-pyproject
17 |     rev: v0.16
18 |     hooks:
19 |       - id: validate-pyproject
20 | 
21 |   - repo: https://github.com/astral-sh/ruff-pre-commit
22 |     # Ruff version.
23 |     rev: v0.4.1
24 |     hooks:
25 |       # Run the linter.
26 |       - id: ruff
27 |         entry: ruff check --config dev_config/python/ruff.toml
28 |         types_or: [python, pyi, jupyter]
29 |         args: [--fix]
30 |       # Run the formatter.
31 |       - id: ruff-format
32 |         entry: ruff format --config dev_config/python/ruff.toml
33 |         types_or: [python, pyi, jupyter]
34 | 
35 |   # - repo: https://github.com/pre-commit/mirrors-mypy
36 |   #   rev: v1.9.0
37 |   #   hooks:
38 |   #     - id: mypy
39 |   #       additional_dependencies:
40 |   #         [types-requests, types-setuptools, types-pyyaml, types-toml]
41 |   #       entry: mypy --config-file dev_config/python/mypy.ini --exclude easyweb/__init__.py easyweb/ agenthub/
42 |   #       always_run: true
43 |   #       pass_filenames: false
44 | 


--------------------------------------------------------------------------------
/dev_config/python/mypy.ini:
--------------------------------------------------------------------------------
 1 | [mypy]
 2 | warn_unused_configs = True
 3 | ignore_missing_imports = True
 4 | check_untyped_defs = True
 5 | explicit_package_bases = True
 6 | warn_unreachable = True
 7 | warn_redundant_casts = True
 8 | no_implicit_optional = True
 9 | strict_optional = True
10 | 


--------------------------------------------------------------------------------
/dev_config/python/ruff.toml:
--------------------------------------------------------------------------------
 1 | [lint]
 2 | select = [
 3 |     "E",
 4 |     "W",
 5 |     "F",
 6 |     "I",
 7 |     "Q",
 8 |     "B",
 9 | ]
10 | 
11 | ignore = [
12 |     "E501",
13 |     "B003",
14 |     "B007",
15 |     "B009",
16 |     "B010",
17 |     "B904",
18 |     "B018",
19 | ]
20 | 
21 | [lint.flake8-quotes]
22 | docstring-quotes = "double"
23 | inline-quotes = "single"
24 | 
25 | [format]
26 | quote-style = "single"
27 | 


--------------------------------------------------------------------------------
/easy-web-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitrix-org/easyweb/d2b56bc228541d2af2abb1de9c0c440ad830d30c/easy-web-icon.png


--------------------------------------------------------------------------------
/easyweb/README.md:
--------------------------------------------------------------------------------
 1 | # EasyWeb Shared Abstraction and Components
 2 | 
 3 | This is a Python package that contains all the shared abstraction (e.g., Agent) and components (e.g., sandbox, web browser, search API, selenium).
 4 | 
 5 | See the [main README](../README.md) for instructions on how to run EasyWeb from the command line.
 6 | 
 7 | ## Sandbox Image
 8 | ```bash
 9 | docker build -f opendevin/sandbox/docker/Dockerfile -t opendevin/sandbox:v0.1 .
10 | ```
11 | 
12 | ## Sandbox Runner
13 | 
14 | Run the docker-based interactive sandbox:
15 | 
16 | ```bash
17 | mkdir workspace
18 | python3 opendevin/sandbox/docker/sandbox.py -d workspace
19 | ```
20 | 
21 | It will map `./workspace` into the docker container with the folder permission correctly adjusted for current user.
22 | 
23 | Example screenshot:
24 | 
25 | <img width="868" alt="image" src="https://github.com/OpenDevin/OpenDevin/assets/38853559/8dedcdee-437a-4469-870f-be29ca2b7c32">
26 | 


--------------------------------------------------------------------------------
/easyweb/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitrix-org/easyweb/d2b56bc228541d2af2abb1de9c0c440ad830d30c/easyweb/__init__.py


--------------------------------------------------------------------------------
/easyweb/controller/__init__.py:
--------------------------------------------------------------------------------
1 | from .agent_controller import AgentController
2 | 
3 | __all__ = [
4 |     'AgentController',
5 | ]
6 | 


--------------------------------------------------------------------------------
/easyweb/controller/action_parser.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | 
 3 | from easyweb.events.action import Action
 4 | 
 5 | 
 6 | class ResponseParser(ABC):
 7 |     """
 8 |     This abstract base class is a general interface for a response parser dedicated to
 9 |     parsing the action from the response from the LLM.
10 |     """
11 | 
12 |     def __init__(
13 |         self,
14 |     ):
15 |         # Need to pay attention to the item order in self.action_parsers
16 |         self.action_parsers = []
17 | 
18 |     @abstractmethod
19 |     def parse(self, response: str) -> Action:
20 |         """
21 |         Parses the action from the response from the LLM.
22 | 
23 |         Parameters:
24 |         - response (str): The response from the LLM.
25 | 
26 |         Returns:
27 |         - action (Action): The action parsed from the response.
28 |         """
29 |         pass
30 | 
31 |     @abstractmethod
32 |     def parse_response(self, response) -> str:
33 |         """
34 |         Extracts the action string from the response from the LLM.
35 | 
36 |         Parameters:
37 |         - response: The response from the LLM (the parameter is not type-annotated).
38 | 
39 |         Returns:
40 |         - action_str (str): The action str parsed from the response.
41 |         """
42 |         pass
43 | 
44 |     @abstractmethod
45 |     def parse_action(self, action_str: str) -> Action:
46 |         """
47 |         Parses the action from an action string.
48 | 
49 |         Parameters:
50 |         - action_str (str): The action string to parse (presumably the output of parse_response).
51 | 
52 |         Returns:
53 |         - action (Action): The action parsed from the action string.
54 |         """
55 |         pass
56 | 
57 | 
58 | class ActionParser(ABC):
59 |     """
60 |     This abstract base class is a general interface for an action parser dedicated to
61 |     parsing the action from the action str from the LLM.
62 |     """
63 | 
64 |     @abstractmethod
65 |     def check_condition(self, action_str: str) -> bool:
66 |         """
67 |         Check if the action string can be parsed by this parser.
68 |         """
69 |         pass
70 | 
71 |     @abstractmethod
72 |     def parse(self, action_str: str) -> Action:
73 |         """
74 |         Parses the action from the action string from the LLM response.
75 |         """
76 |         pass
77 | 


--------------------------------------------------------------------------------
/easyweb/core/const/guide_url.py:
--------------------------------------------------------------------------------
1 | TROUBLESHOOTING_URL = (
2 |     'https://opendevin.github.io/OpenDevin/modules/usage/troubleshooting'
3 | )
4 | 


--------------------------------------------------------------------------------
/easyweb/core/download.py:
--------------------------------------------------------------------------------
1 | # Run this file to trigger a model download
2 | import agenthub  # noqa F401 (we import this to get the agents registered)
3 | 


--------------------------------------------------------------------------------
/easyweb/core/exceptions.py:
--------------------------------------------------------------------------------
 1 | class MaxCharsExceedError(Exception):
 2 |     def __init__(self, num_of_chars=None, max_chars_limit=None):
 3 |         if num_of_chars is not None and max_chars_limit is not None:
 4 |             message = f'Number of characters {num_of_chars} exceeds MAX_CHARS limit: {max_chars_limit}'
 5 |         else:
 6 |             message = 'Number of characters exceeds MAX_CHARS limit'
 7 |         super().__init__(message)
 8 | 
 9 | 
10 | class AgentNoInstructionError(Exception):  # message is overridable; the default says an instruction is required
11 |     def __init__(self, message='Instruction must be provided'):
12 |         super().__init__(message)
13 | 
14 | 
15 | class AgentEventTypeError(Exception):  # message is overridable; the default says an event must be a dictionary
16 |     def __init__(self, message='Event must be a dictionary'):
17 |         super().__init__(message)
18 | 
19 | 
20 | class AgentAlreadyRegisteredError(Exception):
21 |     def __init__(self, name=None):
22 |         if name is not None:
23 |             message = f"Agent class already registered under '{name}'"
24 |         else:
25 |             message = 'Agent class already registered'
26 |         super().__init__(message)
27 | 
28 | 
29 | class AgentNotRegisteredError(Exception):
30 |     def __init__(self, name=None):
31 |         if name is not None:
32 |             message = f"No agent class registered under '{name}'"
33 |         else:
34 |             message = 'No agent class registered'
35 |         super().__init__(message)
36 | 
37 | 
38 | class LLMOutputError(Exception):  # message is required here; there is no default text
39 |     def __init__(self, message):
40 |         super().__init__(message)
41 | 
42 | 
43 | class SandboxInvalidBackgroundCommandError(Exception):
44 |     def __init__(self, id=None):
45 |         if id is not None:
46 |             message = f'Invalid background command id {id}'
47 |         else:
48 |             message = 'Invalid background command id'
49 |         super().__init__(message)
50 | 
51 | 
52 | class TaskInvalidStateError(Exception):
53 |     def __init__(self, state=None):
54 |         if state is not None:
55 |             message = f'Invalid state {state}'
56 |         else:
57 |             message = 'Invalid state'
58 |         super().__init__(message)
59 | 
60 | 
61 | class BrowserInitException(Exception):  # message is overridable; the default reports a failed browser init
62 |     def __init__(self, message='Failed to initialize browser environment'):
63 |         super().__init__(message)
64 | 
65 | 
66 | class BrowserUnavailableException(Exception):  # message is overridable; the default is the text below
67 |     def __init__(
68 |         self,
69 |         message='Browser environment is not available, please check if has been initialized',
70 |     ):
71 |         super().__init__(message)
72 | 
73 | 
74 | # These exceptions get sent back to the LLM
75 | class AgentMalformedActionError(Exception):  # message is overridable; the default is 'Malformed response'
76 |     def __init__(self, message='Malformed response'):
77 |         super().__init__(message)
78 | 
79 | 
80 | class AgentNoActionError(Exception):  # message is overridable; the default says an action must be returned
81 |     def __init__(self, message='Agent must return an action'):
82 |         super().__init__(message)
83 | 


--------------------------------------------------------------------------------
/easyweb/core/metrics.py:
--------------------------------------------------------------------------------
 1 | class Metrics:
 2 |     """
 3 |     Metrics class can record various metrics during running and evaluation.
 4 |     Currently we define the following metrics:
 5 |         accumulated_cost: the total cost (USD $) of the current LLM.
 6 |     """
 7 | 
 8 |     def __init__(self) -> None:
 9 |         self._accumulated_cost: float = 0.0
10 |         self._costs: list[float] = []
11 | 
12 |     @property
13 |     def accumulated_cost(self) -> float:
14 |         return self._accumulated_cost
15 | 
16 |     @accumulated_cost.setter
17 |     def accumulated_cost(self, value: float) -> None:
18 |         if value < 0:
19 |             raise ValueError('Total cost cannot be negative.')
20 |         self._accumulated_cost = value
21 | 
22 |     @property
23 |     def costs(self) -> list:
24 |         return self._costs
25 | 
26 |     def add_cost(self, value: float) -> None:
27 |         if value < 0:
28 |             raise ValueError('Added cost cannot be negative.')
29 |         self._accumulated_cost += value
30 |         self._costs.append(value)
31 | 
32 |     def get(self):
33 |         """
34 |         Return the metrics in a dictionary.
35 |         """
36 |         return {'accumulated_cost': self._accumulated_cost, 'costs': self._costs}
37 | 
38 |     def log(self):
39 |         """
40 |         Log the metrics.
41 |         """
42 |         metrics = self.get()
43 |         logs = ''
44 |         for key, value in metrics.items():
45 |             logs += f'{key}: {value}\n'
46 |         return logs
47 | 


--------------------------------------------------------------------------------
/easyweb/core/schema/__init__.py:
--------------------------------------------------------------------------------
 1 | from .action import ActionType
 2 | from .agent import AgentState
 3 | from .config import ConfigType
 4 | from .observation import ObservationType
 5 | from .stream import CancellableStream, StreamMixin
 6 | 
 7 | __all__ = [
 8 |     'ActionType',
 9 |     'ObservationType',
10 |     'ConfigType',
11 |     'AgentState',
12 |     'CancellableStream',
13 |     'StreamMixin',
14 | ]
15 | 


--------------------------------------------------------------------------------
/easyweb/core/schema/action.py:
--------------------------------------------------------------------------------
  1 | from pydantic import BaseModel, Field
  2 | 
  3 | __all__ = ['ActionType']
  4 | 
  5 | 
  6 | class ActionTypeSchema(BaseModel):  # each field's default is that action type's string identifier
  7 |     INIT: str = Field(default='initialize')
  8 |     """Initializes the agent. Only sent by client.
  9 |     """
 10 | 
 11 |     MESSAGE: str = Field(default='message')
 12 |     """Represents a message.
 13 |     """
 14 | 
 15 |     START: str = Field(default='start')
 16 |     """Starts a new development task OR send chat from the user. Only sent by the client.
 17 |     """
 18 | 
 19 |     READ: str = Field(default='read')
 20 |     """Reads the content of a file.
 21 |     """
 22 | 
 23 |     WRITE: str = Field(default='write')
 24 |     """Writes the content to a file.
 25 |     """
 26 | 
 27 |     RUN: str = Field(default='run')
 28 |     """Runs a command.
 29 |     """
 30 | 
 31 |     RUN_IPYTHON: str = Field(default='run_ipython')
 32 |     """Runs a IPython cell.
 33 |     """
 34 | 
 35 |     KILL: str = Field(default='kill')
 36 |     """Kills a background command.
 37 |     """
 38 | 
 39 |     BROWSE: str = Field(default='browse')
 40 |     """Opens a web page.
 41 |     """
 42 | 
 43 |     BROWSE_INTERACTIVE: str = Field(default='browse_interactive')
 44 |     """Interact with the browser instance.
 45 |     """
 46 | 
 47 |     RECALL: str = Field(default='recall')
 48 |     """Searches long-term memory
 49 |     """
 50 | 
 51 |     DELEGATE: str = Field(default='delegate')
 52 |     """Delegates a task to another agent.
 53 |     """
 54 | 
 55 |     FINISH: str = Field(default='finish')
 56 |     """If you're absolutely certain that you've completed your task and have tested your work,
 57 |     use the finish action to stop working.
 58 |     """
 59 | 
 60 |     REJECT: str = Field(default='reject')
 61 |     """If you're absolutely certain that you cannot complete the task with given requirements,
 62 |     use the reject action to stop working.
 63 |     """
 64 | 
 65 |     NULL: str = Field(default='null')
 66 | 
 67 |     SUMMARIZE: str = Field(default='summarize')
 68 | 
 69 |     ADD_TASK: str = Field(default='add_task')
 70 | 
 71 |     MODIFY_TASK: str = Field(default='modify_task')
 72 | 
 73 |     PAUSE: str = Field(default='pause')
 74 |     """Pauses the task.
 75 |     """
 76 | 
 77 |     RESUME: str = Field(default='resume')
 78 |     """Resumes the task.
 79 |     """
 80 | 
 81 |     STOP: str = Field(default='stop')
 82 |     """Stops the task. Must send a start action to restart a new task.
 83 |     """
 84 | 
 85 |     CHANGE_AGENT_STATE: str = Field(default='change_agent_state')
 86 | 
 87 |     PUSH: str = Field(default='push')
 88 |     """Push a branch to github."""
 89 | 
 90 |     SEND_PR: str = Field(default='send_pr')
 91 |     """Send a PR to github."""
 92 | 
 93 |     START_PLANNING: str = Field(default='start_planning')
 94 |     """Start planning for the next action"""
 95 | 
 96 |     FINISH_PLANNING: str = Field(default='finish_planning')
 97 |     """Finish planning for the next action"""
 98 | 
 99 | 
100 | ActionType = ActionTypeSchema()
101 | 


--------------------------------------------------------------------------------
/easyweb/core/schema/agent.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum
 2 | 
 3 | 
 4 | class AgentState(str, Enum):  # str-valued enum of the states an agent can be in
 5 |     LOADING = 'loading'
 6 |     """The agent is loading.
 7 |     """
 8 | 
 9 |     INIT = 'init'
10 |     """The agent is initialized.
11 |     """
12 | 
13 |     RUNNING = 'running'
14 |     """The agent is running.
15 |     """
16 | 
17 |     AWAITING_USER_INPUT = 'awaiting_user_input'
18 |     """The agent is awaiting user input.
19 |     """
20 | 
21 |     PAUSED = 'paused'
22 |     """The agent is paused.
23 |     """
24 | 
25 |     STOPPED = 'stopped'
26 |     """The agent is stopped.
27 |     """
28 | 
29 |     FINISHED = 'finished'
30 |     """The agent is finished with the current task.
31 |     """
32 | 
33 |     REJECTED = 'rejected'
34 |     """The agent rejects the task.
35 |     """
36 | 
37 |     ERROR = 'error'
38 |     """An error occurred during the task.
39 |     """
40 | 


--------------------------------------------------------------------------------
/easyweb/core/schema/config.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum
 2 | 
 3 | 
 4 | class ConfigType(str, Enum):  # str-valued enum; every member's value is identical to its name
 5 |     # For frontend
 6 |     LLM_CUSTOM_LLM_PROVIDER = 'LLM_CUSTOM_LLM_PROVIDER'
 7 |     LLM_MAX_INPUT_TOKENS = 'LLM_MAX_INPUT_TOKENS'
 8 |     LLM_MAX_OUTPUT_TOKENS = 'LLM_MAX_OUTPUT_TOKENS'
 9 |     LLM_TOP_P = 'LLM_TOP_P'
10 |     LLM_TEMPERATURE = 'LLM_TEMPERATURE'
11 |     LLM_TIMEOUT = 'LLM_TIMEOUT'
12 |     LLM_API_KEY = 'LLM_API_KEY'
13 |     LLM_BASE_URL = 'LLM_BASE_URL'
14 |     AWS_ACCESS_KEY_ID = 'AWS_ACCESS_KEY_ID'
15 |     AWS_SECRET_ACCESS_KEY = 'AWS_SECRET_ACCESS_KEY'
16 |     AWS_REGION_NAME = 'AWS_REGION_NAME'
17 |     WORKSPACE_BASE = 'WORKSPACE_BASE'
18 |     WORKSPACE_MOUNT_PATH = 'WORKSPACE_MOUNT_PATH'
19 |     WORKSPACE_MOUNT_REWRITE = 'WORKSPACE_MOUNT_REWRITE'
20 |     WORKSPACE_MOUNT_PATH_IN_SANDBOX = 'WORKSPACE_MOUNT_PATH_IN_SANDBOX'
21 |     CACHE_DIR = 'CACHE_DIR'
22 |     LLM_MODEL = 'LLM_MODEL'
23 |     SANDBOX_CONTAINER_IMAGE = 'SANDBOX_CONTAINER_IMAGE'
24 |     RUN_AS_DEVIN = 'RUN_AS_DEVIN'
25 |     LLM_EMBEDDING_MODEL = 'LLM_EMBEDDING_MODEL'
26 |     LLM_EMBEDDING_BASE_URL = 'LLM_EMBEDDING_BASE_URL'
27 |     LLM_EMBEDDING_DEPLOYMENT_NAME = 'LLM_EMBEDDING_DEPLOYMENT_NAME'
28 |     LLM_API_VERSION = 'LLM_API_VERSION'
29 |     LLM_NUM_RETRIES = 'LLM_NUM_RETRIES'
30 |     LLM_RETRY_MIN_WAIT = 'LLM_RETRY_MIN_WAIT'
31 |     LLM_RETRY_MAX_WAIT = 'LLM_RETRY_MAX_WAIT'
32 |     AGENT_MEMORY_MAX_THREADS = 'AGENT_MEMORY_MAX_THREADS'
33 |     AGENT_MEMORY_ENABLED = 'AGENT_MEMORY_ENABLED'
34 |     MAX_ITERATIONS = 'MAX_ITERATIONS'
35 |     MAX_CHARS = 'MAX_CHARS'
36 |     AGENT = 'AGENT'
37 |     E2B_API_KEY = 'E2B_API_KEY'
38 |     SANDBOX_TYPE = 'SANDBOX_TYPE'
39 |     SANDBOX_USER_ID = 'SANDBOX_USER_ID'
40 |     SANDBOX_TIMEOUT = 'SANDBOX_TIMEOUT'
41 |     USE_HOST_NETWORK = 'USE_HOST_NETWORK'
42 |     SSH_HOSTNAME = 'SSH_HOSTNAME'
43 |     DISABLE_COLOR = 'DISABLE_COLOR'
44 |     DEBUG = 'DEBUG'
45 | 


--------------------------------------------------------------------------------
/easyweb/core/schema/observation.py:
--------------------------------------------------------------------------------
 1 | from pydantic import BaseModel, Field
 2 | 
 3 | __all__ = ['ObservationType']
 4 | 
 5 | 
 6 | class ObservationTypeSchema(BaseModel):  # each field's default is that observation type's string identifier
 7 |     READ: str = Field(default='read')
 8 |     """The content of a file
 9 |     """
10 | 
11 |     WRITE: str = Field(default='write')
12 | 
13 |     BROWSE: str = Field(default='browse')
14 |     """The HTML content of a URL
15 |     """
16 | 
17 |     RUN: str = Field(default='run')
18 |     """The output of a command
19 |     """
20 | 
21 |     RUN_IPYTHON: str = Field(default='run_ipython')
22 |     """Runs a IPython cell.
23 |     """
24 | 
25 |     RECALL: str = Field(default='recall')
26 |     """The result of a search
27 |     """
28 | 
29 |     CHAT: str = Field(default='chat')
30 |     """A message from the user
31 |     """
32 | 
33 |     DELEGATE: str = Field(default='delegate')
34 |     """The result of a task delegated to another agent
35 |     """
36 | 
37 |     MESSAGE: str = Field(default='message')
38 | 
39 |     ERROR: str = Field(default='error')
40 | 
41 |     SUCCESS: str = Field(default='success')
42 | 
43 |     NULL: str = Field(default='null')
44 | 
45 |     AGENT_STATE_CHANGED: str = Field(default='agent_state_changed')
46 | 
47 | 
48 | ObservationType = ObservationTypeSchema()
49 | 


--------------------------------------------------------------------------------
/easyweb/core/schema/stream.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import Union
 3 | 
 4 | 
 5 | class StreamMixin:
 6 |     def __init__(self, generator):
 7 |         self.generator = generator
 8 |         self.closed = False
 9 | 
10 |     def __iter__(self):
11 |         return self
12 | 
13 |     def __next__(self):
14 |         if self.closed:
15 |             raise StopIteration
16 |         else:
17 |             return next(self.generator)
18 | 
19 | 
20 | class CancellableStream(StreamMixin, ABC):  # abstract stream: subclasses must provide close() and exit_code()
21 |     @abstractmethod
22 |     def close(self):
23 |         pass
24 | 
25 |     @abstractmethod
26 |     def exit_code(self) -> Union[int, None]:
27 |         pass
28 | 


--------------------------------------------------------------------------------
/easyweb/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .singleton import Singleton
2 | 
3 | __all__ = ['Singleton']
4 | 


--------------------------------------------------------------------------------
/easyweb/core/utils/json.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from datetime import datetime
 3 | 
 4 | from json_repair import repair_json
 5 | 
 6 | from easyweb.core.exceptions import LLMOutputError
 7 | from easyweb.events.event import Event
 8 | from easyweb.events.serialization import event_to_dict
 9 | 
10 | 
11 | def my_default_encoder(obj):
12 |     """
13 |     JSON `default` hook: datetimes become ISO-format strings, Events become dicts
14 |     """
15 |     if isinstance(obj, datetime):
16 |         return obj.isoformat()
17 |     if isinstance(obj, Event):
18 |         return event_to_dict(obj)
19 |     return json.JSONEncoder().default(obj)  # stdlib fallback: raises TypeError for unsupported types
20 | 
21 | 
22 | def dumps(obj, **kwargs):
23 |     """
24 |     Serialize an object to a JSON str via my_default_encoder; kwargs are passed to json.dumps
25 |     """
26 | 
27 |     return json.dumps(obj, default=my_default_encoder, **kwargs)
28 | 
29 | 
30 | def loads(json_str, **kwargs):
31 |     """
32 |     Create a JSON object from str
33 |     """
34 |     try:
35 |         return json.loads(json_str, **kwargs)
36 |     except json.JSONDecodeError:
37 |         pass
38 |     depth = 0
39 |     start = -1
40 |     for i, char in enumerate(json_str):
41 |         if char == '{':
42 |             if depth == 0:
43 |                 start = i
44 |             depth += 1
45 |         elif char == '}':
46 |             depth -= 1
47 |             if depth == 0 and start != -1:
48 |                 response = json_str[start : i + 1]
49 |                 try:
50 |                     json_str = repair_json(response)
51 |                     return json.loads(json_str, **kwargs)
52 |                 except (json.JSONDecodeError, ValueError, TypeError) as e:
53 |                     raise LLMOutputError(
54 |                         'Invalid JSON in response. Please make sure the response is a valid JSON object.'
55 |                     ) from e
56 |     raise LLMOutputError('No valid JSON object found in response.')
57 | 


--------------------------------------------------------------------------------
/easyweb/core/utils/singleton.py:
--------------------------------------------------------------------------------
 1 | import dataclasses
 2 | 
 3 | 
 4 | class Singleton(type):
 5 |     _instances: dict = {}
 6 | 
 7 |     def __call__(cls, *args, **kwargs):
 8 |         if cls not in cls._instances:
 9 |             cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
10 |         else:
11 |             # allow updates, just update existing instance
12 |             # perhaps not the most orthodox way to do it, though it simplifies client code
13 |             # useful for pre-defined groups of settings
14 |             instance = cls._instances[cls]
15 |             for key, value in kwargs.items():
16 |                 setattr(instance, key, value)
17 |         return cls._instances[cls]
18 | 
19 |     @classmethod
20 |     def reset(cls):
21 |         # used by pytest to reset the state of the singleton instances
22 |         for instance_type, instance in cls._instances.items():
23 |             print('resetting... ', instance_type)
24 |             for field in dataclasses.fields(instance_type):
25 |                 if dataclasses.is_dataclass(field.type):
26 |                     setattr(instance, field.name, field.type())
27 |                 else:
28 |                     setattr(instance, field.name, field.default)
29 | 


--------------------------------------------------------------------------------
/easyweb/events/__init__.py:
--------------------------------------------------------------------------------
 1 | from .event import Event, EventSource
 2 | from .stream import EventStream, EventStreamSubscriber
 3 | 
 4 | __all__ = [
 5 |     'Event',
 6 |     'EventSource',
 7 |     'EventStream',
 8 |     'EventStreamSubscriber',
 9 | ]
10 | 


--------------------------------------------------------------------------------
/easyweb/events/action/__init__.py:
--------------------------------------------------------------------------------
 1 | from .action import Action
 2 | from .agent import (
 3 |     AgentDelegateAction,
 4 |     AgentFinishAction,
 5 |     AgentRecallAction,
 6 |     AgentRejectAction,
 7 |     AgentSummarizeAction,
 8 |     ChangeAgentStateAction,
 9 | )
10 | from .browse import BrowseInteractiveAction, BrowseURLAction
11 | from .commands import CmdKillAction, CmdRunAction, IPythonRunCellAction
12 | from .empty import NullAction
13 | from .files import FileReadAction, FileWriteAction
14 | from .message import MessageAction
15 | from .planning import FinishPlanningAction, StartPlanningAction
16 | from .tasks import AddTaskAction, ModifyTaskAction
17 | 
18 | __all__ = [
19 |     'Action',
20 |     'NullAction',
21 |     'CmdRunAction',
22 |     'CmdKillAction',
23 |     'BrowseURLAction',
24 |     'BrowseInteractiveAction',
25 |     'FileReadAction',
26 |     'FileWriteAction',
27 |     'AgentRecallAction',
28 |     'AgentFinishAction',
29 |     'AgentRejectAction',
30 |     'AgentDelegateAction',
31 |     'AgentSummarizeAction',
32 |     'AddTaskAction',
33 |     'ModifyTaskAction',
34 |     'ChangeAgentStateAction',
35 |     'IPythonRunCellAction',
36 |     'MessageAction',
37 |     'StartPlanningAction',
38 |     'FinishPlanningAction',
39 | ]
40 | 


--------------------------------------------------------------------------------
/easyweb/events/action/action.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import ClassVar
 3 | 
 4 | from easyweb.events.event import Event
 5 | 
 6 | 
 7 | @dataclass
 8 | class Action(Event):
 9 |     runnable: ClassVar[bool] = False
10 | 


--------------------------------------------------------------------------------
/easyweb/events/action/agent.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass, field
 2 | from typing import ClassVar
 3 | 
 4 | from easyweb.core.schema import ActionType
 5 | 
 6 | from .action import Action
 7 | 
 8 | 
 9 | @dataclass
10 | class ChangeAgentStateAction(Action):
11 |     """Fake action, just to notify the client that a task state has changed."""
12 | 
13 |     agent_state: str
14 |     thought: str = ''
15 |     action: str = ActionType.CHANGE_AGENT_STATE
16 | 
17 |     @property
18 |     def message(self) -> str:
19 |         return f'Agent state changed to {self.agent_state}'
20 | 
21 | 
@dataclass
class AgentRecallAction(Action):
    """Action carrying a memory-search query."""

    query: str
    thought: str = ''
    action: str = ActionType.RECALL
    runnable: ClassVar[bool] = True

    @property
    def message(self) -> str:
        """Human-readable text announcing the search for ``query``."""
        return f"Let me dive into my memories to find what you're looking for! Searching for: '{self.query}'. This might take a moment."
32 | 
33 | 
@dataclass
class AgentSummarizeAction(Action):
    """Action carrying a summary text."""

    summary: str
    action: str = ActionType.SUMMARIZE

    @property
    def message(self) -> str:
        """The summary itself."""
        return self.summary

    def __str__(self) -> str:
        parts = ['**AgentSummarizeAction**\n', f'SUMMARY: {self.summary}']
        return ''.join(parts)
47 | 
48 | 
@dataclass
class AgentFinishAction(Action):
    """Action emitted with the FINISH action type; carries an ``outputs`` dict."""

    outputs: dict = field(default_factory=dict)
    thought: str = ''
    action: str = ActionType.FINISH

    @property
    def message(self) -> str:
        """Fixed completion message shown to the user."""
        return 'Task complete! How can I assist you next?'
59 | 
60 | 
@dataclass
class AgentRejectAction(Action):
    """Action emitted with the REJECT action type; carries an ``outputs`` dict."""

    outputs: dict = field(default_factory=dict)
    thought: str = ''
    action: str = ActionType.REJECT

    @property
    def message(self) -> str:
        """Fixed rejection message."""
        return 'Task is rejected by the agent.'
70 | 
71 | 
@dataclass
class AgentDelegateAction(Action):
    """Action naming another agent and the inputs handed to it."""

    agent: str  # name used in the user-facing message
    inputs: dict
    thought: str = ''
    action: str = ActionType.DELEGATE

    @property
    def message(self) -> str:
        """Human-readable text naming the agent being asked for help."""
        return f"I'm asking {self.agent} for help with this task."
82 | 


--------------------------------------------------------------------------------
/easyweb/events/action/browse.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import ClassVar
 3 | 
 4 | from easyweb.core.schema import ActionType
 5 | 
 6 | from .action import Action
 7 | 
 8 | 
 9 | @dataclass
10 | class BrowseURLAction(Action):
11 |     url: str
12 |     thought: str = ''
13 |     action: str = ActionType.BROWSE
14 |     runnable: ClassVar[bool] = True
15 | 
16 |     @property
17 |     def message(self) -> str:
18 |         return f'Browsing URL: {self.url}'
19 | 
20 |     def __str__(self) -> str:
21 |         ret = '**BrowseURLAction**\n'
22 |         if self.thought:
23 |             ret += f'THOUGHT: {self.thought}\n'
24 |         ret += f'URL: {self.url}'
25 |         return ret
26 | 
27 | 
@dataclass
class BrowseInteractiveAction(Action):
    """Action carrying a string of browser actions to execute."""

    browser_actions: str
    thought: str = ''
    browsergym_send_msg_to_user: str = ''
    action: str = ActionType.BROWSE_INTERACTIVE
    runnable: ClassVar[bool] = True

    @property
    def message(self) -> str:
        """Human-readable text listing the browser actions."""
        return f'Executing browser actions: {self.browser_actions}'

    def __str__(self) -> str:
        # The THOUGHT line is emitted only when a thought is present.
        parts = ['**BrowseInteractiveAction**\n']
        if self.thought:
            parts.append(f'THOUGHT: {self.thought}\n')
        parts.append(f'BROWSER_ACTIONS: {self.browser_actions}')
        return ''.join(parts)
46 | 


--------------------------------------------------------------------------------
/easyweb/events/action/commands.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import ClassVar
 3 | 
 4 | from easyweb.core.schema import ActionType
 5 | 
 6 | from .action import Action
 7 | 
 8 | 
 9 | @dataclass
10 | class CmdRunAction(Action):
11 |     command: str
12 |     background: bool = False
13 |     thought: str = ''
14 |     action: str = ActionType.RUN
15 |     runnable: ClassVar[bool] = True
16 | 
17 |     @property
18 |     def message(self) -> str:
19 |         return f'Running command: {self.command}'
20 | 
21 |     def __str__(self) -> str:
22 |         ret = '**CmdRunAction**\n'
23 |         if self.thought:
24 |             ret += f'THOUGHT: {self.thought}\n'
25 |         ret += f'COMMAND:\n{self.command}'
26 |         return ret
27 | 
28 | 
@dataclass
class CmdKillAction(Action):
    """Action carrying the id of a command to kill."""

    command_id: int
    thought: str = ''
    action: str = ActionType.KILL
    runnable: ClassVar[bool] = True

    @property
    def message(self) -> str:
        """Human-readable text naming the command id."""
        return f'Killing command: {self.command_id}'

    def __str__(self) -> str:
        # Unlike the other command actions, the thought is not rendered here.
        rendered = f'**CmdKillAction**\n{self.command_id}'
        return rendered
42 | 
43 | 
@dataclass
class IPythonRunCellAction(Action):
    """Action carrying Python code to run interactively."""

    code: str
    thought: str = ''
    action: str = ActionType.RUN_IPYTHON
    runnable: ClassVar[bool] = True
    kernel_init_code: str = ''  # code to run in the kernel (if the kernel is restarted)

    @property
    def message(self) -> str:
        """Human-readable text including the code."""
        return f'Running Python code interactively: {self.code}'

    def __str__(self) -> str:
        # The THOUGHT line is emitted only when a thought is present.
        parts = ['**IPythonRunCellAction**\n']
        if self.thought:
            parts.append(f'THOUGHT: {self.thought}\n')
        parts.append(f'CODE:\n{self.code}')
        return ''.join(parts)
62 | 


--------------------------------------------------------------------------------
/easyweb/events/action/empty.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | from easyweb.core.schema import ActionType
 4 | 
 5 | from .action import Action
 6 | 
 7 | 
 8 | @dataclass
 9 | class NullAction(Action):
10 |     """An action that does nothing."""
11 | 
12 |     action: str = ActionType.NULL
13 | 
14 |     @property
15 |     def message(self) -> str:
16 |         return 'No action'
17 | 


--------------------------------------------------------------------------------
/easyweb/events/action/files.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import ClassVar
 3 | 
 4 | from easyweb.core.schema import ActionType
 5 | 
 6 | from .action import Action
 7 | 
 8 | 
 9 | @dataclass
10 | class FileReadAction(Action):
11 |     """
12 |     Reads a file from a given path.
13 |     Can be set to read specific lines using start and end
14 |     Default lines 0:-1 (whole file)
15 |     """
16 | 
17 |     path: str
18 |     start: int = 0
19 |     end: int = -1
20 |     thought: str = ''
21 |     action: str = ActionType.READ
22 |     runnable: ClassVar[bool] = True
23 | 
24 |     @property
25 |     def message(self) -> str:
26 |         return f'Reading file: {self.path}'
27 | 
28 | 
@dataclass
class FileWriteAction(Action):
    """Action carrying a path and the content to write, with ``start``/``end`` bounds defaulting to 0 and -1."""

    path: str
    content: str
    start: int = 0
    end: int = -1
    thought: str = ''
    action: str = ActionType.WRITE
    runnable: ClassVar[bool] = True

    @property
    def message(self) -> str:
        """Human-readable text naming the file."""
        return f'Writing file: {self.path}'
42 | 


--------------------------------------------------------------------------------
/easyweb/events/action/message.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | from easyweb.core.schema import ActionType
 4 | 
 5 | from .action import Action
 6 | 
 7 | 
 8 | @dataclass
 9 | class MessageAction(Action):
10 |     content: str
11 |     wait_for_response: bool = False
12 |     action: str = ActionType.MESSAGE
13 | 
14 |     @property
15 |     def message(self) -> str:
16 |         return self.content
17 | 
18 |     def __str__(self) -> str:
19 |         ret = f'**MessageAction** (source={self.source})\n'
20 |         ret += f'CONTENT: {self.content}'
21 |         return ret
22 | 


--------------------------------------------------------------------------------
/easyweb/events/action/planning.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | from easyweb.core.schema import ActionType
 4 | 
 5 | from .action import Action
 6 | 
 7 | 
 8 | @dataclass
 9 | class StartPlanningAction(Action):
10 |     eta_seconds: float
11 |     action: str = ActionType.START_PLANNING
12 | 
13 |     @property
14 |     def message(self) -> str:
15 |         return f'Planning... ETA: {self.eta_seconds:.1f} seconds'
16 | 
17 |     def __str__(self) -> str:
18 |         return f'**StartPlanning** (eta_seconds={self.eta_seconds:.1f})'
19 | 
20 | 
@dataclass
class FinishPlanningAction(Action):
    """Action announcing that planning has finished, carrying the next step."""

    next_step: str
    action: str = ActionType.FINISH_PLANNING

    @property
    def message(self) -> str:
        """The next step text itself."""
        return self.next_step

    def __str__(self) -> str:
        rendered = f'**FinishPlanning**\nNEXT STEP: {self.next_step}'
        return rendered
32 | 


--------------------------------------------------------------------------------
/easyweb/events/action/tasks.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass, field
 2 | 
 3 | from easyweb.core.schema import ActionType
 4 | 
 5 | from .action import Action
 6 | 
 7 | 
 8 | @dataclass
 9 | class AddTaskAction(Action):
10 |     parent: str
11 |     goal: str
12 |     subtasks: list = field(default_factory=list)
13 |     thought: str = ''
14 |     action: str = ActionType.ADD_TASK
15 | 
16 |     @property
17 |     def message(self) -> str:
18 |         return f'Added task: {self.goal}'
19 | 
20 | 
@dataclass
class ModifyTaskAction(Action):
    """Action setting the task identified by ``task_id`` to ``state``."""

    task_id: str
    state: str
    thought: str = ''
    action: str = ActionType.MODIFY_TASK

    @property
    def message(self) -> str:
        """Human-readable text naming the task and its new state."""
        return f'Set task {self.task_id} to {self.state}'
31 | 


--------------------------------------------------------------------------------
/easyweb/events/event.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | from dataclasses import dataclass
 3 | from enum import Enum
 4 | 
 5 | 
 6 | class EventSource(str, Enum):
 7 |     AGENT = 'agent'
 8 |     USER = 'user'
 9 | 
10 | 
@dataclass
class Event:
    """
    Base class for events.

    Each property reads an underscore-prefixed attribute (``_message``,
    ``_id``, ``_timestamp``, ``_source``, ``_cause``) when one has been set
    on the instance and otherwise returns a fallback value.
    """

    @property
    def message(self) -> str | None:
        """``_message`` if set, else the empty string."""
        return getattr(self, '_message', '')

    @property
    def id(self) -> int | None:
        """``_id`` if set, else -1."""
        return getattr(self, '_id', -1)

    @property
    def timestamp(self) -> datetime.datetime | None:
        """``_timestamp`` if set, else None."""
        return getattr(self, '_timestamp', None)

    @property
    def source(self) -> EventSource | None:
        """``_source`` if set, else None."""
        return getattr(self, '_source', None)

    @property
    def cause(self) -> int | None:
        """``_cause`` if set, else None."""
        return getattr(self, '_cause', None)
42 | 


--------------------------------------------------------------------------------
/easyweb/events/observation/__init__.py:
--------------------------------------------------------------------------------
 1 | from .agent import AgentStateChangedObservation
 2 | from .browse import BrowserOutputObservation
 3 | from .commands import CmdOutputObservation, IPythonRunCellObservation
 4 | from .delegate import AgentDelegateObservation
 5 | from .empty import NullObservation
 6 | from .error import ErrorObservation
 7 | from .files import FileReadObservation, FileWriteObservation
 8 | from .observation import Observation
 9 | from .recall import AgentRecallObservation
10 | from .success import SuccessObservation
11 | 
12 | __all__ = [
13 |     'Observation',
14 |     'NullObservation',
15 |     'CmdOutputObservation',
16 |     'IPythonRunCellObservation',
17 |     'BrowserOutputObservation',
18 |     'FileReadObservation',
19 |     'FileWriteObservation',
20 |     'AgentRecallObservation',
21 |     'ErrorObservation',
22 |     'AgentStateChangedObservation',
23 |     'AgentDelegateObservation',
24 |     'SuccessObservation',
25 | ]
26 | 


--------------------------------------------------------------------------------
/easyweb/events/observation/agent.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | from easyweb.core.schema import ObservationType
 4 | 
 5 | from .observation import Observation
 6 | 
 7 | 
 8 | @dataclass
 9 | class AgentStateChangedObservation(Observation):
10 |     """
11 |     This data class represents the result from delegating to another agent
12 |     """
13 | 
14 |     agent_state: str
15 |     observation: str = ObservationType.AGENT_STATE_CHANGED
16 | 
17 |     @property
18 |     def message(self) -> str:
19 |         return ''
20 | 


--------------------------------------------------------------------------------
/easyweb/events/observation/browse.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass, field
 2 | 
 3 | from easyweb.core.schema import ObservationType
 4 | 
 5 | from .observation import Observation
 6 | 
 7 | 
 8 | @dataclass
 9 | class BrowserOutputObservation(Observation):
10 |     """
11 |     This data class represents the output of a browser.
12 |     """
13 | 
14 |     url: str
15 |     screenshot: str = field(repr=False)  # don't show in repr
16 |     status_code: int = 200
17 |     error: bool = False
18 |     observation: str = ObservationType.BROWSE
19 |     # do not include in the memory
20 |     open_pages_urls: list = field(default_factory=list)
21 |     active_page_index: int = -1
22 |     dom_object: dict = field(default_factory=dict, repr=False)  # don't show in repr
23 |     axtree_object: dict = field(default_factory=dict, repr=False)  # don't show in repr
24 |     extra_element_properties: dict = field(
25 |         default_factory=dict, repr=False
26 |     )  # don't show in repr
27 |     last_browser_action: str = ''
28 |     last_browser_action_error: str = ''
29 |     focused_element_bid: str = ''
30 |     scroll_position: dict = field(default_factory=dict, repr=False)
31 | 
32 |     @property
33 |     def message(self) -> str:
34 |         return 'Visited ' + self.url
35 | 
36 |     def __str__(self) -> str:
37 |         return (
38 |             '**BrowserOutputObservation**\n'
39 |             f'URL: {self.url}\n'
40 |             f'Scroll Position: {self.scroll_position}\n'
41 |             f'Status code: {self.status_code}\n'
42 |             f'Error: {self.error}\n'
43 |             f'Open pages: {self.open_pages_urls}\n'
44 |             f'Active page index: {self.active_page_index}\n'
45 |             f'Last browser action: {self.last_browser_action}\n'
46 |             f'Last browser action error: {self.last_browser_action_error}\n'
47 |             f'Focused element bid: {self.focused_element_bid}\n'
48 |             f'CONTENT: {self.content[:1000]}\n'
49 |         )
50 | 


--------------------------------------------------------------------------------
/easyweb/events/observation/commands.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | from easyweb.core.schema import ObservationType
 4 | 
 5 | from .observation import Observation
 6 | 
 7 | 
 8 | @dataclass
 9 | class CmdOutputObservation(Observation):
10 |     """
11 |     This data class represents the output of a command.
12 |     """
13 | 
14 |     command_id: int
15 |     command: str
16 |     exit_code: int = 0
17 |     observation: str = ObservationType.RUN
18 | 
19 |     @property
20 |     def error(self) -> bool:
21 |         return self.exit_code != 0
22 | 
23 |     @property
24 |     def message(self) -> str:
25 |         return f'Command `{self.command}` executed with exit code {self.exit_code}.'
26 | 
27 |     def __str__(self) -> str:
28 |         return f'**CmdOutputObservation (exit code={self.exit_code})**\n{self.content}'
29 | 
30 | 
@dataclass
class IPythonRunCellObservation(Observation):
    """
    This data class represents the output of a IPythonRunCellAction.
    """

    code: str
    observation: str = ObservationType.RUN_IPYTHON

    @property
    def error(self) -> bool:
        """Always False: IPython cells do not return exit codes."""
        return False

    @property
    def message(self) -> str:
        """Fixed text stating that code was executed."""
        return 'Code executed in IPython cell.'

    def __str__(self) -> str:
        rendered = f'**IPythonRunCellObservation**\n{self.content}'
        return rendered
50 | 


--------------------------------------------------------------------------------
/easyweb/events/observation/delegate.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | from easyweb.core.schema import ObservationType
 4 | 
 5 | from .observation import Observation
 6 | 
 7 | 
 8 | @dataclass
 9 | class AgentDelegateObservation(Observation):
10 |     """
11 |     This data class represents the result from delegating to another agent
12 |     """
13 | 
14 |     outputs: dict
15 |     observation: str = ObservationType.DELEGATE
16 | 
17 |     @property
18 |     def message(self) -> str:
19 |         return ''
20 | 


--------------------------------------------------------------------------------
/easyweb/events/observation/empty.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | from easyweb.core.schema import ObservationType
 4 | 
 5 | from .observation import Observation
 6 | 
 7 | 
 8 | @dataclass
 9 | class NullObservation(Observation):
10 |     """
11 |     This data class represents a null observation.
12 |     This is used when the produced action is NOT executable.
13 |     """
14 | 
15 |     observation: str = ObservationType.NULL
16 | 
17 |     @property
18 |     def message(self) -> str:
19 |         return 'No observation'
20 | 


--------------------------------------------------------------------------------
/easyweb/events/observation/error.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | from easyweb.core.schema import ObservationType
 4 | 
 5 | from .observation import Observation
 6 | 
 7 | 
 8 | @dataclass
 9 | class ErrorObservation(Observation):
10 |     """
11 |     This data class represents an error encountered by the agent.
12 |     """
13 | 
14 |     observation: str = ObservationType.ERROR
15 | 
16 |     @property
17 |     def message(self) -> str:
18 |         return self.content
19 | 


--------------------------------------------------------------------------------
/easyweb/events/observation/files.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | from easyweb.core.schema import ObservationType
 4 | 
 5 | from .observation import Observation
 6 | 
 7 | 
 8 | @dataclass
 9 | class FileReadObservation(Observation):
10 |     """
11 |     This data class represents the content of a file.
12 |     """
13 | 
14 |     path: str
15 |     observation: str = ObservationType.READ
16 | 
17 |     @property
18 |     def message(self) -> str:
19 |         return f'I read the file {self.path}.'
20 | 
21 | 
@dataclass
class FileWriteObservation(Observation):
    """
    This data class represents a file write operation.
    """

    path: str
    observation: str = ObservationType.WRITE

    @property
    def message(self) -> str:
        """Human-readable text naming the file that was written."""
        return f'I wrote to the file {self.path}.'
34 | 


--------------------------------------------------------------------------------
/easyweb/events/observation/observation.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | 
3 | from easyweb.events.event import Event
4 | 
5 | 
@dataclass
class Observation(Event):
    """Base class for observation events; every observation carries a ``content`` string."""

    content: str
9 | 


--------------------------------------------------------------------------------
/easyweb/events/observation/recall.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | from easyweb.core.schema import ObservationType
 4 | 
 5 | from .observation import Observation
 6 | 
 7 | 
 8 | @dataclass
 9 | class AgentRecallObservation(Observation):
10 |     """
11 |     This data class represents a list of memories recalled by the agent.
12 |     """
13 | 
14 |     memories: list[str]
15 |     role: str = 'assistant'
16 |     observation: str = ObservationType.RECALL
17 | 
18 |     @property
19 |     def message(self) -> str:
20 |         return 'The agent recalled memories.'
21 | 


--------------------------------------------------------------------------------
/easyweb/events/observation/success.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | from easyweb.core.schema import ObservationType
 4 | 
 5 | from .observation import Observation
 6 | 
 7 | 
 8 | @dataclass
 9 | class SuccessObservation(Observation):
10 |     """
11 |     This data class represents the result of a successful action.
12 |     """
13 | 
14 |     observation: str = ObservationType.SUCCESS
15 | 
16 |     @property
17 |     def message(self) -> str:
18 |         return self.content
19 | 


--------------------------------------------------------------------------------
/easyweb/events/serialization/__init__.py:
--------------------------------------------------------------------------------
 1 | from .action import (
 2 |     action_from_dict,
 3 | )
 4 | from .event import (
 5 |     event_from_dict,
 6 |     event_to_dict,
 7 |     event_to_memory,
 8 | )
 9 | from .observation import (
10 |     observation_from_dict,
11 | )
12 | 
13 | __all__ = [
14 |     'action_from_dict',
15 |     'event_from_dict',
16 |     'event_to_dict',
17 |     'event_to_memory',
18 |     'observation_from_dict',
19 | ]
20 | 


--------------------------------------------------------------------------------
/easyweb/events/serialization/action.py:
--------------------------------------------------------------------------------
 1 | from easyweb.core.exceptions import AgentMalformedActionError
 2 | from easyweb.events.action.action import Action
 3 | from easyweb.events.action.agent import (
 4 |     AgentDelegateAction,
 5 |     AgentFinishAction,
 6 |     AgentRecallAction,
 7 |     AgentRejectAction,
 8 |     ChangeAgentStateAction,
 9 | )
10 | from easyweb.events.action.browse import BrowseInteractiveAction, BrowseURLAction
11 | from easyweb.events.action.commands import (
12 |     CmdKillAction,
13 |     CmdRunAction,
14 |     IPythonRunCellAction,
15 | )
16 | from easyweb.events.action.empty import NullAction
17 | from easyweb.events.action.files import FileReadAction, FileWriteAction
18 | from easyweb.events.action.message import MessageAction
19 | from easyweb.events.action.planning import FinishPlanningAction, StartPlanningAction
20 | from easyweb.events.action.tasks import AddTaskAction, ModifyTaskAction
21 | 
# Action classes that action_from_dict is able to instantiate.
# NOTE(review): AgentSummarizeAction is exported by easyweb.events.action but
# is not registered here - confirm whether that is intentional.
actions = (
    NullAction,
    CmdKillAction,
    CmdRunAction,
    IPythonRunCellAction,
    BrowseURLAction,
    BrowseInteractiveAction,
    FileReadAction,
    FileWriteAction,
    AgentRecallAction,
    AgentFinishAction,
    AgentRejectAction,
    AgentDelegateAction,
    AddTaskAction,
    ModifyTaskAction,
    ChangeAgentStateAction,
    MessageAction,
    StartPlanningAction,
    FinishPlanningAction,
)

# Lookup from each class's default ``action`` value to the class itself.
ACTION_TYPE_TO_CLASS = dict(
    (action_class.action, action_class)  # type: ignore[attr-defined]
    for action_class in actions
)
44 | 
45 | 
def action_from_dict(action: dict) -> Action:
    """
    Build an Action instance from its dictionary form.

    Only the ``'action'`` key (looked up in ``ACTION_TYPE_TO_CLASS``) and the
    optional ``'args'`` key (passed as keyword arguments to the class) are
    read here.

    Args:
        action: Dictionary with an ``'action'`` type string and optional
            ``'args'`` mapping.

    Returns:
        The constructed action object.

    Raises:
        AgentMalformedActionError: If ``action`` is not a dict, has no
            ``'action'`` key, names an unknown or non-string action type, or
            its ``'args'`` do not match the class constructor.
    """
    if not isinstance(action, dict):
        raise AgentMalformedActionError('action must be a dictionary')
    action = action.copy()
    if 'action' not in action:
        raise AgentMalformedActionError(f"'action' key is not found in {action=}")
    # A non-string type can never be a registered key, so it is reported with
    # the same "not defined" error as an unknown string.
    action_class = (
        ACTION_TYPE_TO_CLASS.get(action['action'])
        if isinstance(action['action'], str)
        else None
    )
    if action_class is None:
        raise AgentMalformedActionError(
            f"'{action['action']=}' is not defined. Available actions: {ACTION_TYPE_TO_CLASS.keys()}"
        )
    args = action.get('args', {})
    try:
        decoded_action = action_class(**args)
    except TypeError as e:
        # Chain explicitly so the constructor's complaint stays attached as
        # the direct cause of the malformed-action error.
        raise AgentMalformedActionError(
            f'action={action} has the wrong arguments'
        ) from e
    return decoded_action
67 | 


--------------------------------------------------------------------------------
/easyweb/events/serialization/event.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import asdict
 2 | from datetime import datetime
 3 | 
 4 | from easyweb.events import Event, EventSource
 5 | 
 6 | from .action import action_from_dict
 7 | from .observation import observation_from_dict
 8 | from .utils import remove_fields
 9 | 
# TODO: move `content` into `extras`
# Keys that event_to_dict places at the top level of the serialized dict.
TOP_KEYS = [
    'id',
    'timestamp',
    'source',
    'message',
    'cause',
    'action',
    'observation',
]
# Keys that event_from_dict copies onto `_`-prefixed attributes of the event.
UNDERSCORE_KEYS = [
    'id',
    'timestamp',
    'source',
    'cause',
]

# Names that event_to_memory passes to remove_fields for the `extras` dict.
DELETE_FROM_MEMORY_EXTRAS = {
    'screenshot',
    'dom_object',
    'axtree_object',
    'open_pages_urls',
    'active_page_index',
    'last_browser_action',
    'last_browser_action_error',
    'focused_element_bid',
    'extra_element_properties',
}
25 | 
26 | 
def event_from_dict(data) -> 'Event':
    """
    Rebuild an event from its dictionary form.

    A dict with an ``'action'`` key is decoded by ``action_from_dict``; one
    with an ``'observation'`` key by ``observation_from_dict``. Every key in
    ``UNDERSCORE_KEYS`` present in ``data`` is then stored on the event as an
    underscore-prefixed attribute, with ``timestamp`` parsed from ISO format
    and ``source`` converted to ``EventSource``.

    Args:
        data: Serialized event.

    Returns:
        The reconstructed event.

    Raises:
        ValueError: If ``data`` has neither an ``'action'`` nor an
            ``'observation'`` key.
    """
    evt: Event
    if 'action' in data:
        evt = action_from_dict(data)
    elif 'observation' in data:
        evt = observation_from_dict(data)
    else:
        # Format rather than concatenate: `data` is normally a dict, and
        # str + dict would raise TypeError instead of this ValueError.
        raise ValueError(f'Unknown event type: {data}')
    for key in UNDERSCORE_KEYS:
        if key in data:
            value = data[key]
            if key == 'timestamp':
                value = datetime.fromisoformat(value)
            if key == 'source':
                value = EventSource(value)
            setattr(evt, '_' + key, value)
    return evt
44 | 
45 | 
def event_to_dict(event: 'Event') -> dict:
    """
    Serialize an event into a plain dictionary.

    Each key in ``TOP_KEYS`` whose value on the event is not None is placed
    at the top level: an ``id`` of -1 is dropped, ``timestamp`` is rendered
    with ``isoformat()`` and ``source`` is replaced by its ``.value``. The
    remaining dataclass fields go under ``'args'`` for actions; for
    observations ``'content'`` is split out and the rest goes under
    ``'extras'``.

    Raises:
        ValueError: If the event exposes neither ``action`` nor
            ``observation``.
    """
    props = asdict(event)
    d = {}
    for key in TOP_KEYS:
        value = getattr(event, key, None)
        if value is None:
            # Fall back to the underscore-prefixed attribute.
            value = getattr(event, f'_{key}', None)
        if value is not None:
            d[key] = value

        if key == 'id' and d.get('id') == -1:
            del d['id']
        elif key == 'timestamp' and 'timestamp' in d:
            d['timestamp'] = d['timestamp'].isoformat()
        elif key == 'source' and 'source' in d:
            d['source'] = d['source'].value

        # Top-level keys must not be repeated inside args/extras.
        props.pop(key, None)

    if 'action' in d:
        d['args'] = props
        return d
    if 'observation' in d:
        d['content'] = props.pop('content', '')
        d['extras'] = props
        return d
    raise ValueError('Event must be either action or observation')
69 | 
70 | 
def event_to_memory(event: 'Event') -> dict:
    """Serialize an event and strip the parts that are not kept in memory.

    Drops the 'id', 'cause', 'timestamp' and 'message' keys, and removes every
    DELETE_FROM_MEMORY_EXTRAS field from the 'extras' payload when present.
    """
    memory = event_to_dict(event)
    for dropped_key in ('id', 'cause', 'timestamp', 'message'):
        memory.pop(dropped_key, None)
    if 'extras' in memory:
        remove_fields(memory['extras'], DELETE_FROM_MEMORY_EXTRAS)
    return memory
80 | 


--------------------------------------------------------------------------------
/easyweb/events/serialization/observation.py:
--------------------------------------------------------------------------------
 1 | from easyweb.events.observation.agent import AgentStateChangedObservation
 2 | from easyweb.events.observation.browse import BrowserOutputObservation
 3 | from easyweb.events.observation.commands import (
 4 |     CmdOutputObservation,
 5 |     IPythonRunCellObservation,
 6 | )
 7 | from easyweb.events.observation.delegate import AgentDelegateObservation
 8 | from easyweb.events.observation.empty import NullObservation
 9 | from easyweb.events.observation.error import ErrorObservation
10 | from easyweb.events.observation.files import FileReadObservation, FileWriteObservation
11 | from easyweb.events.observation.observation import Observation
12 | from easyweb.events.observation.recall import AgentRecallObservation
13 | from easyweb.events.observation.success import SuccessObservation
14 | 
# Every observation class that observation_from_dict can instantiate.
observations = (
    NullObservation,
    CmdOutputObservation,
    IPythonRunCellObservation,
    BrowserOutputObservation,
    FileReadObservation,
    FileWriteObservation,
    AgentRecallObservation,
    AgentDelegateObservation,
    SuccessObservation,
    ErrorObservation,
    AgentStateChangedObservation,
)

# Lookup table from each class's ``observation`` attribute to the class itself.
OBSERVATION_TYPE_TO_CLASS = dict(
    (cls.observation, cls)  # type: ignore[attr-defined]
    for cls in observations
)
33 | 
34 | 
def observation_from_dict(observation: dict) -> Observation:
    """Instantiate the observation class described by a serialized dict.

    The 'observation' key selects the class through OBSERVATION_TYPE_TO_CLASS;
    'content' (default '') and the entries of 'extras' (default {}) are passed
    to its constructor. A 'message' key, if present, is ignored. The caller's
    dict is not modified.

    Raises:
    - KeyError: if the 'observation' key is missing or names an unknown type
    """
    fields = observation.copy()
    if 'observation' not in fields:
        raise KeyError(f"'observation' key is not found in {observation=}")

    target_class = OBSERVATION_TYPE_TO_CLASS.get(fields['observation'])
    if target_class is None:
        raise KeyError(
            f"'{observation['observation']=}' is not defined. Available observations: {OBSERVATION_TYPE_TO_CLASS.keys()}"
        )

    del fields['observation']
    fields.pop('message', None)
    body = fields.pop('content', '')
    extra_kwargs = fields.pop('extras', {})
    return target_class(content=body, **extra_kwargs)
49 | 


--------------------------------------------------------------------------------
/easyweb/events/serialization/utils.py:
--------------------------------------------------------------------------------
 1 | def remove_fields(obj, fields: set[str]):
 2 |     """
 3 |     Remove fields from an object.
 4 | 
 5 |     Parameters:
 6 |     - obj: The dictionary, or list of dictionaries to remove fields from
 7 |     - fields (set[str]): A set of field names to remove from the object
 8 |     """
 9 |     if isinstance(obj, dict):
10 |         for field in fields:
11 |             if field in obj:
12 |                 del obj[field]
13 |         for _, value in obj.items():
14 |             remove_fields(value, fields)
15 |     elif isinstance(obj, list) or isinstance(obj, tuple):
16 |         for item in obj:
17 |             remove_fields(item, fields)
18 |     elif hasattr(obj, '__dataclass_fields__'):
19 |         raise ValueError(
20 |             'Object must not contain dataclass, consider converting to dict first'
21 |         )
22 | 


--------------------------------------------------------------------------------
/easyweb/llm/bedrock.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import boto3
 4 | 
 5 | from easyweb.core.config import config
 6 | from easyweb.core.logger import easyweb_logger as logger
 7 | 
# AWS settings read once, at import time, from the LLM section of the config.
AWS_ACCESS_KEY_ID = config.llm.aws_access_key_id
AWS_SECRET_ACCESS_KEY = config.llm.aws_secret_access_key
AWS_REGION_NAME = config.llm.aws_region_name

# It needs to be set as an environment variable, if the variable is configured in the Config file.
# Values left as None in the config leave the corresponding variable untouched.
if AWS_ACCESS_KEY_ID is not None:
    os.environ['AWS_ACCESS_KEY_ID'] = AWS_ACCESS_KEY_ID
if AWS_SECRET_ACCESS_KEY is not None:
    os.environ['AWS_SECRET_ACCESS_KEY'] = AWS_SECRET_ACCESS_KEY
if AWS_REGION_NAME is not None:
    os.environ['AWS_REGION_NAME'] = AWS_REGION_NAME
19 | 
20 | 
def list_foundation_models():
    """List the on-demand text models available on AWS Bedrock.

    Returns:
    - list[str]: model ids prefixed with 'bedrock/'. An empty list is returned
      when any AWS setting is missing or when the lookup raises; in the latter
      case a warning is logged.
    """
    try:
        # The AWS bedrock model id is not queried, if no AWS parameters are configured.
        required_settings = (AWS_REGION_NAME, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
        if any(setting is None for setting in required_settings):
            return []

        bedrock = boto3.client(
            service_name='bedrock',
            region_name=AWS_REGION_NAME,
            aws_access_key_id=AWS_ACCESS_KEY_ID,
            aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        )
        response = bedrock.list_foundation_models(
            byOutputModality='TEXT', byInferenceType='ON_DEMAND'
        )
        model_ids = []
        for summary in response['modelSummaries']:
            model_ids.append('bedrock/' + summary['modelId'])
        return model_ids
    except Exception as err:
        logger.warning(
            '%s. Please config AWS_REGION_NAME AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY'
            ' if you want use bedrock model.',
            err,
        )
        return []
49 | 
50 | 
def remove_error_modelId(model_list):
    """Return a new list without the entries that start with 'bedrock'."""
    return [model for model in model_list if not model.startswith('bedrock')]
53 | 


--------------------------------------------------------------------------------
/easyweb/memory/__init__.py:
--------------------------------------------------------------------------------
1 | from .condenser import MemoryCondenser
2 | from .history import ShortTermHistory
3 | from .memory import LongTermMemory
4 | 
5 | __all__ = ['LongTermMemory', 'ShortTermHistory', 'MemoryCondenser']
6 | 


--------------------------------------------------------------------------------
/easyweb/memory/condenser.py:
--------------------------------------------------------------------------------
 1 | from easyweb.core.logger import easyweb_logger as logger
 2 | from easyweb.llm.llm import LLM
 3 | 
 4 | 
 5 | class MemoryCondenser:
 6 |     def condense(self, summarize_prompt: str, llm: LLM):
 7 |         """
 8 |         Attempts to condense the monologue by using the llm
 9 | 
10 |         Parameters:
11 |         - llm (LLM): llm to be used for summarization
12 | 
13 |         Raises:
14 |         - Exception: the same exception as it got from the llm or processing the response
15 |         """
16 | 
17 |         try:
18 |             messages = [{'content': summarize_prompt, 'role': 'user'}]
19 |             resp = llm.do_completion(messages=messages)
20 |             summary_response = resp['choices'][0]['message']['content']
21 |             return summary_response
22 |         except Exception as e:
23 |             logger.error('Error condensing thoughts: %s', str(e), exc_info=False)
24 | 
25 |             # TODO If the llm fails with ContextWindowExceededError, we can try to condense the monologue chunk by chunk
26 |             raise
27 | 


--------------------------------------------------------------------------------
/easyweb/memory/history.py:
--------------------------------------------------------------------------------
 1 | import easyweb.core.utils.json as json
 2 | from easyweb.core.exceptions import AgentEventTypeError
 3 | from easyweb.core.logger import easyweb_logger as logger
 4 | 
 5 | 
 6 | class ShortTermHistory:
 7 |     """
 8 |     The short term history is the most recent series of events.
 9 |     An agent can send this in the prompt or use it for other purpose.
10 |     """
11 | 
12 |     def __init__(self):
13 |         """
14 |         Initialize the empty list of events
15 |         """
16 |         self.events = []
17 | 
18 |     def add_event(self, event_dict: dict):
19 |         """
20 |         Adds an event to memory if it is a valid event.
21 | 
22 |         Parameters:
23 |         - event_dict (dict): The event that we want to add to memory
24 | 
25 |         Raises:
26 |         - AgentEventTypeError: If event_dict is not a dict
27 |         """
28 |         if not isinstance(event_dict, dict):
29 |             raise AgentEventTypeError()
30 |         self.events.append(event_dict)
31 | 
32 |     def get_events(self):
33 |         """
34 |         Get the events in the agent's recent history.
35 | 
36 |         Returns:
37 |         - List: The list of events that the agent remembers easily.
38 |         """
39 |         return self.events
40 | 
41 |     def get_total_length(self):
42 |         """
43 |         Gives the total number of characters in all history
44 | 
45 |         Returns:
46 |         - Int: Total number of characters of the recent history.
47 |         """
48 |         total_length = 0
49 |         for t in self.events:
50 |             try:
51 |                 total_length += len(json.dumps(t))
52 |             except TypeError as e:
53 |                 logger.error('Error serializing event: %s', str(e), exc_info=False)
54 |         return total_length
55 | 


--------------------------------------------------------------------------------
/easyweb/runtime/__init__.py:
--------------------------------------------------------------------------------
1 | from .docker.exec_box import DockerExecBox
2 | from .docker.local_box import LocalBox
3 | from .docker.ssh_box import DockerSSHBox
4 | from .e2b.sandbox import E2BBox
5 | from .sandbox import Sandbox
6 | 
7 | __all__ = ['Sandbox', 'DockerSSHBox', 'DockerExecBox', 'E2BBox', 'LocalBox']
8 | 


--------------------------------------------------------------------------------
/easyweb/runtime/browser/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitrix-org/easyweb/d2b56bc228541d2af2abb1de9c0c440ad830d30c/easyweb/runtime/browser/__init__.py


--------------------------------------------------------------------------------
/easyweb/runtime/docker/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitrix-org/easyweb/d2b56bc228541d2af2abb1de9c0c440ad830d30c/easyweb/runtime/docker/__init__.py


--------------------------------------------------------------------------------
/easyweb/runtime/e2b/README.md:
--------------------------------------------------------------------------------
 1 | # How to use E2B
 2 | 
 3 | [E2B](https://e2b.dev) is an [open-source](https://github.com/e2b-dev/e2b) secure cloud environment (sandbox) made for running AI-generated code and agents. E2B offers [Python](https://pypi.org/project/e2b/) and [JS/TS](https://www.npmjs.com/package/e2b) SDK to spawn and control these sandboxes.
 4 | 
 5 | ## Getting started
 6 | 
 7 | 1. [Get your API key](https://e2b.dev/docs/getting-started/api-key)
 8 | 
 9 | 1. Set your E2B API key to the `E2B_API_KEY` env var when starting the Docker container
10 | 
11 | 1. **Optional** - Install the CLI with NPM.
12 |     ```sh
13 |     npm install -g @e2b/cli@latest
14 |     ```
15 |     Full CLI API is [here](https://e2b.dev/docs/cli/installation).
16 | 
17 | ## OpenDevin sandbox
18 | You can use the E2B CLI to create a custom sandbox with a Dockerfile. Read the full guide [here](https://e2b.dev/docs/guide/custom-sandbox). The premade OpenDevin sandbox for E2B is set up in the [`containers` directory](/containers/e2b-sandbox), and it's called `open-devin`.
19 | 
20 | ## Debugging
21 | You can connect to a running E2B sandbox with E2B CLI in your terminal.
22 | 
23 | - List all running sandboxes (based on your API key)
24 |     ```sh
25 |     e2b sandbox list
26 |     ```
27 | 
28 | - Connect to a running sandbox
29 |     ```sh
30 |     e2b sandbox connect <sandbox-id>
31 |     ```
32 | 
33 | ## Links
34 | - [E2B Docs](https://e2b.dev/docs)
35 | - [E2B GitHub](https://github.com/e2b-dev/e2b)
36 | 


--------------------------------------------------------------------------------
/easyweb/runtime/e2b/filestore.py:
--------------------------------------------------------------------------------
 1 | from easyweb.storage.files import FileStore
 2 | 
 3 | 
 4 | class E2BFileStore(FileStore):
 5 |     def __init__(self, filesystem):
 6 |         self.filesystem = filesystem
 7 | 
 8 |     def write(self, path: str, contents: str) -> None:
 9 |         self.filesystem.write(path, contents)
10 | 
11 |     def read(self, path: str) -> str:
12 |         return self.filesystem.read(path)
13 | 
14 |     def list(self, path: str) -> list[str]:
15 |         return self.filesystem.list(path)
16 | 
17 |     def delete(self, path: str) -> None:
18 |         self.filesystem.delete(path)
19 | 


--------------------------------------------------------------------------------
/easyweb/runtime/e2b/process.py:
--------------------------------------------------------------------------------
 1 | from e2b import Process as E2BSandboxProcess
 2 | 
 3 | from easyweb.runtime.docker.process import Process
 4 | 
 5 | 
 6 | class E2BProcess(Process):
 7 |     def __init__(self, process: E2BSandboxProcess, cmd: str):
 8 |         self._process = process
 9 |         self._command = cmd
10 | 
11 |     def kill(self):
12 |         self._process.kill()
13 | 
14 |     def read_logs(self):
15 |         return '\n'.join([m.line for m in self._process.output_messages])
16 | 
17 |     @property
18 |     def pid(self) -> int:
19 |         return int(self._process.process_id)
20 | 
21 |     @property
22 |     def command(self) -> str:
23 |         return self._command
24 | 
25 |     @property
26 |     def output_messages(self):
27 |         return self._process.output_messages
28 | 


--------------------------------------------------------------------------------
/easyweb/runtime/e2b/runtime.py:
--------------------------------------------------------------------------------
 1 | from easyweb.events.action import (
 2 |     FileReadAction,
 3 |     FileWriteAction,
 4 | )
 5 | from easyweb.events.observation import (
 6 |     ErrorObservation,
 7 |     FileReadObservation,
 8 |     FileWriteObservation,
 9 |     Observation,
10 | )
11 | from easyweb.events.stream import EventStream
12 | from easyweb.runtime import Sandbox
13 | from easyweb.runtime.server.files import insert_lines, read_lines
14 | from easyweb.runtime.server.runtime import ServerRuntime
15 | 
16 | from .filestore import E2BFileStore
17 | from .sandbox import E2BSandbox
18 | 
19 | 
class E2BRuntime(ServerRuntime):
    """ServerRuntime whose file reads and writes go through an E2B sandbox filesystem."""

    def __init__(
        self,
        event_stream: EventStream,
        sid: str = 'default',
        sandbox: Sandbox | None = None,
    ):
        """
        Parameters:
        - event_stream (EventStream): forwarded to ServerRuntime
        - sid (str): forwarded to ServerRuntime
        - sandbox (Sandbox | None): forwarded to ServerRuntime

        Raises:
        - ValueError: if the resulting ``self.sandbox`` is not an E2BSandbox
        """
        super().__init__(event_stream, sid, sandbox)
        if not isinstance(self.sandbox, E2BSandbox):
            raise ValueError('E2BRuntime requires an E2BSandbox')
        self.file_store = E2BFileStore(self.sandbox.filesystem)

    @staticmethod
    def _split_keep_newlines(text: str) -> list[str]:
        """Split ``text`` on '\\n', keeping the terminator on every line.

        A plain ``text.split('\\n')`` drops the terminators, so joining the
        pieces back with '' glued all lines together.
        """
        pieces = text.split('\n')
        lines = [piece + '\n' for piece in pieces[:-1]]
        if pieces[-1]:
            # Last line without a trailing newline.
            lines.append(pieces[-1])
        return lines

    async def read(self, action: FileReadAction) -> Observation:
        """Read ``action.path`` and return the requested line range."""
        content = self.file_store.read(action.path)
        # NOTE(review): assumes read_lines returns a slice of the list it is
        # given, so the lines must carry their own newlines for the join below.
        lines = read_lines(
            self._split_keep_newlines(content), action.start, action.end
        )
        code_view = ''.join(lines)
        return FileReadObservation(code_view, path=action.path)

    async def write(self, action: FileWriteAction) -> Observation:
        """Write ``action.content`` to ``action.path``.

        With start == 0 and end == -1 the content replaces the whole file;
        otherwise it is merged into the existing file via insert_lines. An
        ErrorObservation is returned when the path is not in the store's listing.
        """
        if action.start == 0 and action.end == -1:
            self.file_store.write(action.path, action.content)
            return FileWriteObservation(content='', path=action.path)
        files = self.file_store.list(action.path)
        if action.path in files:
            # Keep the existing lines' terminators so the ''.join below does
            # not collapse the untouched part of the file into one line.
            all_lines = self._split_keep_newlines(self.file_store.read(action.path))
            new_file = insert_lines(
                action.content.split('\n'), all_lines, action.start, action.end
            )
            self.file_store.write(action.path, ''.join(new_file))
            return FileWriteObservation('', path=action.path)
        else:
            # FIXME: we should create a new file here
            return ErrorObservation(f'File not found: {action.path}')
53 | 


--------------------------------------------------------------------------------
/easyweb/runtime/plugins/__init__.py:
--------------------------------------------------------------------------------
 1 | # Requirements
 2 | from .agent_skills import AgentSkillsRequirement
 3 | from .jupyter import JupyterRequirement
 4 | from .mixin import PluginMixin
 5 | from .requirement import PluginRequirement
 6 | from .swe_agent_commands import SWEAgentCommandsRequirement
 7 | 
 8 | __all__ = [
 9 |     'PluginMixin',
10 |     'PluginRequirement',
11 |     'AgentSkillsRequirement',
12 |     'JupyterRequirement',
13 |     'SWEAgentCommandsRequirement',
14 | ]
15 | 


--------------------------------------------------------------------------------
/easyweb/runtime/plugins/agent_skills/README.md:
--------------------------------------------------------------------------------
 1 | # OpenDevin Skill Sets
 2 | 
 3 | This folder implements a skill/tool set `agentskills` for OpenDevin.
 4 | 
 5 | It is intended to be used by the agent **inside sandbox**.
 6 | The skill set will be exposed as a `pip` package that can be installed as a plugin inside the sandbox.
 7 | 
 8 | The skill set can contain a bunch of wrapped tools for the agent ([many examples here](https://github.com/OpenDevin/OpenDevin/pull/1914)), for example:
 9 | - Audio/Video to text (these are a temporary solution, and we should switch to multimodal models when they are sufficiently cheap)
10 | - PDF to text
11 | - etc.
12 | 
13 | # Inclusion Criteria
14 | 
15 | We are walking a fine line here.
16 | We DON'T want to *wrap* every possible Python package and re-teach the agent its usage (e.g., the LLM already knows `pandas` pretty well, so we don't really need to create a skill that reads `csv` - it can just use `pandas`).
17 | 
18 | We ONLY want to add a new skill, when:
19 | - Such a skill is not easy for the LLM to achieve by writing code directly (e.g., edit code and replace certain lines)
20 | - It involves calling an external model (e.g., you need to call a speech to text model, editor model for speculative editing)
21 | 
22 | # Intended functionality
23 | 
24 | - Tool/skill usage (through `IPythonRunAction`)
25 | 
26 | ```python
27 | # In[1]
28 | from agentskills import open_file, edit_file
29 | open_file("/workspace/a.txt")
30 | # Out[1]
31 | [SWE-agent open output]
32 | 
33 | # In[2]
34 | edit_file(
35 |     "/workspace/a.txt",
36 |     start=1, end=3,
37 |     content=(
38 |         ("REPLACE TEXT")
39 | ))
40 | # Out[2]
41 | [SWE-agent edit output]
42 | ```
43 | 
44 | - Tool/skill retrieval (through `IPythonRunAction`)
45 | 
46 | ```python
47 | # In[1]
48 | from agentskills import help_me
49 | 
50 | help_me("I want to solve a task that involves reading a bunch of PDFs and reason about them")
51 | 
52 | # Out[1]
53 | "Here are the top skills that may be helpful to you:
54 | - `pdf_to_text`: [documentation about the tools]
55 | ...
56 | "
57 | ```
58 | 


--------------------------------------------------------------------------------
/easyweb/runtime/plugins/agent_skills/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from dataclasses import dataclass
 3 | 
 4 | from easyweb.runtime.plugins.agent_skills.agentskills import DOCUMENTATION
 5 | from easyweb.runtime.plugins.requirement import PluginRequirement
 6 | 
 7 | 
 8 | @dataclass
 9 | class AgentSkillsRequirement(PluginRequirement):
10 |     name: str = 'agent_skills'
11 |     host_src: str = os.path.dirname(
12 |         os.path.abspath(__file__)
13 |     )  # The directory of this file (opendevin/runtime/plugins/jupyter)
14 |     sandbox_dest: str = '/opendevin/plugins/agent_skills'
15 |     bash_script_path: str = 'setup.sh'
16 |     documentation: str = DOCUMENTATION
17 | 


--------------------------------------------------------------------------------
/easyweb/runtime/plugins/agent_skills/setup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # add agent_skills to PATH
 6 | echo 'export PATH=/opendevin/plugins/agent_skills:$PATH' >> ~/.bashrc
 7 | export PATH=/opendevin/plugins/agent_skills:$PATH
 8 | 
 9 | # add agent_skills to PYTHONPATH
10 | echo 'export PYTHONPATH=/opendevin/plugins/agent_skills:$PYTHONPATH' >> ~/.bashrc
11 | export PYTHONPATH=/opendevin/plugins/agent_skills:$PYTHONPATH
12 | 
13 | pip install flake8 python-docx PyPDF2 python-pptx pylatexenc openai opencv-python
14 | 


--------------------------------------------------------------------------------
/easyweb/runtime/plugins/jupyter/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from dataclasses import dataclass
 3 | 
 4 | from easyweb.runtime.plugins.requirement import PluginRequirement
 5 | 
 6 | 
 7 | @dataclass
 8 | class JupyterRequirement(PluginRequirement):
 9 |     name: str = 'jupyter'
10 |     host_src: str = os.path.dirname(
11 |         os.path.abspath(__file__)
12 |     )  # The directory of this file (opendevin/runtime/plugins/jupyter)
13 |     sandbox_dest: str = '/opendevin/plugins/jupyter'
14 |     bash_script_path: str = 'setup.sh'
15 | 


--------------------------------------------------------------------------------
/easyweb/runtime/plugins/jupyter/execute_cli:
--------------------------------------------------------------------------------
#!/bin/bash
# Expose the caller's working directory to the Python script, then run the
# script with the specified interpreter.
JUPYTER_PWD=$(pwd)
export JUPYTER_PWD
$OPENDEVIN_PYTHON_INTERPRETER /opendevin/plugins/jupyter/execute_cli.py
5 | 


--------------------------------------------------------------------------------
/easyweb/runtime/plugins/jupyter/execute_cli.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import time
 4 | 
 5 | import requests
 6 | 
 7 | # Read the Python code from STDIN
 8 | code = sys.stdin.read()
 9 | 
10 | 
def execute_code(code, print_output=True):
    """Send ``code`` to the local Jupyter execution server.

    The request is attempted up to 10 times, sleeping 2 seconds after every
    attempt that hits a connection error or gets a response containing
    '500: Internal Server Error'. The first other response ends the loop and
    is printed when ``print_output`` is true. If no attempt succeeds, a
    failure message is printed instead.
    """
    port = os.environ.get('JUPYTER_EXEC_SERVER_PORT')
    endpoint = f'http://localhost:{port}/execute'
    # The default kernel ID is always used.
    payload = {'kernel_id': 'default', 'code': code}

    attempts_left = 10
    while attempts_left > 0:
        attempts_left -= 1
        try:
            reply = requests.post(endpoint, json=payload)
        except requests.exceptions.ConnectionError:
            reply = None
        if reply is not None and '500: Internal Server Error' not in reply.text:
            if print_output:
                print(reply.text)
            return
        time.sleep(2)

    print('Failed to connect to the Jupyter server')
32 | 
33 | 
# When JUPYTER_PWD is set, mirror it into the kernel's environment before
# running the user's code.
if jupyter_pwd := os.environ.get('JUPYTER_PWD'):
    # ``!r`` emits a properly escaped Python string literal, so a directory
    # name containing quotes or backslashes can neither break the generated
    # snippet nor inject code into it.
    execute_code(
        f'import os\nos.environ["JUPYTER_PWD"] = {jupyter_pwd!r}\n',
        print_output=False,
    )

# Run the code that was read from STDIN.
execute_code(code)
40 | 


--------------------------------------------------------------------------------
/easyweb/runtime/plugins/requirement.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | 
 4 | @dataclass
 5 | class PluginRequirement:
 6 |     """Requirement for a plugin."""
 7 | 
 8 |     name: str
 9 |     # FOLDER/FILES to be copied to the sandbox
10 |     host_src: str
11 |     sandbox_dest: str
12 |     # NOTE: bash_script_path should be relative to the `sandbox_dest` path
13 |     bash_script_path: str
14 | 


--------------------------------------------------------------------------------
/easyweb/runtime/plugins/swe_agent_commands/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from dataclasses import dataclass, field
 3 | 
 4 | from easyweb.runtime.plugins.requirement import PluginRequirement
 5 | from easyweb.runtime.plugins.swe_agent_commands.parse_commands import (
 6 |     parse_command_file,
 7 | )
 8 | 
 9 | 
def _resolve_to_cur_dir(filename):
    """Return ``filename`` joined onto the directory that contains this module."""
    module_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(module_dir, filename)
12 | 
13 | 
def check_and_parse_command_file(filepath) -> str:
    """Parse a command file, rejecting a missing (None) path up front.

    Raises:
    - FileNotFoundError: if ``filepath`` is None
    """
    if filepath is not None:
        return parse_command_file(filepath)
    raise FileNotFoundError(f'File not found: {filepath}')
18 | 
19 | 
# Command scripts of the default mode, resolved next to this module.
DEFAULT_SCRIPT_FILEPATHS = [
    _resolve_to_cur_dir(script_name)
    for script_name in ('defaults.sh', 'search.sh', 'edit_linting.sh')
]
# Concatenated documentation parsed from each default-mode script.
DEFAULT_DOCUMENTATION = ''.join(
    check_and_parse_command_file(filepath)
    for filepath in DEFAULT_SCRIPT_FILEPATHS
    if filepath is not None
)
32 | 
33 | 
@dataclass
class SWEAgentCommandsRequirement(PluginRequirement):
    """Plugin requirement for the default-mode SWE-agent commands."""

    name: str = 'swe_agent_commands'
    # Directory containing this file.
    host_src: str = os.path.dirname(os.path.abspath(__file__))
    sandbox_dest: str = '/opendevin/plugins/swe_agent_commands'
    # Relative to sandbox_dest.
    bash_script_path: str = 'setup_default.sh'

    # Each instance gets its own copy: handing out the module-level list
    # itself would let a mutation on one instance leak into every other
    # instance and into DEFAULT_SCRIPT_FILEPATHS.
    scripts_filepaths: list[str | None] = field(
        default_factory=lambda: list(DEFAULT_SCRIPT_FILEPATHS)
    )
    documentation: str = DEFAULT_DOCUMENTATION
45 | 
46 | 
# Command scripts of the cursor mode, resolved next to this module.
CURSOR_SCRIPT_FILEPATHS = [
    _resolve_to_cur_dir(script_name)
    for script_name in (
        'cursors_defaults.sh',
        'cursors_edit_linting.sh',
        'search.sh',
    )
]
# Concatenated documentation parsed from each cursor-mode script.
CURSOR_DOCUMENTATION = ''.join(
    check_and_parse_command_file(filepath)
    for filepath in CURSOR_SCRIPT_FILEPATHS
    if filepath is not None
)
59 | 
60 | 
@dataclass
class SWEAgentCursorCommandsRequirement(PluginRequirement):
    """Plugin requirement for the cursor-mode SWE-agent commands."""

    name: str = 'swe_agent_commands'
    # Directory containing this file.
    host_src: str = os.path.dirname(os.path.abspath(__file__))
    sandbox_dest: str = '/opendevin/plugins/swe_agent_commands'
    # Relative to sandbox_dest.
    bash_script_path: str = 'setup_cursor_mode.sh'

    # Each instance gets its own copy: handing out the module-level list
    # itself would let a mutation on one instance leak into every other
    # instance and into CURSOR_SCRIPT_FILEPATHS.
    scripts_filepaths: list[str | None] = field(
        default_factory=lambda: list(CURSOR_SCRIPT_FILEPATHS)
    )
    documentation: str = CURSOR_DOCUMENTATION
72 | 


--------------------------------------------------------------------------------
/easyweb/runtime/plugins/swe_agent_commands/_setup_cursor_mode_env.sh:
--------------------------------------------------------------------------------
 1 | # Cursor Mode from SWE-Bench
 2 | # https://github.com/princeton-nlp/SWE-agent/blob/ca54d5556b9db4f4f2be21f09530ce69a72c0305/config/configs/default_sys-env_cursors_window100-detailed_cmd_format-last_5_history-1_demos.yaml
 3 | export WINDOW=200;
 4 | export OVERLAP=2;
 5 | export CURRENT_LINE=0;
 6 | export CURRENT_FILE='';
 7 | export SEARCH_RESULTS=();
 8 | export SEARCH_FILES=();
 9 | export SEARCH_INDEX=0;
10 | export START_INDEX=0;
11 | export END_INDEX=0;
12 | export START_CURSOR=0;
13 | export END_CURSOR=0;
14 | export START_CURSOR_MARK='"<<<<< START CURSOR >>>>>"';  # these have to use double quotes
15 | export END_CURSOR_MARK='"<<<<< END CURSOR >>>>>"'; # these have to use double quotes
16 | 
17 | state() {
18 |     local working_dir="$PWD";
19 |     if [ -z $CURRENT_FILE ]; then
20 |         echo '{"open_file": "n/a", "working_dir": "'$working_dir'"}';
21 |     else
22 |         echo '{"open_file": "'$(realpath $CURRENT_FILE)'", "working_dir": "'$working_dir'"}';
23 |     fi
24 | };
25 | 


--------------------------------------------------------------------------------
/easyweb/runtime/plugins/swe_agent_commands/_setup_default_env.sh:
--------------------------------------------------------------------------------
 1 | # Default Mode from SWE-Bench
 2 | # https://github.com/princeton-nlp/SWE-agent/blob/ca54d5556b9db4f4f2be21f09530ce69a72c0305/config/configs/default_sys-env_window100-detailed_cmd_format-last_5_history-1_demos.yaml
 3 | export WINDOW=100;
 4 | export OVERLAP=2;
 5 | export CURRENT_LINE=0;
 6 | export CURRENT_FILE='';
 7 | export SEARCH_RESULTS=();
 8 | export SEARCH_FILES=();
 9 | export SEARCH_INDEX=0;
10 | 
11 | state() {
12 |     local working_dir="$PWD";
13 |     if [ -z $CURRENT_FILE ]; then
14 |         echo '{"open_file": "n/a", "working_dir": "'$working_dir'"}';
15 |     else
16 |         echo '{"open_file": "'$(realpath $CURRENT_FILE)'", "working_dir": "'$working_dir'"}';
17 |     fi
18 | };
19 | 


--------------------------------------------------------------------------------
/easyweb/runtime/plugins/swe_agent_commands/_split_string:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import sys
 3 | 
 4 | 
 5 | def print_flake8_output(input_string, show_line_numbers=False):
 6 |     for value in input_string.split('\n'):
 7 |         parts = value.split()
 8 |         if not show_line_numbers:
 9 |             print(f"- {' '.join(parts[1:])}")
10 |         else:
11 |             line_nums = ':'.join(parts[0].split(':')[1:])
12 |             print(f"- {line_nums} {' '.join(parts[1:])}")
13 | 
14 | 
15 | if __name__ == '__main__':
16 |     lint_output = sys.argv[1]
17 |     print_flake8_output(lint_output)
18 | 


--------------------------------------------------------------------------------
/easyweb/runtime/plugins/swe_agent_commands/parse_commands.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | import yaml
 4 | 
 5 | 
 6 | @dataclass()
 7 | class Command:
 8 |     name: str
 9 |     docstring: str | None = None
10 |     signature: str | None = None
11 | 
12 | 
def parse_command_file(filepath: str) -> str:
    """Extract command documentation from a shell script.

    Lines starting with '# ' are collected as YAML (any '@yaml' marker is
    removed) and attached to the next ``name() {`` function definition. The
    YAML may provide 'docstring', 'signature' and 'arguments'; without an
    explicit signature one is built from the arguments, rendering required
    ones as ``<arg>`` and the others as ``[<arg>]``.

    Parameters:
    - filepath (str): path of the script to parse

    Returns:
    - str: one ``signature - docstring`` line per command that has a docstring
    """
    # Context manager so the file handle is closed deterministically.
    with open(filepath, 'r') as script:
        content = script.read()
    lines = content.split('\n')
    commands: list[Command] = []
    idx = 0
    docs: list[str] = []
    while idx < len(lines):
        line = lines[idx]
        idx += 1
        if line.startswith('# '):
            docs.append(line[2:])
        elif line.strip().endswith('() {'):
            # 'name() {' -> 'name'
            name = line.split()[0][:-2]
            # Skip the function body. The bounds check stops at end of file
            # instead of raising IndexError when the closing brace is missing.
            while idx < len(lines) and lines[idx].strip() != '}':
                idx += 1
            docstring, signature = None, name
            docs_dict = yaml.safe_load('\n'.join(docs).replace('@yaml', ''))
            if docs_dict is not None:
                docstring = docs_dict.get('docstring')
                arguments = docs_dict.get('arguments', None)
                if 'signature' in docs_dict:
                    signature = docs_dict['signature']
                else:
                    if arguments is not None:
                        for param, settings in arguments.items():
                            # Honor the flag's value: the mere presence of the
                            # key used to mark `required: false` as required.
                            is_required = isinstance(settings, dict) and bool(
                                settings.get('required')
                            )
                            if is_required:
                                signature += f' <{param}>'
                            else:
                                signature += f' [<{param}>]'
            command = Command(name, docstring, signature)
            commands.append(command)
            # Comments collected so far belonged to this command only.
            docs = []
    function_docs = ''
    for cmd in commands:
        if cmd.docstring is not None:
            function_docs += f'{cmd.signature or cmd.name} - {cmd.docstring}\n'
    return function_docs
50 | 
51 | 
if __name__ == '__main__':
    import sys

    # The single CLI argument is one path or a comma-separated list of paths.
    if len(sys.argv) < 2:
        print('Usage: python parse_commands.py <file>')
        sys.exit(1)
    for command_file in sys.argv[1].split(','):
        print(parse_command_file(command_file))
63 | 


--------------------------------------------------------------------------------
/easyweb/runtime/plugins/swe_agent_commands/setup_cursor_mode.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | export PIP_CACHE_DIR=$HOME/.cache/pip
 4 | pip install flake8
 5 | 
 6 | # Cursor Mode from SWE-Bench
 7 | # https://github.com/princeton-nlp/SWE-agent/blob/ca54d5556b9db4f4f2be21f09530ce69a72c0305/config/configs/default_sys-env_cursors_window100-detailed_cmd_format-last_5_history-1_demos.yaml#L108-L111
 8 | echo 'source /opendevin/plugins/swe_agent_commands/_setup_cursor_mode_env.sh' >> ~/.bashrc
 9 | 
10 | # make _split_string (py) available
11 | echo 'export PATH=$PATH:/opendevin/plugins/swe_agent_commands' >> ~/.bashrc
12 | 
13 | echo 'source /opendevin/plugins/swe_agent_commands/cursors_defaults.sh' >> ~/.bashrc
14 | echo 'source /opendevin/plugins/swe_agent_commands/cursors_edit_linting.sh' >> ~/.bashrc
15 | echo 'source /opendevin/plugins/swe_agent_commands/search.sh' >> ~/.bashrc
16 | 
17 | echo 'export SWE_CMD_WORK_DIR="/opendevin/plugins/swe_agent_commands/workdir"' >> ~/.bashrc
18 | sudo mkdir -p /opendevin/plugins/swe_agent_commands/workdir
19 | sudo chmod 777 /opendevin/plugins/swe_agent_commands/workdir
20 | 


--------------------------------------------------------------------------------
/easyweb/runtime/plugins/swe_agent_commands/setup_default.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | export PIP_CACHE_DIR=$HOME/.cache/pip
 4 | pip install flake8
 5 | 
 6 | # Default Mode from SWE-Bench
 7 | # https://github.com/princeton-nlp/SWE-agent/blob/ca54d5556b9db4f4f2be21f09530ce69a72c0305/config/configs/default_sys-env_window100-detailed_cmd_format-last_5_history-1_demos.yaml#L103-L106
 8 | echo 'source /opendevin/plugins/swe_agent_commands/_setup_default_env.sh' >> ~/.bashrc
 9 | 
10 | # make _split_string (py) available
11 | echo 'export PATH=$PATH:/opendevin/plugins/swe_agent_commands' >> ~/.bashrc
12 | 
13 | echo 'source /opendevin/plugins/swe_agent_commands/defaults.sh' >> ~/.bashrc
14 | echo 'source /opendevin/plugins/swe_agent_commands/search.sh' >> ~/.bashrc
15 | echo 'source /opendevin/plugins/swe_agent_commands/edit_linting.sh' >> ~/.bashrc
16 | 
17 | echo 'export SWE_CMD_WORK_DIR="/opendevin/plugins/swe_agent_commands/workdir"' >> ~/.bashrc
18 | sudo mkdir -p /opendevin/plugins/swe_agent_commands/workdir
19 | sudo chmod 777 /opendevin/plugins/swe_agent_commands/workdir
20 | 


--------------------------------------------------------------------------------
/easyweb/runtime/process.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | 
 3 | 
 4 | class Process(ABC):
 5 |     @property
 6 |     @abstractmethod
 7 |     def pid(self) -> int:
 8 |         pass
 9 | 
10 |     @property
11 |     @abstractmethod
12 |     def command(self) -> str:
13 |         pass
14 | 
15 |     @abstractmethod
16 |     def read_logs(self) -> str:
17 |         pass
18 | 


--------------------------------------------------------------------------------
/easyweb/runtime/sandbox.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | from abc import ABC, abstractmethod
 4 | 
 5 | from easyweb.core.config import config
 6 | from easyweb.core.schema import CancellableStream
 7 | from easyweb.runtime.docker.process import Process
 8 | from easyweb.runtime.plugins.mixin import PluginMixin
 9 | 
10 | 
class Sandbox(ABC, PluginMixin):
    """Abstract base class for sandboxes in which commands are executed.

    Subclasses implement command execution, background-process management,
    file copy and teardown. The base class only handles environment variables.
    """

    # NOTE(review): these two dicts live on the class, so every instance that
    # does not rebind them shares the same objects — confirm this is intended.
    background_commands: dict[int, Process] = {}
    _env: dict[str, str] = {}
    is_initial_session: bool = True

    def __init__(self, **kwargs):
        """Export the configured environment into the sandbox.

        Every host variable named ``SANDBOX_ENV_<NAME>`` is exported inside the
        sandbox as ``<NAME>``. ``ENABLE_AUTO_LINT=true`` is exported when
        ``config.enable_auto_lint`` is set. ``kwargs`` is accepted but unused.

        Each export goes through ``self.execute``, so a subclass must be able
        to execute commands before it calls this initializer.
        """
        for key in os.environ:
            if key.startswith('SANDBOX_ENV_'):
                sandbox_key = key.removeprefix('SANDBOX_ENV_')
                self.add_to_env(sandbox_key, os.environ[key])
        if config.enable_auto_lint:
            self.add_to_env('ENABLE_AUTO_LINT', 'true')
        self.initialize_plugins: bool = config.initialize_plugins

    def add_to_env(self, key: str, value: str):
        """Record ``key=value`` and export it inside the sandbox shell."""
        # Imported here so the block does not depend on a file-level import.
        import shlex

        self._env[key] = value
        # shlex.quote produces a literal shell word. JSON-style double quotes
        # would still let the shell expand `$VAR`, backticks and `$(...)`, and
        # would turn newlines / non-ASCII characters into backslash escapes.
        self.execute(f'export {key}={shlex.quote(value)}')

    @abstractmethod
    def execute(
        self, cmd: str, stream: bool = False, timeout: int | None = None
    ) -> tuple[int, str | CancellableStream]:
        """Run ``cmd`` and return a tuple of an int and the output or a stream."""
        pass

    @abstractmethod
    def execute_in_background(self, cmd: str) -> Process:
        """Start ``cmd`` in the background and return its process handle."""
        pass

    @abstractmethod
    def kill_background(self, id: int) -> Process:
        """Kill the background process identified by ``id`` and return it."""
        pass

    @abstractmethod
    def read_logs(self, id: int) -> str:
        """Return the logs of the background process identified by ``id``."""
        pass

    @abstractmethod
    def close(self):
        """Shut the sandbox down."""
        pass

    @abstractmethod
    def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
        """Copy ``host_src`` from the host to ``sandbox_dest`` in the sandbox."""
        pass

    @abstractmethod
    def get_working_directory(self):
        """Return the sandbox working directory."""
        pass
59 | 


--------------------------------------------------------------------------------
/easyweb/runtime/server/browse.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from easyweb.core.exceptions import BrowserUnavailableException
 4 | from easyweb.core.schema import ActionType
 5 | from easyweb.events.observation import BrowserOutputObservation
 6 | from easyweb.runtime.browser.browser_env import BrowserEnv
 7 | 
 8 | 
 9 | async def browse(action, browser: BrowserEnv | None) -> BrowserOutputObservation:
10 |     if browser is None:
11 |         raise BrowserUnavailableException()
12 |     if action.action == ActionType.BROWSE:
13 |         # legacy BrowseURLAction
14 |         asked_url = action.url
15 |         if not asked_url.startswith('http'):
16 |             asked_url = os.path.abspath(os.curdir) + action.url
17 |         action_str = f'goto("{asked_url}")'
18 |     elif action.action == ActionType.BROWSE_INTERACTIVE:
19 |         # new BrowseInteractiveAction, supports full featured BrowserGym actions
20 |         # action in BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/action/functions.py
21 |         action_str = action.browser_actions
22 |     else:
23 |         raise ValueError(f'Invalid action type: {action.action}')
24 |     try:
25 |         # obs provided by BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/env.py#L396
26 |         obs = browser.step(action_str)
27 |         return BrowserOutputObservation(
28 |             content=obs['text_content'],  # text content of the page
29 |             open_pages_urls=obs['open_pages_urls'],  # list of open pages
30 |             active_page_index=obs['active_page_index'],  # index of the active page
31 |             dom_object=obs['dom_object'],  # DOM object
32 |             axtree_object=obs['axtree_object'],  # accessibility tree object
33 |             extra_element_properties=obs[
34 |                 'extra_element_properties'
35 |             ],  # extra element properties
36 |             last_browser_action=obs['last_action'],  # last browser env action performed
37 |             focused_element_bid=obs['focused_element_bid'],  # focused element bid
38 |             screenshot=obs['screenshot'],  # base64-encoded screenshot, png
39 |             url=obs['url'],  # URL of the page
40 |             error=True if obs['last_action_error'] else False,  # error flag
41 |             last_browser_action_error=obs[
42 |                 'last_action_error'
43 |             ],  # last browser env action error
44 |             scroll_position=obs['scroll_position'],
45 |         )
46 |     except Exception as e:
47 |         return BrowserOutputObservation(
48 |             content=str(e),
49 |             screenshot='',
50 |             error=True,
51 |             last_browser_action_error=str(e),
52 |             url=asked_url if action.action == ActionType.BROWSE else '',
53 |         )
54 | 


--------------------------------------------------------------------------------
/easyweb/runtime/tools.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | 
3 | 
class RuntimeTool(Enum):
    """Enumeration of runtime tools; each member's value is its string name."""

    BROWSER = 'browser'
6 | 


--------------------------------------------------------------------------------
/easyweb/runtime/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .system import find_available_tcp_port
2 | 
3 | __all__ = ['find_available_tcp_port']
4 | 


--------------------------------------------------------------------------------
/easyweb/runtime/utils/singleton.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitrix-org/easyweb/d2b56bc228541d2af2abb1de9c0c440ad830d30c/easyweb/runtime/utils/singleton.py


--------------------------------------------------------------------------------
/easyweb/runtime/utils/system.py:
--------------------------------------------------------------------------------
 1 | import socket
 2 | 
 3 | 
 4 | def find_available_tcp_port() -> int:
 5 |     """Find an available TCP port, return -1 if none available."""
 6 |     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 7 |     try:
 8 |         sock.bind(('localhost', 0))
 9 |         port = sock.getsockname()[1]
10 |         return port
11 |     except Exception:
12 |         return -1
13 |     finally:
14 |         sock.close()
15 | 


--------------------------------------------------------------------------------
/easyweb/server/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitrix-org/easyweb/d2b56bc228541d2af2abb1de9c0c440ad830d30c/easyweb/server/__init__.py


--------------------------------------------------------------------------------
/easyweb/server/auth/__init__.py:
--------------------------------------------------------------------------------
1 | from .auth import get_sid_from_token, sign_token
2 | 
3 | __all__ = ['get_sid_from_token', 'sign_token']
4 | 


--------------------------------------------------------------------------------
/easyweb/server/auth/auth.py:
--------------------------------------------------------------------------------
 1 | import jwt
 2 | from jwt.exceptions import InvalidTokenError
 3 | 
 4 | from easyweb.core.config import config
 5 | from easyweb.core.logger import easyweb_logger as logger
 6 | 
 7 | 
 8 | def get_sid_from_token(token: str) -> str:
 9 |     """
10 |     Retrieves the session id from a JWT token.
11 | 
12 |     Parameters:
13 |         token (str): The JWT token from which the session id is to be extracted.
14 | 
15 |     Returns:
16 |         str: The session id if found and valid, otherwise an empty string.
17 |     """
18 |     try:
19 |         # Decode the JWT using the specified secret and algorithm
20 |         payload = jwt.decode(token, config.jwt_secret, algorithms=['HS256'])
21 | 
22 |         # Ensure the payload contains 'sid'
23 |         if 'sid' in payload:
24 |             return payload['sid']
25 |         else:
26 |             logger.error('SID not found in token')
27 |             return ''
28 |     except InvalidTokenError:
29 |         logger.error('Invalid token')
30 |     except Exception as e:
31 |         logger.exception('Unexpected error decoding token: %s', e)
32 |     return ''
33 | 
34 | 
def sign_token(payload: dict[str, object]) -> str:
    """Encode ``payload`` as a JWT signed with the configured secret (HS256).

    The payload is signed exactly as given; no expiry claim is added here.
    """
    secret = config.jwt_secret
    return jwt.encode(payload, secret, algorithm='HS256')
42 | 


--------------------------------------------------------------------------------
/easyweb/server/data_models/feedback.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Literal
 2 | 
 3 | import requests
 4 | from pydantic import BaseModel
 5 | 
 6 | from easyweb.core.logger import easyweb_logger as logger
 7 | 
 8 | 
 9 | class FeedbackDataModel(BaseModel):
10 |     version: str
11 |     email: str
12 |     token: str
13 |     feedback: Literal['positive', 'negative']
14 |     permissions: Literal['public', 'private']
15 |     trajectory: list[dict[str, Any]]
16 | 
17 | 
FEEDBACK_URL = 'https://share-od-trajectory-3u9bw9tx.uc.gateway.dev/share_od_trajectory'


def store_feedback(feedback: FeedbackDataModel):
    """Log a redacted summary of ``feedback`` and POST it to ``FEEDBACK_URL``.

    Args:
        feedback: The feedback payload to upload.

    Raises:
        ValueError: If the service answers with a status other than 200.
        requests.exceptions.RequestException: If the request fails or times out.
    """
    payload = feedback.model_dump()

    # Log a redacted copy: the trajectory is summarised by its length and the
    # token is never written to the log.
    display_feedback = dict(payload)
    if 'trajectory' in display_feedback:
        display_feedback['trajectory'] = (
            f"elided [length: {len(display_feedback['trajectory'])}]"
        )
    if 'token' in display_feedback:
        display_feedback['token'] = 'elided'
    logger.info(f'Got feedback: {display_feedback}')

    # Without a timeout a stalled server would block the caller indefinitely.
    # The value bounds connecting and each wait for data, not the whole upload.
    response = requests.post(
        FEEDBACK_URL,
        headers={'Content-Type': 'application/json'},
        json=payload,
        timeout=30,
    )
    logger.info(f'Stored feedback: {response.status_code} {response.text}')
    if response.status_code != 200:
        raise ValueError(f'Failed to store feedback: {response.text}')
40 | 


--------------------------------------------------------------------------------
/easyweb/server/mock/README.md:
--------------------------------------------------------------------------------
 1 | # OpenDevin mock server
 2 | This is a simple mock server to facilitate development in the frontend.
 3 | 
 4 | ## Start the Server
 5 | Follow the instructions in the README to install dependencies. Then run:
 6 | ```
 7 | python listen.py
 8 | ```
 9 | 
10 | Then open the frontend to connect to the mock server. It will simply reply to every received message.
11 | 


--------------------------------------------------------------------------------
/easyweb/server/mock/listen.py:
--------------------------------------------------------------------------------
 1 | import uvicorn
 2 | from fastapi import FastAPI, WebSocket
 3 | 
 4 | from easyweb.core.schema import ActionType
 5 | 
 6 | app = FastAPI()
 7 | 
 8 | 
 9 | @app.websocket('/ws')
10 | async def websocket_endpoint(websocket: WebSocket):
11 |     await websocket.accept()
12 |     # send message to mock connection
13 |     await websocket.send_json(
14 |         {'action': ActionType.INIT, 'message': 'Control loop started.'}
15 |     )
16 | 
17 |     try:
18 |         while True:
19 |             # receive message
20 |             data = await websocket.receive_json()
21 |             print(f'Received message: {data}')
22 | 
23 |             # send mock response to client
24 |             response = {'message': f'receive {data}'}
25 |             await websocket.send_json(response)
26 |             print(f'Sent message: {response}')
27 |     except Exception as e:
28 |         print(f'WebSocket Error: {e}')
29 | 
30 | 
@app.get('/')
def read_root():
    """Return a fixed message identifying this as the mock server."""
    body = {'message': 'This is a mock server'}
    return body
34 | 
35 | 
@app.get('/api/options/models')
def read_llm_models():
    """Return the fixed list of model names served by the mock."""
    models = ['gpt-4', 'gpt-4-turbo-preview', 'gpt-4-0314', 'gpt-4-0613']
    return models
44 | 
45 | 
@app.get('/api/options/agents')
def read_llm_agents():
    """Return the fixed list of agent names served by the mock."""
    agents = ['MonologueAgent', 'CodeActAgent', 'PlannerAgent']
    return agents
53 | 
54 | 
@app.get('/api/list-files')
def refresh_files():
    """Return the fixed file listing served by the mock."""
    listing = ['hello_world.py']
    return listing
58 | 
59 | 
if __name__ == '__main__':
    # Serve the mock app on the loopback interface, port 3000.
    bind_host, bind_port = '127.0.0.1', 3000
    uvicorn.run(app, host=bind_host, port=bind_port)
62 | 


--------------------------------------------------------------------------------
/easyweb/server/session/__init__.py:
--------------------------------------------------------------------------------
1 | from .manager import SessionManager
2 | from .session import Session
3 | 
4 | session_manager = SessionManager()
5 | 
# Only list names this module actually defines: an undefined entry makes
# `from ... import *` raise AttributeError.
__all__ = ['Session', 'SessionManager', 'session_manager']
7 | 


--------------------------------------------------------------------------------
/easyweb/server/session/manager.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import time
 3 | from typing import Optional
 4 | 
 5 | from fastapi import WebSocket
 6 | 
 7 | from easyweb.core.logger import easyweb_logger as logger
 8 | 
 9 | from .session import Session
10 | 
11 | 
class SessionManager:
    """Registry of sessions keyed by session id, with periodic cleanup.

    Creating an instance schedules a background task that, every
    ``cleanup_interval`` seconds, closes and removes sessions that are not
    alive and have been inactive for more than ``session_timeout`` seconds.
    Because the task is scheduled in ``__init__``, an event loop must already
    be running when the manager is constructed.
    """

    # NOTE(review): class-level dict, shared by all instances — confirm intended.
    _sessions: dict[str, Session] = {}
    cleanup_interval: int = 600
    session_timeout: int = 1800

    def __init__(self):
        # The event loop only keeps weak references to tasks; hold strong ones
        # so fire-and-forget tasks cannot be garbage-collected mid-execution.
        self._background_tasks: set[asyncio.Task] = set()
        self._spawn(self._cleanup_sessions())

    def _spawn(self, coro) -> asyncio.Task:
        """Schedule ``coro`` as a task and keep a reference until it finishes."""
        task = asyncio.create_task(coro)
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)
        return task

    def add_or_restart_session(self, sid: str, ws_conn: WebSocket) -> Session:
        """Create a session for ``sid``, closing any existing one in the background."""
        if sid in self._sessions:
            self._spawn(self._sessions[sid].close())
        self._sessions[sid] = Session(sid=sid, ws=ws_conn)
        return self._sessions[sid]

    def get_session(self, sid: str) -> Session | None:
        """Return the session registered under ``sid``, or ``None``."""
        return self._sessions.get(sid)

    async def send(self, sid: str, data: dict[str, object]) -> bool:
        """Sends data to the client."""
        if sid not in self._sessions:
            return False
        return await self._sessions[sid].send(data)

    async def send_error(self, sid: str, message: str) -> bool:
        """Sends an error message to the client."""
        return await self.send(sid, {'error': True, 'message': message})

    async def send_message(self, sid: str, message: str) -> bool:
        """Sends a message to the client."""
        return await self.send(sid, {'message': message})

    async def _cleanup_sessions(self):
        """Periodically close and drop sessions that are dead and timed out."""
        while True:
            current_time = time.time()
            # Snapshot the items so sessions added meanwhile do not disturb iteration.
            session_ids_to_remove = [
                sid
                for sid, session in list(self._sessions.items())
                if not session.is_alive
                and current_time - session.last_active_ts > self.session_timeout
            ]

            for sid in session_ids_to_remove:
                to_del_session = self._sessions.pop(sid, None)
                if to_del_session is None:
                    continue
                try:
                    await to_del_session.close()
                except Exception:
                    # One failing close must not end the cleanup loop for good.
                    logger.exception(f'Failed to close session {sid} during cleanup.')
                    continue
                logger.info(
                    f'Session {sid} and related resource have been removed due to inactivity.'
                )

            await asyncio.sleep(self.cleanup_interval)
66 | 


--------------------------------------------------------------------------------
/easyweb/storage/__init__.py:
--------------------------------------------------------------------------------
 1 | from easyweb.core.config import config
 2 | 
 3 | from .files import FileStore
 4 | from .local import LocalFileStore
 5 | from .memory import InMemoryFileStore
 6 | from .s3 import S3FileStore
 7 | 
 8 | 
 9 | def _get_file_store() -> FileStore:
10 |     if config.file_store == 'local':
11 |         return LocalFileStore(config.file_store_path)
12 |     elif config.file_store == 's3':
13 |         return S3FileStore()
14 |     return InMemoryFileStore()
15 | 
16 | 
17 | singleton = _get_file_store()
18 | 
19 | 
def get_file_store() -> FileStore:
    """Return the module-level file store instance created at import time."""
    store = singleton
    return store
22 | 


--------------------------------------------------------------------------------
/easyweb/storage/files.py:
--------------------------------------------------------------------------------
from abc import ABC, abstractmethod
 2 | 
 3 | 
 4 | class FileStore:
 5 |     @abstractmethod
 6 |     def write(self, path: str, contents: str) -> None:
 7 |         pass
 8 | 
 9 |     @abstractmethod
10 |     def read(self, path: str) -> str:
11 |         pass
12 | 
13 |     @abstractmethod
14 |     def list(self, path: str) -> list[str]:
15 |         pass
16 | 
17 |     @abstractmethod
18 |     def delete(self, path: str) -> None:
19 |         pass
20 | 


--------------------------------------------------------------------------------
/easyweb/storage/local.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from .files import FileStore
 4 | 
 5 | 
 6 | class LocalFileStore(FileStore):
 7 |     root: str
 8 | 
 9 |     def __init__(self, root: str):
10 |         self.root = root
11 |         os.makedirs(self.root, exist_ok=True)
12 | 
13 |     def get_full_path(self, path: str) -> str:
14 |         if path.startswith('/'):
15 |             path = path[1:]
16 |         return os.path.join(self.root, path)
17 | 
18 |     def write(self, path: str, contents: str) -> None:
19 |         full_path = self.get_full_path(path)
20 |         os.makedirs(os.path.dirname(full_path), exist_ok=True)
21 |         with open(full_path, 'w') as f:
22 |             f.write(contents)
23 | 
24 |     def read(self, path: str) -> str:
25 |         full_path = self.get_full_path(path)
26 |         with open(full_path, 'r') as f:
27 |             return f.read()
28 | 
29 |     def list(self, path: str) -> list[str]:
30 |         full_path = self.get_full_path(path)
31 |         files = [os.path.join(path, f) for f in os.listdir(full_path)]
32 |         files = [f + '/' if os.path.isdir(self.get_full_path(f)) else f for f in files]
33 |         return files
34 | 
35 |     def delete(self, path: str) -> None:
36 |         full_path = self.get_full_path(path)
37 |         os.remove(full_path)
38 | 


--------------------------------------------------------------------------------
/easyweb/storage/memory.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from .files import FileStore
 4 | 
 5 | 
 6 | class InMemoryFileStore(FileStore):
 7 |     files: dict[str, str]
 8 | 
 9 |     def __init__(self):
10 |         self.files = {}
11 | 
12 |     def write(self, path: str, contents: str) -> None:
13 |         self.files[path] = contents
14 | 
15 |     def read(self, path: str) -> str:
16 |         if path not in self.files:
17 |             raise FileNotFoundError(path)
18 |         return self.files[path]
19 | 
20 |     def list(self, path: str) -> list[str]:
21 |         files = []
22 |         for file in self.files:
23 |             if not file.startswith(path):
24 |                 continue
25 |             suffix = file.removeprefix(path)
26 |             parts = suffix.split('/')
27 |             if parts[0] == '':
28 |                 parts.pop(0)
29 |             if len(parts) == 1:
30 |                 files.append(file)
31 |             else:
32 |                 dir_path = os.path.join(path, parts[0])
33 |                 if not dir_path.endswith('/'):
34 |                     dir_path += '/'
35 |                 if dir_path not in files:
36 |                     files.append(dir_path)
37 |         return files
38 | 
39 |     def delete(self, path: str) -> None:
40 |         del self.files[path]
41 | 


--------------------------------------------------------------------------------
/easyweb/storage/s3.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from minio import Minio
 4 | 
 5 | from .files import FileStore
 6 | 
 7 | AWS_S3_ENDPOINT = 's3.amazonaws.com'
 8 | 
 9 | 
class S3FileStore(FileStore):
    """File store backed by an S3-compatible bucket, accessed through MinIO.

    Credentials and the bucket name are read from the ``AWS_ACCESS_KEY_ID``,
    ``AWS_SECRET_ACCESS_KEY`` and ``AWS_S3_BUCKET`` environment variables.
    """

    def __init__(self, endpoint: str = AWS_S3_ENDPOINT) -> None:
        access_key = os.getenv('AWS_ACCESS_KEY_ID')
        secret_key = os.getenv('AWS_SECRET_ACCESS_KEY')
        self.bucket = os.getenv('AWS_S3_BUCKET')
        self.client = Minio(endpoint, access_key, secret_key)

    def write(self, path: str, contents: str) -> None:
        """Upload ``contents`` (UTF-8 encoded) as object ``path``."""
        # Imported here so the block does not depend on a file-level import.
        import io

        # put_object needs a readable binary stream plus its byte length;
        # passing the str directly is rejected by the client.
        as_bytes = contents.encode('utf-8')
        self.client.put_object(self.bucket, path, io.BytesIO(as_bytes), len(as_bytes))

    def read(self, path: str) -> str:
        """Download object ``path`` and return it decoded as UTF-8."""
        response = self.client.get_object(self.bucket, path)
        try:
            return response.data.decode('utf-8')
        finally:
            # Return the HTTP connection to the pool, as the SDK requires.
            response.close()
            response.release_conn()

    def list(self, path: str) -> list[str]:
        """Return the names of the objects listed for prefix ``path``."""
        return [obj.object_name for obj in self.client.list_objects(self.bucket, path)]

    def delete(self, path: str) -> None:
        """Remove object ``path`` from the bucket."""
        self.client.remove_object(self.bucket, path)
28 | 


--------------------------------------------------------------------------------
/frontend-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maitrix-org/easyweb/d2b56bc228541d2af2abb1de9c0c440ad830d30c/frontend-icon.png


--------------------------------------------------------------------------------
/model_port_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "gpt-4o-mini":
 3 |     {
 4 |         "base_url": "https://api.openai.com/v1/",
 5 |         "requires_key": false,
 6 |         "default": true,
 7 |         "display_name": "GPT-4o-mini (Free)"
 8 |     },
 9 |     "gpt-4o":
10 |     {
11 |         "base_url": "https://api.openai.com/v1/",
12 |         "requires_key": true,
13 |         "display_name": "GPT-4o"
14 |     },
15 |     "o1":
16 |     {
17 |         "requires_key": true,
18 |         "base_url": "https://api.openai.com/v1/",
19 |         "display_name": "OpenAI o1"
20 |     },
21 |     "o3-mini":
22 |     {
23 |         "requires_key": true,
24 |         "base_url": "https://api.openai.com/v1/",
25 |         "display_name": "OpenAI o3-mini"
26 |     }
27 | }
28 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "easyweb"
 3 | version = "0.1.0"
 4 | description = "EasyWeb: UI Agents at Your Fingertips"
 5 | authors = ["Maitrix Team"]
 6 | license = "MIT"
 7 | readme = "README.md"
 8 | repository = "https://github.com/maitrix-org/easyweb"
 9 | packages = [
10 |     { include = "reasoners", from = "../llm-reasoners" },
11 | ]
12 | 
13 | [tool.poetry.dependencies]
14 | python = ">=3.11,<3.13"
15 | datasets = "*"
16 | pandas = "*"
17 | litellm = "*"
18 | google-generativeai = "*" # To use litellm with Gemini Pro API
19 | termcolor = "*"
20 | seaborn = "*"
21 | docker = "*"
22 | fastapi = "*"
23 | toml = "*"
24 | uvicorn = "*"
25 | types-toml = "*"
26 | numpy = "1.26.0"
27 | json-repair = "*"
28 | browsergym = "*" # integrate browsergym as the browsing interface
29 | html2text = "*"
30 | e2b = "^0.17.1"
31 | pexpect = "*"
32 | jinja2 = "^3.1.3"
33 | python-multipart = "*"
34 | boto3 = "*"
35 | minio = "^7.2.7"
36 | gevent = "^24.2.1"
37 | pyarrow = "16.1.0" # transitive dependency, pinned here to avoid conflicts
38 | tenacity = "^8.3.0"
39 | zope-interface = "6.4.post2"
40 | gradio = "5.1.0"
41 | websocket-client = "*"
42 | bs4 = "*"
43 | 
44 | [tool.poetry.group.llama-index.dependencies]
45 | llama-index = "*"
46 | llama-index-vector-stores-chroma = "*"
47 | chromadb = "*"
48 | llama-index-embeddings-huggingface = "*"
49 | torch = "2.2.2"
50 | llama-index-embeddings-azure-openai = "*"
51 | llama-index-embeddings-ollama = "*"
52 | 
53 | [tool.poetry.group.dev.dependencies]
54 | ruff = "0.4.8"
55 | mypy = "1.10.0"
56 | pre-commit = "3.7.1"
57 | 
58 | [tool.poetry.group.test.dependencies]
59 | pytest = "*"
60 | pytest-cov = "*"
61 | pytest-asyncio = "*"
62 | pytest-forked = "*"
63 | flake8 = "*"
64 | openai = "*"
65 | python-docx = "*"
66 | PyPDF2 = "*"
67 | pylatexenc = "*"
68 | python-pptx = "*"
69 | opencv-python = "*"
70 | pandas = "*"
71 | reportlab = "*"
72 | 
73 | [tool.coverage.run]
74 | concurrency = ["gevent"]
75 | 
76 | 
77 | [tool.poetry.group.evaluation.dependencies]
78 | streamlit = "*"
79 | whatthepatch = "*"
80 | retry = "*"
81 | evaluate = "*"
82 | 
83 | [build-system]
84 | build-backend = "poetry.core.masonry.api"
85 | requires = [
86 |   "poetry-core",
87 | ]
88 | 
89 | [tool.autopep8]
90 | # autopep8 fights with mypy on line length issue
91 | ignore = [ "E501" ]
92 | 
93 | [tool.black]
94 | # prevent black (if installed) from changing single quotes to double quotes
95 | skip-string-normalization = true
96 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | addopts = -p no:warnings
3 | 


--------------------------------------------------------------------------------
/tests/integration/mock/BrowsingAgent/test_browse_internet/response_001.log:
--------------------------------------------------------------------------------
1 | In order to accomplish my goal, I need to navigate to the localhost:8000 page.
2 | ```goto('http://localhost:8000')```
3 | 


--------------------------------------------------------------------------------
/tests/integration/mock/BrowsingAgent/test_browse_internet/response_002.log:
--------------------------------------------------------------------------------
1 | In order to accomplish my goal, I need to click on the button with bid 10 to reveal the answer to life, the universe, and everything.
2 | ```click("10")```
3 | 


--------------------------------------------------------------------------------
/tests/integration/mock/BrowsingAgent/test_browse_internet/response_003.log:
--------------------------------------------------------------------------------
1 | In order to accomplish my goal, I need to read the static text that reveals the answer to life, the universe, and everything. The text states: "The answer is OpenDevin is all you need!"
2 | 
3 | ```send_msg_to_user('The answer is OpenDevin is all you need!')```
4 | 


--------------------------------------------------------------------------------
/tests/integration/mock/CodeActAgent/test_browse_internet/response_001.log:
--------------------------------------------------------------------------------
1 | Sure! Let me browse the server's homepage at http://localhost:8000 and find the ultimate answer to life.
2 | 
3 | <execute_browse>
4 | Get the content on "http://localhost:8000"
5 | 


--------------------------------------------------------------------------------
/tests/integration/mock/CodeActAgent/test_browse_internet/response_002.log:
--------------------------------------------------------------------------------
1 | In order to accomplish my goal, I need to get the content on the server's homepage at http://localhost:8000.
2 | ```goto('http://localhost:8000')```
3 | 


--------------------------------------------------------------------------------
/tests/integration/mock/CodeActAgent/test_browse_internet/response_003.log:
--------------------------------------------------------------------------------
1 | In order to accomplish my goal, I need to click on the button labeled "Click me" with bid 10 to reveal the answer to life, the universe, and everything.
2 | ```click("10")```
3 | 


--------------------------------------------------------------------------------
/tests/integration/mock/CodeActAgent/test_browse_internet/response_004.log:
--------------------------------------------------------------------------------
1 | In order to accomplish my goal, I need to read the static text that reveals the answer to life, the universe, and everything. The text is already visible in the accessibility tree.
2 | 
3 | The answer is: "OpenDevin is all you need!"
4 | 
5 | ```send_msg_to_user('The answer to life, the universe, and everything is: OpenDevin is all you need!')```
6 | 


--------------------------------------------------------------------------------
/tests/integration/mock/CodeActAgent/test_browse_internet/response_005.log:
--------------------------------------------------------------------------------
1 | The content of the server's homepage at localhost:8000 is: "The answer to life, the universe, and everything is: OpenDevin is all you need!"
2 | 


--------------------------------------------------------------------------------
/tests/integration/mock/CodeActAgent/test_edits/response_001.log:
--------------------------------------------------------------------------------
1 | Sure! Let's start by opening the `bad.txt` file to see its content.
2 | 
3 | <execute_ipython>
4 | open_file('bad.txt')
5 | 


--------------------------------------------------------------------------------
/tests/integration/mock/CodeActAgent/test_edits/response_002.log:
--------------------------------------------------------------------------------
1 | Let's correct the typos in the `bad.txt` file.
2 | 
3 | <execute_ipython>
4 | edit_file('bad.txt', start=1, end=4, content="""This is a stupid typo.
5 | Really?
6 | No more typos!
7 | Enjoy!""")
8 | 


--------------------------------------------------------------------------------
/tests/integration/mock/CodeActAgent/test_edits/response_003.log:
--------------------------------------------------------------------------------
1 | The typos in `bad.txt` have been corrected. If you need any further assistance, feel free to ask!
2 | 


--------------------------------------------------------------------------------
/tests/integration/mock/CodeActAgent/test_ipython/response_001.log:
--------------------------------------------------------------------------------
1 | Sure! Let me write 'hello world' to a text file named 'test.txt' in the '/workspace' directory without asking for confirmation:
2 | <execute_ipython>
3 | with open('/workspace/test.txt', 'w') as f:
4 |     f.write('hello world')
5 | 


--------------------------------------------------------------------------------
/tests/integration/mock/CodeActAgent/test_ipython/response_002.log:
--------------------------------------------------------------------------------
1 | The text file 'test.txt' containing 'hello world' has been successfully created in the '/workspace' directory. If you need any further assistance or have any more requests, feel free to let me know!
2 | 


--------------------------------------------------------------------------------
/tests/integration/mock/CodeActAgent/test_ipython_module/response_001.log:
--------------------------------------------------------------------------------
 1 | Understood! Let's proceed by installing the `pymsgbox` package version 1.0.9:
 2 | 
 3 | 
 4 | ```
 5 | pythonCopy code`<execute_ipython>
 6 | # Install pymsgbox version 1.0.9
 7 | %pip install pymsgbox==1.0.9
 8 | </execute_ipython>`
 9 | ```
10 | Once the installation is complete, we'll import `pymsgbox` and print its version to a file named `test.txt` in the `/workspace` directory. Let's continue!
11 | 


--------------------------------------------------------------------------------
/tests/integration/mock/CodeActAgent/test_ipython_module/response_002.log:
--------------------------------------------------------------------------------
 1 | Now that `pymsgbox` is installed, let's import it and print its version to a file named `test.txt`:
 2 | 
 3 | 
 4 | ```
 5 | pythonCopy code`<execute_ipython>
 6 | import pymsgbox
 7 | 
 8 | # Get the version of pymsgbox
 9 | pymsgbox_version = pymsgbox.__version__
10 | 
11 | # Write the version to a file
12 | with open("/workspace/test.txt", "w") as f:
13 |     f.write(pymsgbox_version)
14 | </execute_ipython>`
15 | ```
16 | This will import `pymsgbox`, retrieve its version, and then write the version to the `test.txt` file in the `/workspace` directory.
17 | 


--------------------------------------------------------------------------------
/tests/integration/mock/CodeActAgent/test_ipython_module/response_003.log:
--------------------------------------------------------------------------------
1 | The `pymsgbox` package version 1.0.9 has been successfully installed and its version has been written to the `test.txt` file in the `/workspace` directory. If you need any further assistance, feel free to ask!
2 | 


--------------------------------------------------------------------------------
/tests/integration/mock/CodeActAgent/test_write_simple_script/response_001.log:
--------------------------------------------------------------------------------
1 | Sure! I'll create the shell script `hello.sh` for you.
2 | 
3 | <execute_bash>
4 | echo 'echo "hello"' > hello.sh
5 | chmod +x hello.sh
6 | 


--------------------------------------------------------------------------------
/tests/integration/mock/CodeActAgent/test_write_simple_script/response_002.log:
--------------------------------------------------------------------------------
1 | The shell script `hello.sh` has been created and made executable. Let's run it to ensure it works correctly.
2 | 
3 | <execute_bash>
4 | ./hello.sh
5 | 


--------------------------------------------------------------------------------
/tests/integration/mock/CodeActAgent/test_write_simple_script/response_003.log:
--------------------------------------------------------------------------------
1 | The shell script `hello.sh` successfully prints "hello". If you have any further tasks or questions, feel free to let me know!
2 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_edits/prompt_004.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a software engineer. You've inherited an existing codebase, which you
 7 | need to modify to complete this task:
 8 | 
 9 | Fix typos in bad.txt. Do not ask me for confirmation at any point.
10 | 
11 | 
12 | Here's a summary of the codebase, as it relates to this task:
13 | 
14 | The codebase contains a single file named 'bad.txt' with some typos. The content of 'bad.txt' is:
15 | 
16 | This is a stupid typoo.
17 | Really?
18 | No mor typos!
19 | Enjoy!
20 | 
21 | The engineering team needs to correct the typos in this file.
22 | 
23 | 
24 | ## Available Actions
25 | * `run` - runs a command on the command line in a Linux shell. Arguments:
26 |   * `command` - the command to run
27 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
28 | 
29 | * `write` - writes the content to a file. Arguments:
30 |   * `path` - the path of the file to write
31 |   * `content` - the content to write to the file
32 | 
33 | * `read` - reads the content of a file. Arguments:
34 |   * `path` - the path of the file to read
35 | 
36 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
37 |   * `content` - the thought to record
38 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
39 | 
40 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
41 |   * `outputs` - a dictionary representing the outputs of your task, if any
42 | 
43 | 
44 | Do NOT finish until you have completed the tasks.
45 | 
46 | ## History
47 | Here is a recent history of actions you've taken in service of this plan,
48 | as well as observations you've made. This only includes the MOST RECENT
49 | actions and observations--more may have happened before that.
50 | They are time-ordered, with your most recent action at the bottom.
51 | 
52 | []
53 | 
54 | ## Format
55 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
56 | * `action`, which is one of the actions specified here
57 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
58 | 
59 | You MUST NOT include any other text besides the JSON response
60 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_edits/prompt_005.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a software engineer. You've inherited an existing codebase, which you
 7 | need to modify to complete this task:
 8 | 
 9 | Fix typos in bad.txt. Do not ask me for confirmation at any point.
10 | 
11 | 
12 | Here's a summary of the codebase, as it relates to this task:
13 | 
14 | The codebase contains a single file named 'bad.txt' with some typos. The content of 'bad.txt' is:
15 | 
16 | This is a stupid typoo.
17 | Really?
18 | No mor typos!
19 | Enjoy!
20 | 
21 | The engineering team needs to correct the typos in this file.
22 | 
23 | 
24 | ## Available Actions
25 | * `run` - runs a command on the command line in a Linux shell. Arguments:
26 |   * `command` - the command to run
27 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
28 | 
29 | * `write` - writes the content to a file. Arguments:
30 |   * `path` - the path of the file to write
31 |   * `content` - the content to write to the file
32 | 
33 | * `read` - reads the content of a file. Arguments:
34 |   * `path` - the path of the file to read
35 | 
36 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
37 |   * `content` - the thought to record
38 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
39 | 
40 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
41 |   * `outputs` - a dictionary representing the outputs of your task, if any
42 | 
43 | 
44 | Do NOT finish until you have completed the tasks.
45 | 
46 | ## History
47 | Here is a recent history of actions you've taken in service of this plan,
48 | as well as observations you've made. This only includes the MOST RECENT
49 | actions and observations--more may have happened before that.
50 | They are time-ordered, with your most recent action at the bottom.
51 | 
52 | [[{"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "bad.txt"}}]]
53 | 
54 | ## Format
55 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
56 | * `action`, which is one of the actions specified here
57 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
58 | 
59 | You MUST NOT include any other text besides the JSON response
60 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_edits/prompt_006.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a software engineer. You've inherited an existing codebase, which you
 7 | need to modify to complete this task:
 8 | 
 9 | Fix typos in bad.txt. Do not ask me for confirmation at any point.
10 | 
11 | 
12 | Here's a summary of the codebase, as it relates to this task:
13 | 
14 | The codebase contains a single file named 'bad.txt' with some typos. The content of 'bad.txt' is:
15 | 
16 | This is a stupid typoo.
17 | Really?
18 | No mor typos!
19 | Enjoy!
20 | 
21 | The engineering team needs to correct the typos in this file.
22 | 
23 | 
24 | ## Available Actions
25 | * `run` - runs a command on the command line in a Linux shell. Arguments:
26 |   * `command` - the command to run
27 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
28 | 
29 | * `write` - writes the content to a file. Arguments:
30 |   * `path` - the path of the file to write
31 |   * `content` - the content to write to the file
32 | 
33 | * `read` - reads the content of a file. Arguments:
34 |   * `path` - the path of the file to read
35 | 
36 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
37 |   * `content` - the thought to record
38 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
39 | 
40 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
41 |   * `outputs` - a dictionary representing the outputs of your task, if any
42 | 
43 | 
44 | Do NOT finish until you have completed the tasks.
45 | 
46 | ## History
47 | Here is a recent history of actions you've taken in service of this plan,
48 | as well as observations you've made. This only includes the MOST RECENT
49 | actions and observations--more may have happened before that.
50 | They are time-ordered, with your most recent action at the bottom.
51 | 
52 | [[{"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "bad.txt"}}], [{"source": "agent", "action": "write", "args": {"path": "bad.txt", "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "bad.txt"}}]]
53 | 
54 | ## Format
55 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
56 | * `action`, which is one of the actions specified here
57 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
58 | 
59 | You MUST NOT include any other text besides the JSON response
60 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_edits/prompt_007.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a quality assurance engineer. Another engineer has made changes to the
 7 | codebase which are supposed to solve this task:
 8 | 
 9 | Fix typos in bad.txt. Do not ask me for confirmation at any point.
10 | 
11 | Note the changes might have already been applied in-line. You should focus on
12 | validating if the task is solved, nothing else.
13 | 
14 | ## Available Actions
15 | * `run` - runs a command on the command line in a Linux shell. Arguments:
16 |   * `command` - the command to run
17 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
18 | 
19 | * `read` - reads the content of a file. Arguments:
20 |   * `path` - the path of the file to read
21 | 
22 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
23 |   * `content` - the thought to record
24 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
25 | 
26 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
27 |   * `outputs` - a dictionary representing the outputs of your task, if any
28 | 
29 | 
30 | You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts.
31 | 
32 | Do NOT finish until you know whether the task is complete and correct.
33 | When you're done, add a `completed` boolean to the `outputs` of the `finish` action.
34 | If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action
35 | explaining what the problem is.
36 | 
37 | ## History
38 | Here is a recent history of actions you've taken in service of this plan,
39 | as well as observations you've made. This only includes the MOST RECENT
40 | actions and observations--more may have happened before that.
41 | They are time-ordered, with your most recent action at the bottom.
42 | 
43 | []
44 | 
45 | ## Format
46 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
47 | * `action`, which is one of the actions specified here
48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
49 | 
50 | You MUST NOT include any other text besides the JSON response
51 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_edits/prompt_008.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a quality assurance engineer. Another engineer has made changes to the
 7 | codebase which are supposed to solve this task:
 8 | 
 9 | Fix typos in bad.txt. Do not ask me for confirmation at any point.
10 | 
11 | Note the changes might have already been applied in-line. You should focus on
12 | validating if the task is solved, nothing else.
13 | 
14 | ## Available Actions
15 | * `run` - runs a command on the command line in a Linux shell. Arguments:
16 |   * `command` - the command to run
17 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
18 | 
19 | * `read` - reads the content of a file. Arguments:
20 |   * `path` - the path of the file to read
21 | 
22 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
23 |   * `content` - the thought to record
24 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
25 | 
26 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
27 |   * `outputs` - a dictionary representing the outputs of your task, if any
28 | 
29 | 
30 | You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts.
31 | 
32 | Do NOT finish until you know whether the task is complete and correct.
33 | When you're done, add a `completed` boolean to the `outputs` of the `finish` action.
34 | If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action
35 | explaining what the problem is.
36 | 
37 | ## History
38 | Here is a recent history of actions you've taken in service of this plan,
39 | as well as observations you've made. This only includes the MOST RECENT
40 | actions and observations--more may have happened before that.
41 | They are time-ordered, with your most recent action at the bottom.
42 | 
43 | [[{"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n\n", "extras": {"path": "bad.txt"}}]]
44 | 
45 | ## Format
46 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
47 | * `action`, which is one of the actions specified here
48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
49 | 
50 | You MUST NOT include any other text besides the JSON response
51 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_edits/response_001.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "run",
3 |   "args": {
4 |     "command": "ls",
5 |     "background": false
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_edits/response_002.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "read",
3 |   "args": {
4 |     "path": "bad.txt"
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_edits/response_003.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "finish",
3 |   "args": {
4 |     "outputs": {
5 |       "summary": "The codebase contains a single file named 'bad.txt' with some typos. The content of 'bad.txt' is:\n\nThis is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n\nThe engineering team needs to correct the typos in this file."
6 |     }
7 |   }
8 | }
9 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_edits/response_004.log:
--------------------------------------------------------------------------------
1 | ```json
2 | {
3 |   "action": "read",
4 |   "args": {
5 |     "path": "bad.txt"
6 |   }
7 | }
8 | ```
9 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_edits/response_005.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "write",
 4 |   "args": {
 5 |     "path": "bad.txt",
 6 |     "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n"
 7 |   }
 8 | }
 9 | ```
10 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_edits/response_006.log:
--------------------------------------------------------------------------------
1 | ```json
2 | {
3 |   "action": "finish",
4 |   "args": {
5 |     "outputs": {}
6 |   }
7 | }
8 | ```
9 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_edits/response_007.log:
--------------------------------------------------------------------------------
1 | ```json
2 | {
3 |   "action": "read",
4 |   "args": {
5 |     "path": "bad.txt"
6 |   }
7 | }
8 | ```
9 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_edits/response_008.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "finish",
 4 |   "args": {
 5 |     "outputs": {
 6 |       "completed": true
 7 |     }
 8 |   }
 9 | }
10 | ```
11 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_003.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a software engineer. You've inherited an existing codebase, which you
 7 | need to modify to complete this task:
 8 | 
 9 | Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.
10 | 
11 | 
12 | Here's a summary of the codebase, as it relates to this task:
13 | 
14 | The codebase appears to be empty. Engineers should start everything from scratch.
15 | 
16 | 
17 | ## Available Actions
18 | * `run` - runs a command on the command line in a Linux shell. Arguments:
19 |   * `command` - the command to run
20 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
21 | 
22 | * `write` - writes the content to a file. Arguments:
23 |   * `path` - the path of the file to write
24 |   * `content` - the content to write to the file
25 | 
26 | * `read` - reads the content of a file. Arguments:
27 |   * `path` - the path of the file to read
28 | 
29 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
30 |   * `content` - the thought to record
31 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
32 | 
33 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
34 |   * `outputs` - a dictionary representing the outputs of your task, if any
35 | 
36 | 
37 | Do NOT finish until you have completed the tasks.
38 | 
39 | ## History
40 | Here is a recent history of actions you've taken in service of this plan,
41 | as well as observations you've made. This only includes the MOST RECENT
42 | actions and observations--more may have happened before that.
43 | They are time-ordered, with your most recent action at the bottom.
44 | 
45 | []
46 | 
47 | ## Format
48 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
49 | * `action`, which is one of the actions specified here
50 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
51 | 
52 | You MUST NOT include any other text besides the JSON response
53 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_004.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a software engineer. You've inherited an existing codebase, which you
 7 | need to modify to complete this task:
 8 | 
 9 | Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.
10 | 
11 | 
12 | Here's a summary of the codebase, as it relates to this task:
13 | 
14 | The codebase appears to be empty. Engineers should start everything from scratch.
15 | 
16 | 
17 | ## Available Actions
18 | * `run` - runs a command on the command line in a Linux shell. Arguments:
19 |   * `command` - the command to run
20 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
21 | 
22 | * `write` - writes the content to a file. Arguments:
23 |   * `path` - the path of the file to write
24 |   * `content` - the content to write to the file
25 | 
26 | * `read` - reads the content of a file. Arguments:
27 |   * `path` - the path of the file to read
28 | 
29 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
30 |   * `content` - the thought to record
31 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
32 | 
33 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
34 |   * `outputs` - a dictionary representing the outputs of your task, if any
35 | 
36 | 
37 | Do NOT finish until you have completed the tasks.
38 | 
39 | ## History
40 | Here is a recent history of actions you've taken in service of this plan,
41 | as well as observations you've made. This only includes the MOST RECENT
42 | actions and observations--more may have happened before that.
43 | They are time-ordered, with your most recent action at the bottom.
44 | 
45 | [[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}]]
46 | 
47 | ## Format
48 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
49 | * `action`, which is one of the actions specified here
50 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
51 | 
52 | You MUST NOT include any other text besides the JSON response
53 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_005.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a software engineer. You've inherited an existing codebase, which you
 7 | need to modify to complete this task:
 8 | 
 9 | Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.
10 | 
11 | 
12 | Here's a summary of the codebase, as it relates to this task:
13 | 
14 | The codebase appears to be empty. Engineers should start everything from scratch.
15 | 
16 | 
17 | ## Available Actions
18 | * `run` - runs a command on the command line in a Linux shell. Arguments:
19 |   * `command` - the command to run
20 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
21 | 
22 | * `write` - writes the content to a file. Arguments:
23 |   * `path` - the path of the file to write
24 |   * `content` - the content to write to the file
25 | 
26 | * `read` - reads the content of a file. Arguments:
27 |   * `path` - the path of the file to read
28 | 
29 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
30 |   * `content` - the thought to record
31 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
32 | 
33 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
34 |   * `outputs` - a dictionary representing the outputs of your task, if any
35 | 
36 | 
37 | Do NOT finish until you have completed the tasks.
38 | 
39 | ## History
40 | Here is a recent history of actions you've taken in service of this plan,
41 | as well as observations you've made. This only includes the MOST RECENT
42 | actions and observations--more may have happened before that.
43 | They are time-ordered, with your most recent action at the bottom.
44 | 
45 | [[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}], [{"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]]
46 | 
47 | ## Format
48 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
49 | * `action`, which is one of the actions specified here
50 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
51 | 
52 | You MUST NOT include any other text besides the JSON response
53 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_006.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a quality assurance engineer. Another engineer has made changes to the
 7 | codebase which are supposed to solve this task:
 8 | 
 9 | Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.
10 | 
11 | Note the changes might have already been applied in-line. You should focus on
12 | validating if the task is solved, nothing else.
13 | 
14 | ## Available Actions
15 | * `run` - runs a command on the command line in a Linux shell. Arguments:
16 |   * `command` - the command to run
17 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
18 | 
19 | * `read` - reads the content of a file. Arguments:
20 |   * `path` - the path of the file to read
21 | 
22 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
23 |   * `content` - the thought to record
24 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
25 | 
26 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
27 |   * `outputs` - a dictionary representing the outputs of your task, if any
28 | 
29 | 
30 | You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts.
31 | 
32 | Do NOT finish until you know whether the task is complete and correct.
33 | When you're done, add a `completed` boolean to the `outputs` of the `finish` action.
34 | If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action
35 | explaining what the problem is.
36 | 
37 | ## History
38 | Here is a recent history of actions you've taken in service of this plan,
39 | as well as observations you've made. This only includes the MOST RECENT
40 | actions and observations--more may have happened before that.
41 | They are time-ordered, with your most recent action at the bottom.
42 | 
43 | []
44 | 
45 | ## Format
46 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
47 | * `action`, which is one of the actions specified here
48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
49 | 
50 | You MUST NOT include any other text besides the JSON response
51 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_007.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a quality assurance engineer. Another engineer has made changes to the
 7 | codebase which are supposed to solve this task:
 8 | 
 9 | Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.
10 | 
11 | Note the changes might have already been applied in-line. You should focus on
12 | validating if the task is solved, nothing else.
13 | 
14 | ## Available Actions
15 | * `run` - runs a command on the command line in a Linux shell. Arguments:
16 |   * `command` - the command to run
17 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
18 | 
19 | * `read` - reads the content of a file. Arguments:
20 |   * `path` - the path of the file to read
21 | 
22 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
23 |   * `content` - the thought to record
24 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
25 | 
26 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
27 |   * `outputs` - a dictionary representing the outputs of your task, if any
28 | 
29 | 
30 | You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts.
31 | 
32 | Do NOT finish until you know whether the task is complete and correct.
33 | When you're done, add a `completed` boolean to the `outputs` of the `finish` action.
34 | If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action
35 | explaining what the problem is.
36 | 
37 | ## History
38 | Here is a recent history of actions you've taken in service of this plan,
39 | as well as observations you've made. This only includes the MOST RECENT
40 | actions and observations--more may have happened before that.
41 | They are time-ordered, with your most recent action at the bottom.
42 | 
43 | [[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\necho 'hello'\n", "extras": {"path": "hello.sh"}}]]
44 | 
45 | ## Format
46 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
47 | * `action`, which is one of the actions specified here
48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
49 | 
50 | You MUST NOT include any other text besides the JSON response
51 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_008.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a quality assurance engineer. Another engineer has made changes to the
 7 | codebase which are supposed to solve this task:
 8 | 
 9 | Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.
10 | 
11 | Note the changes might have already been applied in-line. You should focus on
12 | validating if the task is solved, nothing else.
13 | 
14 | ## Available Actions
15 | * `run` - runs a command on the command line in a Linux shell. Arguments:
16 |   * `command` - the command to run
17 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
18 | 
19 | * `read` - reads the content of a file. Arguments:
20 |   * `path` - the path of the file to read
21 | 
22 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
23 |   * `content` - the thought to record
24 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
25 | 
26 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
27 |   * `outputs` - a dictionary representing the outputs of your task, if any
28 | 
29 | 
30 | You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts.
31 | 
32 | Do NOT finish until you know whether the task is complete and correct.
33 | When you're done, add a `completed` boolean to the `outputs` of the `finish` action.
34 | If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action
35 | explaining what the problem is.
36 | 
37 | ## History
38 | Here is a recent history of actions you've taken in service of this plan,
39 | as well as observations you've made. This only includes the MOST RECENT
40 | actions and observations--more may have happened before that.
41 | They are time-ordered, with your most recent action at the bottom.
42 | 
43 | [[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\necho 'hello'\n", "extras": {"path": "hello.sh"}}], [{"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]]
44 | 
45 | ## Format
46 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
47 | * `action`, which is one of the actions specified here
48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
49 | 
50 | You MUST NOT include any other text besides the JSON response
51 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_001.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "run",
3 |   "args": {
4 |     "command": "ls",
5 |     "background": false
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_002.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "finish",
3 |   "args": {
4 |     "outputs": {
5 |       "summary": "The codebase appears to be empty. Engineers should start everything from scratch."
6 |     }
7 |   }
8 | }
9 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_003.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "write",
3 |   "args": {
4 |     "path": "hello.sh",
5 |     "content": "#!/bin/bash\necho 'hello'"
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_004.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "run",
3 |   "args": {
4 |     "command": "bash hello.sh",
5 |     "background": false
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_005.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "finish",
3 |   "args": {
4 |     "outputs": {
5 |       "result": "hello"
6 |     }
7 |   }
8 | }
9 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_006.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "read",
3 |   "args": {
4 |     "path": "hello.sh"
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_007.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "run",
3 |   "args": {
4 |     "command": "bash hello.sh",
5 |     "background": false
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_008.log:
--------------------------------------------------------------------------------
 1 | The task was to create a shell script named `hello.sh` that prints "hello" without asking for any confirmation. Based on the history:
 2 | 
 3 | 1. The content of `hello.sh` was read and confirmed to contain:
 4 |    ```bash
 5 |    #!/bin/bash
 6 |    echo 'hello'
 7 |    ```
 8 |    This script correctly prints "hello" when executed.
 9 | 
10 | 2. The script was executed using the command `bash hello.sh`, and the output was "hello", which matches the expected output.
11 | 
12 | Since the script behaves as expected and meets the requirements of the task, the task is completed successfully.
13 | 
14 | ```json
15 | {
16 |   "action": "finish",
17 |   "args": {
18 |     "outputs": {
19 |       "completed": true
20 |     }
21 |   }
22 | }
23 | ```
24 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_edits/response_001.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "delegate",
 4 |   "args": {
 5 |     "agent": "TypoFixerAgent",
 6 |     "inputs": {
 7 |       "task": "Fix typos in bad.txt"
 8 |     }
 9 |   }
10 | }
11 | ```
12 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_edits/response_002.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "read",
3 |   "args": {
4 |     "path": "./bad.txt"
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_edits/response_003.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "message",
3 |   "args": {
4 |     "content": "The content of 'bad.txt' has been read. The following typos have been identified:\n1. 'typoo' should be 'typo'\n2. 'mor' should be 'more'\nI will now proceed to fix these typos and overwrite the file with the corrected content.",
5 |     "wait_for_response": false
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_edits/response_004.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "write",
3 |   "args": {
4 |     "path": "./bad.txt",
5 |     "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n"
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_edits/response_005.log:
--------------------------------------------------------------------------------
 1 | {
 2 |   "action": "finish",
 3 |   "args": {
 4 |     "outputs": {
 5 |       "summary": {
 6 |         "file": "bad.txt",
 7 |         "typos_fixed": [
 8 |           {
 9 |             "original": "typoo",
10 |             "corrected": "typo"
11 |           },
12 |           {
13 |             "original": "mor",
14 |             "corrected": "more"
15 |           }
16 |         ]
17 |       }
18 |     }
19 |   }
20 | }
21 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_edits/response_006.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "finish",
 4 |   "args": {
 5 |     "outputs": {
 6 |       "file": "bad.txt",
 7 |       "typos_fixed": [
 8 |         {
 9 |           "original": "typoo",
10 |           "corrected": "typo"
11 |         },
12 |         {
13 |           "original": "mor",
14 |           "corrected": "more"
15 |         }
16 |       ]
17 |     }
18 |   }
19 | }
20 | ```
21 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_simple_task_rejection/prompt_002.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a responsible software engineer and always write good commit messages.
 7 | 
 8 | Please analyze the diff in the staging area, understand the context and content
 9 | of the updates from the diff only. Identify key elements like:
10 | - Which files are affected?
11 | - What types of changes were made (e.g., new features, bug fixes, refactoring, documentation, testing)?
12 | 
13 | Then you should generate a commit message that succinctly summarizes the staged
14 | changes. The commit message should include:
15 | - A summary line that clearly states the purpose of the changes.
16 | - Optionally, a detailed description if the changes are complex or need further explanation.
17 | 
18 | You should find the diff using `git diff --cached`, compile a commit message,
19 | and call the `finish` action with `outputs.answer` set to the answer. If current
20 | repo is not a valid git repo, or there is no diff in the staging area, please call
21 | the `reject` action.
22 | 
23 | ## History
24 | Here is a recent history of actions you've taken in service of this plan,
25 | as well as observations you've made. This only includes the MOST RECENT
26 | actions and observations--more may have happened before that.
27 | They are time-ordered, with your most recent action at the bottom.
28 | 
29 | []
30 | 
31 | If the last item in the history is an error, you should try to fix it.
32 | 
33 | ## Available Actions
34 | * `run` - runs a command on the command line in a Linux shell. Arguments:
35 |   * `command` - the command to run
36 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
37 | 
38 | * `reject` - reject the task. Arguments:
39 |   * `outputs` - a dictionary with only a `reason` attribute
40 | 
41 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
42 |   * `outputs` - a dictionary representing the outputs of your task, if any
43 | 
44 | 
45 | ## Format
46 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
47 | * `action`, which is one of the actions specified here
48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
49 | 
50 | You MUST NOT include any other text besides the JSON response
51 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_001.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "delegate",
 4 |   "args": {
 5 |     "agent": "CommitWriterAgent",
 6 |     "inputs": {}
 7 |   }
 8 | }
 9 | ```
10 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_002.log:
--------------------------------------------------------------------------------
1 | ```json
2 | {
3 |   "action": "run",
4 |   "args": {
5 |     "command": "git diff --cached"
6 |   }
7 | }
8 | ```
9 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_003.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "run",
 4 |   "args": {
 5 |     "command": "git diff --cached",
 6 |     "background": false
 7 |   }
 8 | }
 9 | ```
10 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_004.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "run",
 4 |   "args": {
 5 |     "command": "git diff --staged",
 6 |     "background": false
 7 |   }
 8 | }
 9 | ```
10 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_005.log:
--------------------------------------------------------------------------------
1 | ```json
2 | {
3 |   "action": "reject",
4 |   "args": {
5 |     "reason": "The current repository is not a valid git repository or there is no diff in the staging area."
6 |   }
7 | }
8 | ```
9 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_006.log:
--------------------------------------------------------------------------------
1 | ```json
2 | {
3 |   "action": "reject",
4 |   "args": {
5 |     "reason": "The current repository is not a valid git repository or there is no diff in the staging area."
6 |   }
7 | }
8 | ```
9 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_007.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "reject",
 4 |   "args": {
 5 |     "outputs": {
 6 |       "reason": "The current repository is not a valid git repository or there is no diff in the staging area."
 7 |     }
 8 |   }
 9 | }
10 | ```
11 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_008.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "reject",
 4 |   "args": {
 5 |     "outputs": {
 6 |       "reason": "The current repository is not a valid git repository or there is no diff in the staging area."
 7 |     }
 8 |   }
 9 | }
10 | ```
11 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_002.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a software engineer. You've inherited an existing codebase, which you
 7 | need to modify to complete this task:
 8 | 
 9 | Write a shell script 'hello.sh' that prints 'hello'.
10 | 
11 | 
12 | 
13 | ## Available Actions
14 | * `run` - runs a command on the command line in a Linux shell. Arguments:
15 |   * `command` - the command to run
16 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
17 | 
18 | * `write` - writes the content to a file. Arguments:
19 |   * `path` - the path of the file to write
20 |   * `content` - the content to write to the file
21 | 
22 | * `read` - reads the content of a file. Arguments:
23 |   * `path` - the path of the file to read
24 | 
25 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
26 |   * `content` - the thought to record
27 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
28 | 
29 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
30 |   * `outputs` - a dictionary representing the outputs of your task, if any
31 | 
32 | 
33 | Do NOT finish until you have completed the tasks.
34 | 
35 | ## History
36 | Here is a recent history of actions you've taken in service of this plan,
37 | as well as observations you've made. This only includes the MOST RECENT
38 | actions and observations--more may have happened before that.
39 | They are time-ordered, with your most recent action at the bottom.
40 | 
41 | []
42 | 
43 | ## Format
44 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
45 | * `action`, which is one of the actions specified here
46 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
47 | 
48 | You MUST NOT include any other text besides the JSON response
49 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_003.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a software engineer. You've inherited an existing codebase, which you
 7 | need to modify to complete this task:
 8 | 
 9 | Write a shell script 'hello.sh' that prints 'hello'.
10 | 
11 | 
12 | 
13 | ## Available Actions
14 | * `run` - runs a command on the command line in a Linux shell. Arguments:
15 |   * `command` - the command to run
16 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
17 | 
18 | * `write` - writes the content to a file. Arguments:
19 |   * `path` - the path of the file to write
20 |   * `content` - the content to write to the file
21 | 
22 | * `read` - reads the content of a file. Arguments:
23 |   * `path` - the path of the file to read
24 | 
25 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
26 |   * `content` - the thought to record
27 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
28 | 
29 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
30 |   * `outputs` - a dictionary representing the outputs of your task, if any
31 | 
32 | 
33 | Do NOT finish until you have completed the tasks.
34 | 
35 | ## History
36 | Here is a recent history of actions you've taken in service of this plan,
37 | as well as observations you've made. This only includes the MOST RECENT
38 | actions and observations--more may have happened before that.
39 | They are time-ordered, with your most recent action at the bottom.
40 | 
41 | [[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}]]
42 | 
43 | ## Format
44 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
45 | * `action`, which is one of the actions specified here
46 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
47 | 
48 | You MUST NOT include any other text besides the JSON response
49 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_004.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a software engineer. You've inherited an existing codebase, which you
 7 | need to modify to complete this task:
 8 | 
 9 | Write a shell script 'hello.sh' that prints 'hello'.
10 | 
11 | 
12 | 
13 | ## Available Actions
14 | * `run` - runs a command on the command line in a Linux shell. Arguments:
15 |   * `command` - the command to run
16 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
17 | 
18 | * `write` - writes the content to a file. Arguments:
19 |   * `path` - the path of the file to write
20 |   * `content` - the content to write to the file
21 | 
22 | * `read` - reads the content of a file. Arguments:
23 |   * `path` - the path of the file to read
24 | 
25 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
26 |   * `content` - the thought to record
27 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
28 | 
29 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
30 |   * `outputs` - a dictionary representing the outputs of your task, if any
31 | 
32 | 
33 | Do NOT finish until you have completed the tasks.
34 | 
35 | ## History
36 | Here is a recent history of actions you've taken in service of this plan,
37 | as well as observations you've made. This only includes the MOST RECENT
38 | actions and observations--more may have happened before that.
39 | They are time-ordered, with your most recent action at the bottom.
40 | 
41 | [[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}], [{"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]]
42 | 
43 | ## Format
44 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
45 | * `action`, which is one of the actions specified here
46 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
47 | 
48 | You MUST NOT include any other text besides the JSON response
49 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_005.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a software engineer. You've inherited an existing codebase, which you
 7 | need to modify to complete this task:
 8 | 
 9 | Write a shell script 'hello.sh' that prints 'hello'.
10 | 
11 | 
12 | 
13 | ## Available Actions
14 | * `run` - runs a command on the command line in a Linux shell. Arguments:
15 |   * `command` - the command to run
16 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
17 | 
18 | * `write` - writes the content to a file. Arguments:
19 |   * `path` - the path of the file to write
20 |   * `content` - the content to write to the file
21 | 
22 | * `read` - reads the content of a file. Arguments:
23 |   * `path` - the path of the file to read
24 | 
25 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
26 |   * `content` - the thought to record
27 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
28 | 
29 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
30 |   * `outputs` - a dictionary representing the outputs of your task, if any
31 | 
32 | 
33 | Do NOT finish until you have completed the tasks.
34 | 
35 | ## History
36 | Here is a recent history of actions you've taken in service of this plan,
37 | as well as observations you've made. This only includes the MOST RECENT
38 | actions and observations--more may have happened before that.
39 | They are time-ordered, with your most recent action at the bottom.
40 | 
41 | [[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}], [{"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}], [{"source": "agent", "action": "run", "args": {"command": "./hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]]
42 | 
43 | ## Format
44 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
45 | * `action`, which is one of the actions specified here
46 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
47 | 
48 | You MUST NOT include any other text besides the JSON response
49 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_007.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a quality assurance engineer. Another engineer has made changes to the
 7 | codebase which are supposed to solve this task:
 8 | 
 9 | Verify that the shell script 'hello.sh' prints 'hello'.
10 | 
11 | Note the changes might have already been applied in-line. You should focus on
12 | validating if the task is solved, nothing else.
13 | 
14 | ## Available Actions
15 | * `run` - runs a command on the command line in a Linux shell. Arguments:
16 |   * `command` - the command to run
17 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
18 | 
19 | * `read` - reads the content of a file. Arguments:
20 |   * `path` - the path of the file to read
21 | 
22 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
23 |   * `content` - the thought to record
24 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
25 | 
26 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
27 |   * `outputs` - a dictionary representing the outputs of your task, if any
28 | 
29 | 
30 | You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts.
31 | 
32 | Do NOT finish until you know whether the task is complete and correct.
33 | When you're done, add a `completed` boolean to the `outputs` of the `finish` action.
34 | If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action
35 | explaining what the problem is.
36 | 
37 | ## History
38 | Here is a recent history of actions you've taken in service of this plan,
39 | as well as observations you've made. This only includes the MOST RECENT
40 | actions and observations--more may have happened before that.
41 | They are time-ordered, with your most recent action at the bottom.
42 | 
43 | []
44 | 
45 | ## Format
46 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
47 | * `action`, which is one of the actions specified here
48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
49 | 
50 | You MUST NOT include any other text besides the JSON response
51 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_008.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a quality assurance engineer. Another engineer has made changes to the
 7 | codebase which are supposed to solve this task:
 8 | 
 9 | Verify that the shell script 'hello.sh' prints 'hello'.
10 | 
11 | Note the changes might have already been applied in-line. You should focus on
12 | validating if the task is solved, nothing else.
13 | 
14 | ## Available Actions
15 | * `run` - runs a command on the command line in a Linux shell. Arguments:
16 |   * `command` - the command to run
17 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
18 | 
19 | * `read` - reads the content of a file. Arguments:
20 |   * `path` - the path of the file to read
21 | 
22 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
23 |   * `content` - the thought to record
24 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
25 | 
26 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
27 |   * `outputs` - a dictionary representing the outputs of your task, if any
28 | 
29 | 
30 | You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts.
31 | 
32 | Do NOT finish until you know whether the task is complete and correct.
33 | When you're done, add a `completed` boolean to the `outputs` of the `finish` action.
34 | If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action
35 | explaining what the problem is.
36 | 
37 | ## History
38 | Here is a recent history of actions you've taken in service of this plan,
39 | as well as observations you've made. This only includes the MOST RECENT
40 | actions and observations--more may have happened before that.
41 | They are time-ordered, with your most recent action at the bottom.
42 | 
43 | [[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'\n", "extras": {"path": "hello.sh"}}]]
44 | 
45 | ## Format
46 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
47 | * `action`, which is one of the actions specified here
48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
49 | 
50 | You MUST NOT include any other text besides the JSON response
51 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_009.log:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ----------
 4 | 
 5 | # Task
 6 | You are a quality assurance engineer. Another engineer has made changes to the
 7 | codebase which are supposed to solve this task:
 8 | 
 9 | Verify that the shell script 'hello.sh' prints 'hello'.
10 | 
11 | Note the changes might have already been applied in-line. You should focus on
12 | validating if the task is solved, nothing else.
13 | 
14 | ## Available Actions
15 | * `run` - runs a command on the command line in a Linux shell. Arguments:
16 |   * `command` - the command to run
17 |   * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
18 | 
19 | * `read` - reads the content of a file. Arguments:
20 |   * `path` - the path of the file to read
21 | 
22 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
23 |   * `content` - the thought to record
24 |   * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
25 | 
26 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments:
27 |   * `outputs` - a dictionary representing the outputs of your task, if any
28 | 
29 | 
30 | You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts.
31 | 
32 | Do NOT finish until you know whether the task is complete and correct.
33 | When you're done, add a `completed` boolean to the `outputs` of the `finish` action.
34 | If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action
35 | explaining what the problem is.
36 | 
37 | ## History
38 | Here is a recent history of actions you've taken in service of this plan,
39 | as well as observations you've made. This only includes the MOST RECENT
40 | actions and observations--more may have happened before that.
41 | They are time-ordered, with your most recent action at the bottom.
42 | 
43 | [[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'\n", "extras": {"path": "hello.sh"}}], [{"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]]
44 | 
45 | ## Format
46 | Your response MUST be in JSON format. It must be an object, and it must contain two fields:
47 | * `action`, which is one of the actions specified here
48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action
49 | 
50 | You MUST NOT include any other text besides the JSON response
51 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/response_001.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "delegate",
 4 |   "args": {
 5 |     "agent": "CoderAgent",
 6 |     "inputs": {
 7 |       "task": "Write a shell script 'hello.sh' that prints 'hello'.",
 8 |       "summary": ""
 9 |     }
10 |   }
11 | }
12 | ```
13 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/response_002.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "write",
 4 |   "args": {
 5 |     "path": "hello.sh",
 6 |     "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'"
 7 |   }
 8 | }
 9 | ```
10 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/response_003.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "run",
3 |   "args": {
4 |     "command": "chmod +x hello.sh"
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/response_004.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "run",
3 |   "args": {
4 |     "command": "./hello.sh",
5 |     "background": false
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/response_005.log:
--------------------------------------------------------------------------------
1 | ```json
2 | {
3 |   "action": "finish",
4 |   "args": {
5 |     "outputs": {}
6 |   }
7 | }
8 | ```
9 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/response_006.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "delegate",
 4 |   "args": {
 5 |     "agent": "VerifierAgent",
 6 |     "inputs": {
 7 |       "task": "Verify that the shell script 'hello.sh' prints 'hello'."
 8 |     }
 9 |   }
10 | }
11 | ```
12 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/response_007.log:
--------------------------------------------------------------------------------
1 | ```json
2 | {
3 |   "action": "read",
4 |   "args": {
5 |     "path": "hello.sh"
6 |   }
7 | }
8 | ```
9 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/response_008.log:
--------------------------------------------------------------------------------
1 | ```json
2 | {
3 |   "action": "run",
4 |   "args": {
5 |     "command": "bash hello.sh"
6 |   }
7 | }
8 | ```
9 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/response_009.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "finish",
 4 |   "args": {
 5 |     "outputs": {
 6 |       "completed": true
 7 |     }
 8 |   }
 9 | }
10 | ```
11 | 


--------------------------------------------------------------------------------
/tests/integration/mock/ManagerAgent/test_write_simple_script/response_010.log:
--------------------------------------------------------------------------------
1 | ```json
2 | {
3 |   "action": "finish",
4 |   "args": {
5 |     "outputs": {}
6 |   }
7 | }
8 | ```
9 | 


--------------------------------------------------------------------------------
/tests/integration/mock/MonologueAgent/test_write_simple_script/response_001.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "write",
 4 |   "args": {
 5 |     "path": "hello.sh",
 6 |     "content": "#!/bin/bash\necho 'hello'"
 7 |   }
 8 | }
 9 | ```
10 | 


--------------------------------------------------------------------------------
/tests/integration/mock/MonologueAgent/test_write_simple_script/response_002.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "run",
 4 |   "args": {
 5 |     "command": "bash hello.sh",
 6 |     "background": false
 7 |   }
 8 | }
 9 | ```
10 | 


--------------------------------------------------------------------------------
/tests/integration/mock/MonologueAgent/test_write_simple_script/response_003.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "message",
 4 |   "args": {
 5 |     "content": "I have successfully created and executed the 'hello.sh' script, which printed 'hello' as expected. I believe I have completed the task as specified.",
 6 |     "wait_for_response": false
 7 |   }
 8 | }
 9 | ```
10 | 


--------------------------------------------------------------------------------
/tests/integration/mock/MonologueAgent/test_write_simple_script/response_004.log:
--------------------------------------------------------------------------------
1 | ```json
2 | {
3 |   "action": "finish",
4 |   "args": {}
5 | }
6 | ```
7 | 


--------------------------------------------------------------------------------
/tests/integration/mock/PlannerAgent/test_write_simple_script/response_001.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "add_task",
3 |   "args": {
4 |     "parent": "",
5 |     "goal": "Write a shell script 'hello.sh' that prints 'hello'",
6 |     "subtasks": []
7 |   }
8 | }
9 | 


--------------------------------------------------------------------------------
/tests/integration/mock/PlannerAgent/test_write_simple_script/response_002.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "modify_task",
3 |   "args": {
4 |     "task_id": "0",
5 |     "state": "in_progress"
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/tests/integration/mock/PlannerAgent/test_write_simple_script/response_003.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "write",
3 |   "args": {
4 |     "path": "hello.sh",
5 |     "content": "#!/bin/bash\necho 'hello'"
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/tests/integration/mock/PlannerAgent/test_write_simple_script/response_004.log:
--------------------------------------------------------------------------------
1 | {
2 |   "action": "run",
3 |   "args": {
4 |     "command": "bash hello.sh",
5 |     "background": false
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/tests/integration/mock/PlannerAgent/test_write_simple_script/response_005.log:
--------------------------------------------------------------------------------
 1 | ```json
 2 | {
 3 |   "action": "modify_task",
 4 |   "args": {
 5 |     "task_id": "0",
 6 |     "state": "completed",
 7 |     "thought": "The script 'hello.sh' was written and executed successfully, producing the expected output 'hello'."
 8 |   }
 9 | }
10 | ```
11 | 


--------------------------------------------------------------------------------
/tests/integration/start_http_server.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from http.server import HTTPServer, SimpleHTTPRequestHandler
 3 | 
 4 | web_dir = os.path.join(os.path.dirname(__file__), 'static')
 5 | os.chdir(web_dir)
 6 | handler = SimpleHTTPRequestHandler
 7 | 
 8 | # Start the server
 9 | server = HTTPServer(('localhost', 8000), handler)
10 | server.serve_forever()
11 | 


--------------------------------------------------------------------------------
/tests/integration/static/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |     <meta charset="UTF-8">
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
 6 |     <title>The Ultimate Answer</title>
 7 |     <style>
 8 |         body {
 9 |             display: flex;
10 |             justify-content: center;
11 |             align-items: center;
12 |             height: 100vh;
13 |             margin: 0;
14 |             background: linear-gradient(to right, #1e3c72, #2a5298);
15 |             color: #fff;
16 |             font-family: 'Arial', sans-serif;
17 |             text-align: center;
18 |         }
19 |         .container {
20 |             text-align: center;
21 |             padding: 20px;
22 |             background: rgba(255, 255, 255, 0.1);
23 |             border-radius: 10px;
24 |             box-shadow: 0 0 10px rgba(0, 0, 0, 0.2);
25 |         }
26 |         h1 {
27 |             font-size: 36px;
28 |             margin-bottom: 20px;
29 |         }
30 |         p {
31 |             font-size: 18px;
32 |             margin-bottom: 30px;
33 |         }
34 |         #showButton {
35 |             padding: 10px 20px;
36 |             font-size: 16px;
37 |             color: #1e3c72;
38 |             background: #fff;
39 |             border: none;
40 |             border-radius: 5px;
41 |             cursor: pointer;
42 |             transition: background 0.3s ease;
43 |         }
44 |         #showButton:hover {
45 |             background: #f0f0f0;
46 |         }
47 |         #result {
48 |             margin-top: 20px;
49 |             font-size: 24px;
50 |         }
51 |     </style>
52 | </head>
53 | <body>
54 |     <div class="container">
55 |         <h1>The Ultimate Answer</h1>
56 |         <p>Click the button to reveal the answer to life, the universe, and everything.</p>
57 |         <button id="showButton">Click me</button>
58 |         <div id="result"></div>
59 |     </div>
60 |     <script>
61 |         document.getElementById('showButton').addEventListener('click', function() {
62 |             document.getElementById('result').innerText = 'The answer is OpenDevin is all you need!';
63 |         });
64 |     </script>
65 | </body>
66 | </html>
67 | 


--------------------------------------------------------------------------------
/tests/integration/workspace/test_edits/bad.txt:
--------------------------------------------------------------------------------
1 | This is a stupid typoo.
2 | Really?
3 | No mor typos!
4 | Enjoy!
5 | 


--------------------------------------------------------------------------------
/tests/test_fileops.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import pytest
 4 | 
 5 | from opendevin.core.config import config
 6 | from opendevin.runtime.server import files
 7 | 
 8 | SANDBOX_PATH_PREFIX = '/workspace'
 9 | 
10 | 
11 | def test_resolve_path():  # resolve_path(path, <2nd arg, presumably the sandbox working dir>) vs. config.workspace_base
12 |     assert (  # bare relative path
13 |         files.resolve_path('test.txt', '/workspace')
14 |         == Path(config.workspace_base) / 'test.txt'
15 |     )
16 |     assert (  # relative path containing a subdirectory
17 |         files.resolve_path('subdir/test.txt', '/workspace')
18 |         == Path(config.workspace_base) / 'subdir' / 'test.txt'
19 |     )
20 |     assert (  # absolute path under SANDBOX_PATH_PREFIX
21 |         files.resolve_path(Path(SANDBOX_PATH_PREFIX) / 'test.txt', '/workspace')
22 |         == Path(config.workspace_base) / 'test.txt'
23 |     )
24 |     assert (  # absolute path under SANDBOX_PATH_PREFIX with a subdirectory
25 |         files.resolve_path(
26 |             Path(SANDBOX_PATH_PREFIX) / 'subdir' / 'test.txt', '/workspace'
27 |         )
28 |         == Path(config.workspace_base) / 'subdir' / 'test.txt'
29 |     )
30 |     assert (  # a '..' that stays below the prefix is collapsed
31 |         files.resolve_path(
32 |             Path(SANDBOX_PATH_PREFIX) / 'subdir' / '..' / 'test.txt', '/workspace'
33 |         )
34 |         == Path(config.workspace_base) / 'test.txt'
35 |     )
36 |     with pytest.raises(PermissionError):  # '..' climbing above the prefix is rejected
37 |         files.resolve_path(Path(SANDBOX_PATH_PREFIX) / '..' / 'test.txt', '/workspace')
38 |     with pytest.raises(PermissionError):  # relative '..' from '/workspace' is rejected
39 |         files.resolve_path(Path('..') / 'test.txt', '/workspace')
40 |     with pytest.raises(PermissionError):  # absolute path outside the prefix is rejected
41 |         files.resolve_path(Path('/') / 'test.txt', '/workspace')
42 |     assert (  # relative path with a nested second argument ('/workspace/test')
43 |         files.resolve_path('test.txt', '/workspace/test')
44 |         == Path(config.workspace_base) / 'test' / 'test.txt'
45 |     )
46 | 


--------------------------------------------------------------------------------
/tests/unit/README.md:
--------------------------------------------------------------------------------
 1 | ## Introduction
 2 | 
 3 | This folder contains unit tests that can be run locally.
 4 | 
 5 | Run all tests:
 6 | 
 7 | ```bash
 8 | poetry run pytest ./tests/unit
 9 | ```
10 | 
11 | Run a specific test file:
12 | 
13 | ```bash
14 | poetry run pytest ./tests/unit/test_micro_agents.py
15 | ```
16 | 
17 | Run a specific unit test:
18 | 
19 | ```bash
20 | poetry run pytest ./tests/unit/test_micro_agents.py::test_coder_agent_with_summary
21 | ```
22 | 
23 | For more verbose output, add the `-v` flag to any of the calls above (even more verbose: `-vv` and `-vvv`):
24 | 
25 | ```bash
26 | poetry run pytest -v ./tests/unit/test_micro_agents.py
27 | ```
28 | 
29 | For more details, see the [pytest documentation](https://docs.pytest.org/en/latest/contents.html).
30 | 


--------------------------------------------------------------------------------
/tests/unit/test_arg_parser.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from opendevin.core.config import get_parser
 4 | 
 5 | 
 6 | def test_help_message(capsys):  # capsys: pytest fixture that captures stdout/stderr
 7 |     parser = get_parser()
 8 |     with pytest.raises(SystemExit):  # argparse prints the help text and exits on `--help`
 9 |         parser.parse_args(['--help'])
10 |     captured = capsys.readouterr()
11 |     expected_help_message = """
12 | usage: pytest [-h] [-d DIRECTORY] [-t TASK] [-f FILE] [-c AGENT_CLS]
13 |               [-m MODEL_NAME] [-i MAX_ITERATIONS] [-b MAX_BUDGET_PER_TASK]
14 |               [-n MAX_CHARS] [--eval-output-dir EVAL_OUTPUT_DIR]
15 |               [--eval-n-limit EVAL_N_LIMIT]
16 |               [--eval-num-workers EVAL_NUM_WORKERS] [--eval-note EVAL_NOTE]
17 |               [-l LLM_CONFIG]
18 | 
19 | Run an agent with a specific task
20 | 
21 | options:
22 |   -h, --help            show this help message and exit
23 |   -d DIRECTORY, --directory DIRECTORY
24 |                         The working directory for the agent
25 |   -t TASK, --task TASK  The task for the agent to perform
26 |   -f FILE, --file FILE  Path to a file containing the task. Overrides -t if
27 |                         both are provided.
28 |   -c AGENT_CLS, --agent-cls AGENT_CLS
29 |                         The agent class to use
30 |   -m MODEL_NAME, --model-name MODEL_NAME
31 |                         The (litellm) model name to use
32 |   -i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS
33 |                         The maximum number of iterations to run the agent
34 |   -b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK
35 |                         The maximum budget allowed per task, beyond which the
36 |                         agent will stop.
37 |   -n MAX_CHARS, --max-chars MAX_CHARS
38 |                         The maximum number of characters to send to and
39 |                         receive from LLM per task
40 |   --eval-output-dir EVAL_OUTPUT_DIR
41 |                         The directory to save evaluation output
42 |   --eval-n-limit EVAL_N_LIMIT
43 |                         The number of instances to evaluate
44 |   --eval-num-workers EVAL_NUM_WORKERS
45 |                         The number of workers to use for evaluation
46 |   --eval-note EVAL_NOTE
47 |                         The note to add to the evaluation directory
48 |   -l LLM_CONFIG, --llm-config LLM_CONFIG
49 |                         The group of llm settings, e.g. a [llama3] section in
50 |                         the toml file. Overrides model if both are provided.
51 | """
52 | 
53 |     actual_lines = captured.out.strip().split('\n')
54 |     print('\n'.join(actual_lines))  # echo the captured help text; presumably a debugging aid
55 |     expected_lines = expected_help_message.strip().split('\n')
56 | 
57 |     # Check the line counts first: zip() below stops at the shorter list and would hide a length mismatch
58 |     assert len(actual_lines) == len(
59 |         expected_lines
60 |     ), 'The number of lines in the help message does not match.'
61 | 
62 |     # Compare line by line, ignoring leading/trailing whitespace on each line
63 |     for actual, expected in zip(actual_lines, expected_lines):
64 |         assert (
65 |             actual.strip() == expected.strip()
66 |         ), f"Expected '{expected}', got '{actual}'"
67 | 


--------------------------------------------------------------------------------
/tests/unit/test_event_stream.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | import pytest
 4 | 
 5 | from opendevin.events import EventSource, EventStream
 6 | from opendevin.events.action import NullAction
 7 | from opendevin.events.observation import NullObservation
 8 | 
 9 | 
10 | def collect_events(stream):
11 |     return [event for event in stream.get_events()]
12 | 
13 | 
14 | @pytest.mark.asyncio
15 | async def test_basic_flow():
16 |     stream = EventStream('abc')
17 |     await stream.add_event(NullAction(), EventSource.AGENT)
18 |     assert len(collect_events(stream)) == 1
19 | 
20 | 
21 | @pytest.mark.asyncio
22 | async def test_stream_storage():
23 |     stream = EventStream('def')
24 |     await stream.add_event(NullObservation(''), EventSource.AGENT)
25 |     assert len(collect_events(stream)) == 1
26 |     content = stream._file_store.read('sessions/def/events/0.json')
27 |     assert content is not None
28 |     data = json.loads(content)
29 |     assert 'timestamp' in data
30 |     del data['timestamp']
31 |     assert data == {
32 |         'id': 0,
33 |         'source': 'agent',
34 |         'observation': 'null',
35 |         'content': '',
36 |         'extras': {},
37 |         'message': 'No observation',
38 |     }
39 | 
40 | 
41 | @pytest.mark.asyncio
42 | async def test_rehydration():
43 |     stream1 = EventStream('es1')
44 |     await stream1.add_event(NullObservation('obs1'), EventSource.AGENT)
45 |     await stream1.add_event(NullObservation('obs2'), EventSource.AGENT)
46 |     assert len(collect_events(stream1)) == 2
47 | 
48 |     stream2 = EventStream('es2')
49 |     assert len(collect_events(stream2)) == 0
50 | 
51 |     stream1rehydrated = EventStream('es1')
52 |     events = collect_events(stream1rehydrated)
53 |     assert len(events) == 2
54 |     assert events[0].content == 'obs1'
55 |     assert events[1].content == 'obs2'
56 | 


--------------------------------------------------------------------------------
/tests/unit/test_json.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | 
 3 | from opendevin.core.utils import json
 4 | from opendevin.events.action import MessageAction
 5 | 
 6 | 
 7 | def test_event_serialization_deserialization():
 8 |     message = MessageAction(content='This is a test.', wait_for_response=False)
 9 |     message._id = 42
10 |     message._timestamp = datetime(2020, 1, 1, 0, 0, 0)
11 |     serialized = json.dumps(message)
12 |     deserialized = json.loads(serialized)
13 |     expected = {
14 |         'id': 42,
15 |         'timestamp': '2020-01-01T00:00:00',
16 |         'action': 'message',
17 |         'message': 'This is a test.',
18 |         'args': {
19 |             'content': 'This is a test.',
20 |             'wait_for_response': False,
21 |         },
22 |     }
23 |     assert deserialized == expected
24 | 
25 | 
26 | def test_array_serialization_deserialization():
27 |     message = MessageAction(content='This is a test.', wait_for_response=False)
28 |     message._id = 42
29 |     message._timestamp = datetime(2020, 1, 1, 0, 0, 0)
30 |     serialized = json.dumps([message])
31 |     deserialized = json.loads(serialized)
32 |     expected = [
33 |         {
34 |             'id': 42,
35 |             'timestamp': '2020-01-01T00:00:00',
36 |             'action': 'message',
37 |             'message': 'This is a test.',
38 |             'args': {
39 |                 'content': 'This is a test.',
40 |                 'wait_for_response': False,
41 |             },
42 |         }
43 |     ]
44 |     assert deserialized == expected
45 | 


--------------------------------------------------------------------------------
/tests/unit/test_observation_serialization.py:
--------------------------------------------------------------------------------
 1 | from opendevin.events.observation import (
 2 |     CmdOutputObservation,
 3 |     Observation,
 4 | )
 5 | from opendevin.events.serialization import (
 6 |     event_from_dict,
 7 |     event_to_dict,
 8 |     event_to_memory,
 9 | )
10 | 
11 | 
12 | def serialization_deserialization(original_observation_dict, cls):
13 |     observation_instance = event_from_dict(original_observation_dict)
14 |     assert isinstance(
15 |         observation_instance, Observation
16 |     ), 'The observation instance should be an instance of Action.'
17 |     assert isinstance(
18 |         observation_instance, cls
19 |     ), 'The observation instance should be an instance of CmdOutputObservation.'
20 |     serialized_observation_dict = event_to_dict(observation_instance)
21 |     serialized_observation_memory = event_to_memory(observation_instance)
22 |     assert (
23 |         serialized_observation_dict == original_observation_dict
24 |     ), 'The serialized observation should match the original observation dict.'
25 |     original_observation_dict.pop('message', None)
26 |     original_observation_dict.pop('id', None)
27 |     original_observation_dict.pop('timestamp', None)
28 |     assert (
29 |         serialized_observation_memory == original_observation_dict
30 |     ), 'The serialized observation memory should match the original observation dict.'
31 | 
32 | 
33 | # Additional tests for various observation subclasses can be included here
34 | def test_observation_event_props_serialization_deserialization():
35 |     original_observation_dict = {
36 |         'id': 42,
37 |         'source': 'agent',
38 |         'timestamp': '2021-08-01T12:00:00',
39 |         'observation': 'run',
40 |         'message': 'Command `ls -l` executed with exit code 0.',
41 |         'extras': {'exit_code': 0, 'command': 'ls -l', 'command_id': 3},
42 |         'content': 'foo.txt',
43 |     }
44 |     serialization_deserialization(original_observation_dict, CmdOutputObservation)
45 | 
46 | 
47 | def test_command_output_observation_serialization_deserialization():
48 |     original_observation_dict = {
49 |         'observation': 'run',
50 |         'extras': {'exit_code': 0, 'command': 'ls -l', 'command_id': 3},
51 |         'message': 'Command `ls -l` executed with exit code 0.',
52 |         'content': 'foo.txt',
53 |     }
54 |     serialization_deserialization(original_observation_dict, CmdOutputObservation)
55 | 


--------------------------------------------------------------------------------
/tests/unit/test_storage.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | 
 4 | import pytest
 5 | 
 6 | from opendevin.storage.local import LocalFileStore
 7 | from opendevin.storage.memory import InMemoryFileStore
 8 | 
 9 | 
10 | @pytest.fixture
11 | def setup_env():
12 |     os.makedirs('./_test_files_tmp', exist_ok=True)
13 | 
14 |     yield
15 | 
16 |     shutil.rmtree('./_test_files_tmp')
17 | 
18 | 
19 | def test_basic_fileops(setup_env):
20 |     filename = 'test.txt'
21 |     for store in [LocalFileStore('./_test_files_tmp'), InMemoryFileStore()]:
22 |         store.write(filename, 'Hello, world!')
23 |         assert store.read(filename) == 'Hello, world!'
24 |         assert store.list('') == [filename]
25 |         store.delete(filename)
26 |         with pytest.raises(FileNotFoundError):
27 |             store.read(filename)
28 | 
29 | 
30 | def test_complex_path_fileops(setup_env):
31 |     filenames = ['foo.bar.baz', './foo/bar/baz', 'foo/bar/baz', '/foo/bar/baz']
32 |     for store in [LocalFileStore('./_test_files_tmp'), InMemoryFileStore()]:
33 |         for filename in filenames:
34 |             store.write(filename, 'Hello, world!')
35 |             assert store.read(filename) == 'Hello, world!'
36 |             store.delete(filename)
37 |             with pytest.raises(FileNotFoundError):
38 |                 store.read(filename)
39 | 
40 | 
41 | def test_list(setup_env):
42 |     for store in [LocalFileStore('./_test_files_tmp'), InMemoryFileStore()]:
43 |         store.write('foo.txt', 'Hello, world!')
44 |         store.write('bar.txt', 'Hello, world!')
45 |         store.write('baz.txt', 'Hello, world!')
46 |         assert store.list('').sort() == ['foo.txt', 'bar.txt', 'baz.txt'].sort()
47 |         store.delete('foo.txt')
48 |         store.delete('bar.txt')
49 |         store.delete('baz.txt')
50 | 
51 | 
52 | def test_deep_list(setup_env):
53 |     for store in [LocalFileStore('./_test_files_tmp'), InMemoryFileStore()]:
54 |         store.write('foo/bar/baz.txt', 'Hello, world!')
55 |         store.write('foo/bar/qux.txt', 'Hello, world!')
56 |         store.write('foo/bar/quux.txt', 'Hello, world!')
57 |         assert store.list('') == ['foo/'], f'for class {store.__class__}'
58 |         assert store.list('foo') == ['foo/bar/']
59 |         assert (
60 |             store.list('foo/bar').sort()
61 |             == ['foo/bar/baz.txt', 'foo/bar/qux.txt', 'foo/bar/quux.txt'].sort()
62 |         )
63 |         store.delete('foo/bar/baz.txt')
64 |         store.delete('foo/bar/qux.txt')
65 |         store.delete('foo/bar/quux.txt')
66 | 


--------------------------------------------------------------------------------