├── .gitattributes ├── .github ├── .codecov.yml ├── ISSUE_TEMPLATE │ ├── bug_template.yml │ ├── feature_request.md │ └── technical_proposal.md ├── dependabot.yml └── workflows │ ├── deploy-docs.yml │ ├── dummy-agent-test.yml │ ├── ghcr.yml │ ├── lint.yml │ ├── review-pr.yml │ ├── run-integration-tests.yml │ ├── run-unit-tests.yml │ ├── solve-issue.yml │ └── stale.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Development.md ├── LICENSE ├── LOG_VISUALIZER.md ├── Makefile ├── README.md ├── agenthub ├── __init__.py ├── browsing_agent │ ├── __init__.py │ ├── browsing_agent.py │ └── response_parser.py ├── dummy_web_agent │ ├── README.md │ ├── __init__.py │ ├── commands.txt │ ├── dummy_web_agent.py │ ├── prompt.py │ └── utils.py ├── reasoner_agent_fast │ ├── __init__.py │ └── reasoner_agent_fast.py └── reasoner_agent_full │ ├── __init__.py │ └── reasoner_agent_full.py ├── config.toml ├── dev_config └── python │ ├── .pre-commit-config.yaml │ ├── mypy.ini │ └── ruff.toml ├── easy-web-icon.png ├── easyweb ├── README.md ├── __init__.py ├── controller │ ├── __init__.py │ ├── action_parser.py │ ├── agent.py │ ├── agent_controller.py │ └── state │ │ ├── state.py │ │ └── task.py ├── core │ ├── config.py │ ├── const │ │ └── guide_url.py │ ├── download.py │ ├── exceptions.py │ ├── logger.py │ ├── main.py │ ├── metrics.py │ ├── schema │ │ ├── __init__.py │ │ ├── action.py │ │ ├── agent.py │ │ ├── config.py │ │ ├── observation.py │ │ └── stream.py │ └── utils │ │ ├── __init__.py │ │ ├── json.py │ │ └── singleton.py ├── events │ ├── __init__.py │ ├── action │ │ ├── __init__.py │ │ ├── action.py │ │ ├── agent.py │ │ ├── browse.py │ │ ├── commands.py │ │ ├── empty.py │ │ ├── files.py │ │ ├── message.py │ │ ├── planning.py │ │ └── tasks.py │ ├── event.py │ ├── observation │ │ ├── __init__.py │ │ ├── agent.py │ │ ├── browse.py │ │ ├── commands.py │ │ ├── delegate.py │ │ ├── empty.py │ │ ├── error.py │ │ ├── files.py │ │ ├── observation.py │ │ ├── recall.py │ │ └── 
success.py │ ├── serialization │ │ ├── __init__.py │ │ ├── action.py │ │ ├── event.py │ │ ├── observation.py │ │ └── utils.py │ └── stream.py ├── llm │ ├── bedrock.py │ └── llm.py ├── memory │ ├── __init__.py │ ├── condenser.py │ ├── history.py │ └── memory.py ├── runtime │ ├── __init__.py │ ├── browser │ │ ├── __init__.py │ │ └── browser_env.py │ ├── docker │ │ ├── __init__.py │ │ ├── exec_box.py │ │ ├── local_box.py │ │ ├── process.py │ │ └── ssh_box.py │ ├── e2b │ │ ├── README.md │ │ ├── filestore.py │ │ ├── process.py │ │ ├── runtime.py │ │ └── sandbox.py │ ├── plugins │ │ ├── __init__.py │ │ ├── agent_skills │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── agentskills.py │ │ │ └── setup.sh │ │ ├── jupyter │ │ │ ├── __init__.py │ │ │ ├── execute_cli │ │ │ ├── execute_cli.py │ │ │ ├── execute_server │ │ │ └── setup.sh │ │ ├── mixin.py │ │ ├── requirement.py │ │ └── swe_agent_commands │ │ │ ├── __init__.py │ │ │ ├── _setup_cursor_mode_env.sh │ │ │ ├── _setup_default_env.sh │ │ │ ├── _split_string │ │ │ ├── cursors_defaults.sh │ │ │ ├── cursors_edit_linting.sh │ │ │ ├── defaults.sh │ │ │ ├── edit_linting.sh │ │ │ ├── parse_commands.py │ │ │ ├── search.sh │ │ │ ├── setup_cursor_mode.sh │ │ │ └── setup_default.sh │ ├── process.py │ ├── runtime.py │ ├── sandbox.py │ ├── server │ │ ├── browse.py │ │ ├── files.py │ │ └── runtime.py │ ├── tools.py │ └── utils │ │ ├── __init__.py │ │ ├── singleton.py │ │ └── system.py ├── server │ ├── README.md │ ├── __init__.py │ ├── auth │ │ ├── __init__.py │ │ └── auth.py │ ├── data_models │ │ └── feedback.py │ ├── listen.py │ ├── mock │ │ ├── README.md │ │ └── listen.py │ └── session │ │ ├── __init__.py │ │ ├── agent.py │ │ ├── manager.py │ │ └── session.py └── storage │ ├── __init__.py │ ├── files.py │ ├── local.py │ ├── memory.py │ └── s3.py ├── frontend-icon.png ├── frontend.py ├── log_visualizer ├── controller.py ├── main.py └── session.py ├── model_port_config.json ├── my_log_visualizer.py ├── poetry.lock ├── pyproject.toml 
├── pytest.ini └── tests ├── integration ├── README.md ├── conftest.py ├── mock │ ├── BrowsingAgent │ │ └── test_browse_internet │ │ │ ├── prompt_001.log │ │ │ ├── prompt_002.log │ │ │ ├── prompt_003.log │ │ │ ├── response_001.log │ │ │ ├── response_002.log │ │ │ └── response_003.log │ ├── CodeActAgent │ │ ├── test_browse_internet │ │ │ ├── prompt_001.log │ │ │ ├── prompt_002.log │ │ │ ├── prompt_003.log │ │ │ ├── prompt_004.log │ │ │ ├── prompt_005.log │ │ │ ├── response_001.log │ │ │ ├── response_002.log │ │ │ ├── response_003.log │ │ │ ├── response_004.log │ │ │ └── response_005.log │ │ ├── test_edits │ │ │ ├── prompt_001.log │ │ │ ├── prompt_002.log │ │ │ ├── prompt_003.log │ │ │ ├── response_001.log │ │ │ ├── response_002.log │ │ │ └── response_003.log │ │ ├── test_ipython │ │ │ ├── prompt_001.log │ │ │ ├── prompt_002.log │ │ │ ├── response_001.log │ │ │ └── response_002.log │ │ ├── test_ipython_module │ │ │ ├── prompt_001.log │ │ │ ├── prompt_002.log │ │ │ ├── prompt_003.log │ │ │ ├── response_001.log │ │ │ ├── response_002.log │ │ │ └── response_003.log │ │ └── test_write_simple_script │ │ │ ├── prompt_001.log │ │ │ ├── prompt_002.log │ │ │ ├── prompt_003.log │ │ │ ├── response_001.log │ │ │ ├── response_002.log │ │ │ └── response_003.log │ ├── DelegatorAgent │ │ ├── test_edits │ │ │ ├── prompt_001.log │ │ │ ├── prompt_002.log │ │ │ ├── prompt_003.log │ │ │ ├── prompt_004.log │ │ │ ├── prompt_005.log │ │ │ ├── prompt_006.log │ │ │ ├── prompt_007.log │ │ │ ├── prompt_008.log │ │ │ ├── response_001.log │ │ │ ├── response_002.log │ │ │ ├── response_003.log │ │ │ ├── response_004.log │ │ │ ├── response_005.log │ │ │ ├── response_006.log │ │ │ ├── response_007.log │ │ │ └── response_008.log │ │ └── test_write_simple_script │ │ │ ├── prompt_001.log │ │ │ ├── prompt_002.log │ │ │ ├── prompt_003.log │ │ │ ├── prompt_004.log │ │ │ ├── prompt_005.log │ │ │ ├── prompt_006.log │ │ │ ├── prompt_007.log │ │ │ ├── prompt_008.log │ │ │ ├── response_001.log │ │ │ ├── 
response_002.log │ │ │ ├── response_003.log │ │ │ ├── response_004.log │ │ │ ├── response_005.log │ │ │ ├── response_006.log │ │ │ ├── response_007.log │ │ │ └── response_008.log │ ├── ManagerAgent │ │ ├── test_edits │ │ │ ├── prompt_001.log │ │ │ ├── prompt_002.log │ │ │ ├── prompt_003.log │ │ │ ├── prompt_004.log │ │ │ ├── prompt_005.log │ │ │ ├── prompt_006.log │ │ │ ├── response_001.log │ │ │ ├── response_002.log │ │ │ ├── response_003.log │ │ │ ├── response_004.log │ │ │ ├── response_005.log │ │ │ └── response_006.log │ │ ├── test_simple_task_rejection │ │ │ ├── prompt_001.log │ │ │ ├── prompt_002.log │ │ │ ├── prompt_003.log │ │ │ ├── prompt_004.log │ │ │ ├── prompt_005.log │ │ │ ├── prompt_006.log │ │ │ ├── prompt_007.log │ │ │ ├── prompt_008.log │ │ │ ├── response_001.log │ │ │ ├── response_002.log │ │ │ ├── response_003.log │ │ │ ├── response_004.log │ │ │ ├── response_005.log │ │ │ ├── response_006.log │ │ │ ├── response_007.log │ │ │ └── response_008.log │ │ └── test_write_simple_script │ │ │ ├── prompt_001.log │ │ │ ├── prompt_002.log │ │ │ ├── prompt_003.log │ │ │ ├── prompt_004.log │ │ │ ├── prompt_005.log │ │ │ ├── prompt_006.log │ │ │ ├── prompt_007.log │ │ │ ├── prompt_008.log │ │ │ ├── prompt_009.log │ │ │ ├── prompt_010.log │ │ │ ├── response_001.log │ │ │ ├── response_002.log │ │ │ ├── response_003.log │ │ │ ├── response_004.log │ │ │ ├── response_005.log │ │ │ ├── response_006.log │ │ │ ├── response_007.log │ │ │ ├── response_008.log │ │ │ ├── response_009.log │ │ │ └── response_010.log │ ├── MonologueAgent │ │ └── test_write_simple_script │ │ │ ├── prompt_001.log │ │ │ ├── prompt_002.log │ │ │ ├── prompt_003.log │ │ │ ├── prompt_004.log │ │ │ ├── response_001.log │ │ │ ├── response_002.log │ │ │ ├── response_003.log │ │ │ └── response_004.log │ └── PlannerAgent │ │ └── test_write_simple_script │ │ ├── prompt_001.log │ │ ├── prompt_002.log │ │ ├── prompt_003.log │ │ ├── prompt_004.log │ │ ├── prompt_005.log │ │ ├── response_001.log │ │ ├── 
response_002.log │ │ ├── response_003.log │ │ ├── response_004.log │ │ └── response_005.log ├── regenerate.sh ├── start_http_server.py ├── static │ └── index.html ├── test_agent.py └── workspace │ └── test_edits │ └── bad.txt ├── test_fileops.py └── unit ├── README.md ├── test_action_serialization.py ├── test_agent_skill.py ├── test_arg_parser.py ├── test_config.py ├── test_event_stream.py ├── test_ipython.py ├── test_is_stuck.py ├── test_json.py ├── test_logging.py ├── test_micro_agents.py ├── test_observation_serialization.py ├── test_response_parsing.py ├── test_sandbox.py └── test_storage.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-vendored 2 | -------------------------------------------------------------------------------- /.github/.codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | notify: 3 | wait_for_ci: true 4 | 5 | coverage: 6 | status: 7 | patch: 8 | default: 9 | threshold: 100% # allow patch coverage to be lower than project coverage by any amount 10 | project: 11 | default: 12 | threshold: 5% # allow project coverage to drop at most 5% 13 | 14 | comment: false 15 | github_checks: 16 | annotations: false 17 | 18 | ignore: 19 | - "agenthub/SWE_agent/**" # SWE agent is deprecated 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_template.yml: -------------------------------------------------------------------------------- 1 | name: Bug 2 | description: Report a problem with OpenDevin 3 | title: '[Bug]: ' 4 | labels: ['bug'] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: Thank you for taking the time to fill out this bug report. We greatly appreciate your effort to complete this template fully. Please provide as much information as possible to help us understand and address the issue effectively. 
9 | 10 | - type: checkboxes 11 | attributes: 12 | label: Is there an existing issue for the same bug? 13 | description: Please check if an issue already exists for the bug you encountered. 14 | options: 15 | - label: I have checked the troubleshooting document at https://opendevin.github.io/OpenDevin/modules/usage/troubleshooting 16 | required: true 17 | - label: I have checked the existing issues. 18 | required: true 19 | 20 | - type: textarea 21 | id: bug-description 22 | attributes: 23 | label: Describe the bug 24 | description: Provide a short description of the problem. 25 | validations: 26 | required: true 27 | 28 | - type: textarea 29 | id: current-version 30 | attributes: 31 | label: Current OpenDevin version 32 | description: What version of OpenDevin are you using? If you're running in docker, tell us the tag you're using (e.g. ghcr.io/opendevin/opendevin:0.3.1). 33 | render: bash 34 | validations: 35 | required: true 36 | 37 | - type: textarea 38 | id: config 39 | attributes: 40 | label: Installation and Configuration 41 | description: Please provide any commands you ran and any configuration (redacting API keys) 42 | render: bash 43 | validations: 44 | required: true 45 | 46 | - type: textarea 47 | id: model-agent 48 | attributes: 49 | label: Model and Agent 50 | description: What model and agent are you using? You can see these settings in the UI by clicking the settings wheel. 51 | placeholder: | 52 | - Model: 53 | - Agent: 54 | 55 | - type: textarea 56 | id: os-version 57 | attributes: 58 | label: Operating System 59 | description: What Operating System are you using? Linux, Mac OS, WSL on Windows 60 | 61 | - type: textarea 62 | id: repro-steps 63 | attributes: 64 | label: Reproduction Steps 65 | description: Please list the steps to reproduce the issue. 66 | placeholder: | 67 | 1. 68 | 2. 69 | 3. 
70 | 71 | - type: textarea 72 | id: additional-context 73 | attributes: 74 | label: Logs, Errors, Screenshots, and Additional Context 75 | description: LLM logs will be stored in the `logs/llm/default` folder. Please add any additional context about the problem here. 76 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: Suggest an idea for OpenDevin features 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What problem or use case are you trying to solve?** 11 | 12 | **Describe the UX of the solution you'd like** 13 | 14 | **Do you have thoughts on the technical implementation?** 15 | 16 | **Describe alternatives you've considered** 17 | 18 | **Additional context** 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/technical_proposal.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Technical Proposal 3 | about: Propose a new architecture or technology 4 | title: '' 5 | labels: 'proposal' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Summary** 11 | 12 | **Motivation** 13 | 14 | **Technical Design** 15 | 16 | **Alternatives to Consider** 17 | 18 | **Additional context** 19 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "daily" 12 | - package-ecosystem: "npm" # See documentation for possible values 13 | directory: "/frontend" # Location of package manifests 14 | schedule: 15 | interval: "daily" 16 | -------------------------------------------------------------------------------- /.github/workflows/deploy-docs.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Docs to GitHub Pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | build: 13 | name: Build Docusaurus 14 | runs-on: ubuntu-latest 15 | if: github.repository == 'OpenDevin/OpenDevin' 16 | steps: 17 | - uses: actions/checkout@v4 18 | with: 19 | fetch-depth: 0 20 | - uses: actions/setup-node@v4 21 | with: 22 | node-version: 18 23 | cache: npm 24 | cache-dependency-path: docs/package-lock.json 25 | - name: Set up Python 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: "3.11" 29 | 30 | - name: Generate Python Docs 31 | run: rm -rf docs/modules/python && pip install pydoc-markdown && pydoc-markdown 32 | - name: Install dependencies 33 | run: cd docs && npm ci 34 | - name: Build website 35 | run: cd docs && npm run build 36 | 37 | - name: Upload Build Artifact 38 | if: github.ref == 'refs/heads/main' 39 | uses: actions/upload-pages-artifact@v3 40 | with: 41 | path: docs/build 42 | 43 | deploy: 44 | name: Deploy to GitHub Pages 45 | needs: build 46 | if: github.ref == 'refs/heads/main' && github.repository == 'OpenDevin/OpenDevin' 47 | # Grant GITHUB_TOKEN the permissions required to make a Pages deployment 48 | permissions: 
49 | pages: write # to deploy to Pages 50 | id-token: write # to verify the deployment originates from an appropriate source 51 | # Deploy to the github-pages environment 52 | environment: 53 | name: github-pages 54 | url: ${{ steps.deployment.outputs.page_url }} 55 | runs-on: ubuntu-latest 56 | steps: 57 | - name: Deploy to GitHub Pages 58 | id: deployment 59 | uses: actions/deploy-pages@v4 60 | -------------------------------------------------------------------------------- /.github/workflows/dummy-agent-test.yml: -------------------------------------------------------------------------------- 1 | name: Run e2e test with dummy agent 2 | 3 | concurrency: 4 | group: ${{ github.workflow }}-${{ github.ref }} 5 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 6 | 7 | on: 8 | push: 9 | branches: 10 | - main 11 | pull_request: 12 | 13 | env: 14 | PERSIST_SANDBOX : "false" 15 | 16 | jobs: 17 | test: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v4 21 | - name: Set up Python 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: '3.11' 25 | - name: Set up environment 26 | run: | 27 | curl -sSL https://install.python-poetry.org | python3 - 28 | poetry install --without evaluation 29 | poetry run playwright install --with-deps chromium 30 | wget https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json -P /tmp/llama_index/models--BAAI--bge-small-en-v1.5/snapshots/5c38ec7c405ec4b44b94cc5a9bb96e735b38267a/1_Pooling/ 31 | - name: Run tests 32 | run: | 33 | poetry run python opendevin/core/main.py -t "do a flip" -m ollama/not-a-model -d ./workspace/ -c DummyAgent 34 | -------------------------------------------------------------------------------- /.github/workflows/ghcr.yml: -------------------------------------------------------------------------------- 1 | name: Publish Docker Image 2 | 3 | concurrency: 4 | group: ${{ github.workflow }}-${{ github.ref }} 5 | cancel-in-progress: ${{ github.ref != 
'refs/heads/main' }} 6 | 7 | on: 8 | push: 9 | branches: 10 | - main 11 | tags: 12 | - '*' 13 | pull_request: 14 | workflow_dispatch: 15 | inputs: 16 | reason: 17 | description: 'Reason for manual trigger' 18 | required: true 19 | default: '' 20 | 21 | jobs: 22 | ghcr_build_and_push: 23 | runs-on: ubuntu-latest 24 | 25 | permissions: 26 | contents: read 27 | packages: write 28 | 29 | strategy: 30 | matrix: 31 | image: ["app", "sandbox"] 32 | 33 | steps: 34 | - name: checkout 35 | uses: actions/checkout@v4 36 | 37 | - name: Free Disk Space (Ubuntu) 38 | uses: jlumbroso/free-disk-space@main 39 | with: 40 | # this might remove tools that are actually needed, 41 | # if set to "true" but frees about 6 GB 42 | tool-cache: true 43 | 44 | # all of these default to true, but feel free to set to 45 | # "false" if necessary for your workflow 46 | android: true 47 | dotnet: true 48 | haskell: true 49 | large-packages: true 50 | docker-images: false 51 | swap-storage: true 52 | 53 | - name: Set up QEMU 54 | uses: docker/setup-qemu-action@v3 55 | 56 | - name: Set up Docker Buildx 57 | id: buildx 58 | uses: docker/setup-buildx-action@v3 59 | 60 | - name: Login to ghcr 61 | uses: docker/login-action@v1 62 | with: 63 | registry: ghcr.io 64 | username: ${{ github.repository_owner }} 65 | password: ${{ secrets.GITHUB_TOKEN }} 66 | 67 | - name: Build and push ${{ matrix.image }} 68 | if: "!github.event.pull_request.head.repo.fork" 69 | run: | 70 | ./containers/build.sh ${{ matrix.image }} ${{ github.repository_owner }} --push 71 | 72 | - name: Build ${{ matrix.image }} 73 | if: "github.event.pull_request.head.repo.fork" 74 | run: | 75 | ./containers/build.sh ${{ matrix.image }} ${{ github.repository_owner }} 76 | 77 | docker_build_success: 78 | name: Docker Build Success 79 | runs-on: ubuntu-latest 80 | needs: ghcr_build_and_push 81 | steps: 82 | - run: echo Done! 
83 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | concurrency: 4 | group: ${{ github.workflow }}-${{ github.ref }} 5 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 6 | 7 | on: 8 | push: 9 | branches: 10 | - main 11 | pull_request: 12 | 13 | jobs: 14 | lint-frontend: 15 | name: Lint frontend 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Install Node.js 20 21 | uses: actions/setup-node@v4 22 | with: 23 | node-version: 20 24 | 25 | - name: Install dependencies 26 | run: | 27 | cd frontend 28 | npm install --frozen-lockfile 29 | 30 | - name: Lint 31 | run: | 32 | cd frontend 33 | npm run lint 34 | 35 | lint-python: 36 | name: Lint python 37 | runs-on: ubuntu-latest 38 | steps: 39 | - uses: actions/checkout@v4 40 | with: 41 | fetch-depth: 0 42 | - name: Set up python 43 | uses: actions/setup-python@v5 44 | with: 45 | python-version: 3.11 46 | cache: 'pip' 47 | - name: Install pre-commit 48 | run: pip install pre-commit==3.7.0 49 | - name: Run pre-commit hooks 50 | run: pre-commit run --files opendevin/**/* agenthub/**/* evaluation/**/* --show-diff-on-failure --config ./dev_config/python/.pre-commit-config.yaml 51 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: 'Close stale issues' 2 | on: 3 | schedule: 4 | - cron: '30 1 * * *' 5 | 6 | jobs: 7 | stale: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/stale@v9 11 | with: 12 | # Aggressively close issues that have been explicitly labeled `age-out` 13 | any-of-labels: age-out 14 | stale-issue-message: 'This issue is stale because it has been open for 7 days with no activity. Remove stale label or comment or this will be closed in 1 day.' 
15 | close-issue-message: 'This issue was closed because it has been stalled for over 7 days with no activity.' 16 | stale-pr-message: 'This PR is stale because it has been open for 7 days with no activity. Remove stale label or comment or this will be closed in 1 days.' 17 | close-pr-message: 'This PR was closed because it has been stalled for over 7 days with no activity.' 18 | days-before-stale: 7 19 | days-before-close: 1 20 | 21 | - uses: actions/stale@v9 22 | with: 23 | # Be more lenient with other issues 24 | stale-issue-message: 'This issue is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.' 25 | close-issue-message: 'This issue was closed because it has been stalled for over 30 days with no activity.' 26 | stale-pr-message: 'This PR is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.' 27 | close-pr-message: 'This PR was closed because it has been stalled for over 30 days with no activity.' 28 | days-before-stale: 30 29 | days-before-close: 7 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | ===================== 3 | 4 | Copyright © 2023 5 | 6 | Permission is hereby granted, free of charge, to any person 7 | obtaining a copy of this software and associated documentation 8 | files (the “Software”), to deal in the Software without 9 | restriction, including without limitation the rights to use, 10 | copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the 12 | Software is furnished to do so, subject to the following 13 | conditions: 14 | 15 | The above copyright notice and this permission notice shall be 16 | included in all copies or substantial portions of the Software. 
17 | 18 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, 19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 20 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 22 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 23 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 25 | OTHER DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /LOG_VISUALIZER.md: -------------------------------------------------------------------------------- 1 | The log visualizer allows you to visualize the history of each agent session. To produce the log, simply remember to hit "Clear" after the end of each session, whereupon the log of that session will be saved to the folder `frontend_log`. 2 | 3 | After that, run `python my_log_visualizer.py` to start the Gradio frontend for the visualization, where you can select the log file to visualize. 4 | 5 | The visualization will not only include the MCTS planning search tree, but also the state, active strategy, and action for steps where the agent does not replan. Feel free to take advantage of this to debug any reasoning errors (e.g., not recognizing the task is done). 6 | -------------------------------------------------------------------------------- /agenthub/__init__.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | 3 | load_dotenv() 4 | 5 | from . 
import ( # noqa: E402 6 | browsing_agent, 7 | dummy_web_agent, 8 | reasoner_agent_fast, 9 | reasoner_agent_full, 10 | ) 11 | 12 | __all__ = [ 13 | 'browsing_agent', 14 | 'dummy_web_agent', 15 | 'reasoner_agent_full', 16 | 'reasoner_agent_fast', 17 | ] 18 | -------------------------------------------------------------------------------- /agenthub/browsing_agent/__init__.py: -------------------------------------------------------------------------------- 1 | from easyweb.controller.agent import Agent 2 | 3 | from .browsing_agent import BrowsingAgent 4 | 5 | Agent.register('BrowsingAgent', BrowsingAgent) 6 | -------------------------------------------------------------------------------- /agenthub/dummy_web_agent/README.md: -------------------------------------------------------------------------------- 1 | # Browsing Agent Framework 2 | 3 | This folder implements the basic BrowserGym [demo agent](https://github.com/ServiceNow/BrowserGym/tree/main/demo_agent) that enables full-featured web browsing. 4 | 5 | 6 | ## Test run 7 | 8 | Note that for browsing tasks, GPT-4 is usually a requirement to get reasonable results, due to the complexity of the web page structures. 
9 | 10 | ``` 11 | poetry run python ./opendevin/core/main.py \ 12 | -i 10 \ 13 | -t "tell me the usa's president using google search" \ 14 | -c BrowsingAgent \ 15 | -m gpt-4o-2024-05-13 16 | ``` 17 | -------------------------------------------------------------------------------- /agenthub/dummy_web_agent/__init__.py: -------------------------------------------------------------------------------- 1 | from easyweb.controller.agent import Agent 2 | 3 | from .dummy_web_agent import DummyWebAgent 4 | 5 | Agent.register('DummyWebAgent', DummyWebAgent) 6 | -------------------------------------------------------------------------------- /agenthub/dummy_web_agent/commands.txt: -------------------------------------------------------------------------------- 1 | poetry run python ./opendevin/core/main.py \ 2 | -i 10 \ 3 | -t "tell me the usa's president using google search" \ 4 | -c WorldModelAgent \ 5 | -m gpt-4o-2024-05-13 6 | -------------------------------------------------------------------------------- /agenthub/reasoner_agent_fast/__init__.py: -------------------------------------------------------------------------------- 1 | from easyweb.controller.agent import Agent 2 | 3 | from .reasoner_agent_fast import ReasonerAgentFast 4 | 5 | Agent.register('ReasonerAgentFast', ReasonerAgentFast) 6 | -------------------------------------------------------------------------------- /agenthub/reasoner_agent_fast/reasoner_agent_fast.py: -------------------------------------------------------------------------------- 1 | from reasoners import ReasonerAgent 2 | 3 | from easyweb.controller.agent import Agent 4 | from easyweb.controller.state.state import State 5 | from easyweb.core.logger import easyweb_logger as logger 6 | from easyweb.events.action import Action 7 | from easyweb.llm.llm import LLM 8 | from easyweb.runtime.plugins import ( 9 | PluginRequirement, 10 | ) 11 | from easyweb.runtime.tools import RuntimeTool 12 | 13 | 14 | class ReasonerAgentFast(Agent): 15 | VERSION = 
'0.1' 16 | """ 17 | An agent that uses agent model abstractions to interact with the browser. 18 | """ 19 | 20 | sandbox_plugins: list[PluginRequirement] = [] 21 | runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER] 22 | 23 | def __init__( 24 | self, 25 | llm: LLM, 26 | ) -> None: 27 | """ 28 | Initializes a new instance of the AbstractBrowsingAgent class. 29 | 30 | Parameters: 31 | - llm (Any): The llm to be used by this agent 32 | """ 33 | super().__init__(llm) 34 | 35 | self.config_name = 'easyweb' 36 | if 'gpt-4o-mini' in llm.model_name: 37 | self.config_name = 'easyweb_mini' 38 | 39 | if 'o1' in llm.model_name or 'o3-mini' in llm.model_name: 40 | llm = { 41 | 'default': LLM( 42 | model='gpt-4o', api_key=llm.api_key, base_url=llm.base_url 43 | ), 44 | 'policy': llm, 45 | } 46 | elif 'deepseek-reasoner' in llm.model_name: 47 | llm = { 48 | 'default': LLM( 49 | model='deepseek/deepseek-chat', 50 | api_key=llm.api_key, 51 | base_url=llm.base_url, 52 | ), 53 | 'policy': llm, 54 | } 55 | 56 | logger.info(f'Using {self.config_name}') 57 | self.agent = ReasonerAgent(llm, config_name=self.config_name, logger=logger) 58 | self.reset() 59 | 60 | def reset(self) -> None: 61 | """ 62 | Resets the agent. 
63 | """ 64 | self.agent.reset() 65 | 66 | def step(self, env_state: State) -> Action: 67 | return self.agent.step(env_state) 68 | 69 | def search_memory(self, query: str) -> list[str]: 70 | raise NotImplementedError('Implement this abstract method') 71 | -------------------------------------------------------------------------------- /agenthub/reasoner_agent_full/__init__.py: -------------------------------------------------------------------------------- 1 | from easyweb.controller.agent import Agent 2 | 3 | from .reasoner_agent_full import ReasonerAgentFull 4 | 5 | Agent.register('ReasonerAgentFull', ReasonerAgentFull) 6 | -------------------------------------------------------------------------------- /agenthub/reasoner_agent_full/reasoner_agent_full.py: -------------------------------------------------------------------------------- 1 | from reasoners import ReasonerAgent 2 | 3 | from easyweb.controller.agent import Agent 4 | from easyweb.controller.state.state import State 5 | from easyweb.core.logger import easyweb_logger as logger 6 | from easyweb.events.action import Action 7 | from easyweb.llm.llm import LLM 8 | from easyweb.runtime.plugins import ( 9 | PluginRequirement, 10 | ) 11 | from easyweb.runtime.tools import RuntimeTool 12 | 13 | 14 | class ReasonerAgentFull(Agent): 15 | VERSION = '0.1' 16 | """ 17 | An agent that uses agent model abstractions to interact with the browser. 18 | """ 19 | 20 | sandbox_plugins: list[PluginRequirement] = [] 21 | runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER] 22 | 23 | def __init__( 24 | self, 25 | llm: LLM, 26 | ) -> None: 27 | """ 28 | Initializes a new instance of the AbstractBrowsingAgent class. 
29 | 30 | Parameters: 31 | - llm (LLM): The llm to be used by this agent 32 | """ 33 | super().__init__(llm) 34 | if 'gpt-4o-mini' in llm.model_name: 35 | self.config_name = 'easyweb_mini_world_model' 36 | else: 37 | self.config_name = 'easyweb_world_model' 38 | 39 | logger.info(f'Using {self.config_name}') 40 | self.agent = ReasonerAgent(llm, config_name=self.config_name, logger=logger) 41 | self.reset() 42 | 43 | def reset(self) -> None: 44 | """ 45 | Resets the agent. 46 | """ 47 | self.agent.reset() 48 | 49 | def step(self, env_state: State) -> Action: 50 | return self.agent.step(env_state) 51 | 52 | def search_memory(self, query: str) -> list[str]: 53 | raise NotImplementedError('Implement this abstract method') 54 | -------------------------------------------------------------------------------- /config.toml: -------------------------------------------------------------------------------- 1 | [core] 2 | workspace_base="../workspace" 3 | persist_sandbox=false 4 | 5 | [llm] 6 | model_port_config_file="./model_port_config.json" 7 | -------------------------------------------------------------------------------- /dev_config/python/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: trailing-whitespace 6 | exclude: docs/modules/python 7 | - id: end-of-file-fixer 8 | exclude: docs/modules/python 9 | - id: check-yaml 10 | - id: debug-statements 11 | 12 | - repo: https://github.com/tox-dev/pyproject-fmt 13 | rev: 1.7.0 14 | hooks: 15 | - id: pyproject-fmt 16 | - repo: https://github.com/abravalheri/validate-pyproject 17 | rev: v0.16 18 | hooks: 19 | - id: validate-pyproject 20 | 21 | - repo: https://github.com/astral-sh/ruff-pre-commit 22 | # Ruff version. 23 | rev: v0.4.1 24 | hooks: 25 | # Run the linter. 
26 | - id: ruff 27 | entry: ruff check --config dev_config/python/ruff.toml 28 | types_or: [python, pyi, jupyter] 29 | args: [--fix] 30 | # Run the formatter. 31 | - id: ruff-format 32 | entry: ruff format --config dev_config/python/ruff.toml 33 | types_or: [python, pyi, jupyter] 34 | 35 | # - repo: https://github.com/pre-commit/mirrors-mypy 36 | # rev: v1.9.0 37 | # hooks: 38 | # - id: mypy 39 | # additional_dependencies: 40 | # [types-requests, types-setuptools, types-pyyaml, types-toml] 41 | # entry: mypy --config-file dev_config/python/mypy.ini --exclude easyweb/__init__.py easyweb/ agenthub/ 42 | # always_run: true 43 | # pass_filenames: false 44 | -------------------------------------------------------------------------------- /dev_config/python/mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | warn_unused_configs = True 3 | ignore_missing_imports = True 4 | check_untyped_defs = True 5 | explicit_package_bases = True 6 | warn_unreachable = True 7 | warn_redundant_casts = True 8 | no_implicit_optional = True 9 | strict_optional = True 10 | -------------------------------------------------------------------------------- /dev_config/python/ruff.toml: -------------------------------------------------------------------------------- 1 | [lint] 2 | select = [ 3 | "E", 4 | "W", 5 | "F", 6 | "I", 7 | "Q", 8 | "B", 9 | ] 10 | 11 | ignore = [ 12 | "E501", 13 | "B003", 14 | "B007", 15 | "B009", 16 | "B010", 17 | "B904", 18 | "B018", 19 | ] 20 | 21 | [lint.flake8-quotes] 22 | docstring-quotes = "double" 23 | inline-quotes = "single" 24 | 25 | [format] 26 | quote-style = "single" 27 | -------------------------------------------------------------------------------- /easy-web-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maitrix-org/easyweb/d2b56bc228541d2af2abb1de9c0c440ad830d30c/easy-web-icon.png 
-------------------------------------------------------------------------------- /easyweb/README.md: -------------------------------------------------------------------------------- 1 | # OpenDevin Shared Abstraction and Components 2 | 3 | This is a Python package that contains all the shared abstractions (e.g., Agent) and components (e.g., sandbox, web browser, search API, selenium). 4 | 5 | See the [main README](../README.md) for instructions on how to run OpenDevin from the command line. 6 | 7 | ## Sandbox Image 8 | ```bash 9 | docker build -f opendevin/sandbox/docker/Dockerfile -t opendevin/sandbox:v0.1 . 10 | ``` 11 | 12 | ## Sandbox Runner 13 | 14 | Run the docker-based interactive sandbox: 15 | 16 | ```bash 17 | mkdir workspace 18 | python3 opendevin/sandbox/docker/sandbox.py -d workspace 19 | ``` 20 | 21 | It will map `./workspace` into the docker container with the folder permissions correctly adjusted for the current user. 22 | 23 | Example screenshot: 24 | 25 | image 26 | -------------------------------------------------------------------------------- /easyweb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maitrix-org/easyweb/d2b56bc228541d2af2abb1de9c0c440ad830d30c/easyweb/__init__.py -------------------------------------------------------------------------------- /easyweb/controller/__init__.py: -------------------------------------------------------------------------------- 1 | from .agent_controller import AgentController 2 | 3 | __all__ = [ 4 | 'AgentController', 5 | ] 6 | -------------------------------------------------------------------------------- /easyweb/controller/action_parser.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from easyweb.events.action import Action 4 | 5 | 6 | class ResponseParser(ABC): 7 | """ 8 | This abstract base class is a general interface for a response parser
dedicated to 9 | parsing the action from the response from the LLM. 10 | """ 11 | 12 | def __init__( 13 | self, 14 | ): 15 | # Pay attention to the item order in self.action_parsers 16 | self.action_parsers = [] 17 | 18 | @abstractmethod 19 | def parse(self, response: str) -> Action: 20 | """ 21 | Parses the action from the response from the LLM. 22 | 23 | Parameters: 24 | - response (str): The response from the LLM. 25 | 26 | Returns: 27 | - action (Action): The action parsed from the response. 28 | """ 29 | pass 30 | 31 | @abstractmethod 32 | def parse_response(self, response) -> str: 33 | """ 34 | Extracts the action string from the response from the LLM. 35 | 36 | Parameters: 37 | - response (str): The response from the LLM. 38 | 39 | Returns: 40 | - action_str (str): The action str parsed from the response. 41 | """ 42 | pass 43 | 44 | @abstractmethod 45 | def parse_action(self, action_str: str) -> Action: 46 | """ 47 | Parses the action from the action string. 48 | 49 | Parameters: 50 | - action_str (str): The action string extracted from the LLM response. 51 | 52 | Returns: 53 | - action (Action): The action parsed from the action string. 54 | """ 55 | pass 56 | 57 | 58 | class ActionParser(ABC): 59 | """ 60 | This abstract base class is a general interface for an action parser dedicated to 61 | parsing the action from the action str from the LLM. 62 | """ 63 | 64 | @abstractmethod 65 | def check_condition(self, action_str: str) -> bool: 66 | """ 67 | Check if the action string can be parsed by this parser. 68 | """ 69 | pass 70 | 71 | @abstractmethod 72 | def parse(self, action_str: str) -> Action: 73 | """ 74 | Parses the action from the action string from the LLM response.
75 | """ 76 | pass 77 | -------------------------------------------------------------------------------- /easyweb/core/const/guide_url.py: -------------------------------------------------------------------------------- 1 | TROUBLESHOOTING_URL = ( 2 | 'https://opendevin.github.io/OpenDevin/modules/usage/troubleshooting' 3 | ) 4 | -------------------------------------------------------------------------------- /easyweb/core/download.py: -------------------------------------------------------------------------------- 1 | # Run this file to trigger a model download 2 | import agenthub # noqa F401 (we import this to get the agents registered) 3 | -------------------------------------------------------------------------------- /easyweb/core/exceptions.py: -------------------------------------------------------------------------------- 1 | class MaxCharsExceedError(Exception): 2 | def __init__(self, num_of_chars=None, max_chars_limit=None): 3 | if num_of_chars is not None and max_chars_limit is not None: 4 | message = f'Number of characters {num_of_chars} exceeds MAX_CHARS limit: {max_chars_limit}' 5 | else: 6 | message = 'Number of characters exceeds MAX_CHARS limit' 7 | super().__init__(message) 8 | 9 | 10 | class AgentNoInstructionError(Exception): 11 | def __init__(self, message='Instruction must be provided'): 12 | super().__init__(message) 13 | 14 | 15 | class AgentEventTypeError(Exception): 16 | def __init__(self, message='Event must be a dictionary'): 17 | super().__init__(message) 18 | 19 | 20 | class AgentAlreadyRegisteredError(Exception): 21 | def __init__(self, name=None): 22 | if name is not None: 23 | message = f"Agent class already registered under '{name}'" 24 | else: 25 | message = 'Agent class already registered' 26 | super().__init__(message) 27 | 28 | 29 | class AgentNotRegisteredError(Exception): 30 | def __init__(self, name=None): 31 | if name is not None: 32 | message = f"No agent class registered under '{name}'" 33 | else: 34 | message = 'No agent 
class registered' 35 | super().__init__(message) 36 | 37 | 38 | class LLMOutputError(Exception): 39 | def __init__(self, message): 40 | super().__init__(message) 41 | 42 | 43 | class SandboxInvalidBackgroundCommandError(Exception): 44 | def __init__(self, id=None): 45 | if id is not None: 46 | message = f'Invalid background command id {id}' 47 | else: 48 | message = 'Invalid background command id' 49 | super().__init__(message) 50 | 51 | 52 | class TaskInvalidStateError(Exception): 53 | def __init__(self, state=None): 54 | if state is not None: 55 | message = f'Invalid state {state}' 56 | else: 57 | message = 'Invalid state' 58 | super().__init__(message) 59 | 60 | 61 | class BrowserInitException(Exception): 62 | def __init__(self, message='Failed to initialize browser environment'): 63 | super().__init__(message) 64 | 65 | 66 | class BrowserUnavailableException(Exception): 67 | def __init__( 68 | self, 69 | message='Browser environment is not available, please check if has been initialized', 70 | ): 71 | super().__init__(message) 72 | 73 | 74 | # These exceptions get sent back to the LLM 75 | class AgentMalformedActionError(Exception): 76 | def __init__(self, message='Malformed response'): 77 | super().__init__(message) 78 | 79 | 80 | class AgentNoActionError(Exception): 81 | def __init__(self, message='Agent must return an action'): 82 | super().__init__(message) 83 | -------------------------------------------------------------------------------- /easyweb/core/metrics.py: -------------------------------------------------------------------------------- 1 | class Metrics: 2 | """ 3 | Metrics class can record various metrics during running and evaluation. 4 | Currently we define the following metrics: 5 | accumulated_cost: the total cost (USD $) of the current LLM. 
6 | """ 7 | 8 | def __init__(self) -> None: 9 | self._accumulated_cost: float = 0.0 10 | self._costs: list[float] = [] 11 | 12 | @property 13 | def accumulated_cost(self) -> float: 14 | return self._accumulated_cost 15 | 16 | @accumulated_cost.setter 17 | def accumulated_cost(self, value: float) -> None: 18 | if value < 0: 19 | raise ValueError('Total cost cannot be negative.') 20 | self._accumulated_cost = value 21 | 22 | @property 23 | def costs(self) -> list: 24 | return self._costs 25 | 26 | def add_cost(self, value: float) -> None: 27 | if value < 0: 28 | raise ValueError('Added cost cannot be negative.') 29 | self._accumulated_cost += value 30 | self._costs.append(value) 31 | 32 | def get(self): 33 | """ 34 | Return the metrics in a dictionary. 35 | """ 36 | return {'accumulated_cost': self._accumulated_cost, 'costs': self._costs} 37 | 38 | def log(self): 39 | """ 40 | Log the metrics. 41 | """ 42 | metrics = self.get() 43 | logs = '' 44 | for key, value in metrics.items(): 45 | logs += f'{key}: {value}\n' 46 | return logs 47 | -------------------------------------------------------------------------------- /easyweb/core/schema/__init__.py: -------------------------------------------------------------------------------- 1 | from .action import ActionType 2 | from .agent import AgentState 3 | from .config import ConfigType 4 | from .observation import ObservationType 5 | from .stream import CancellableStream, StreamMixin 6 | 7 | __all__ = [ 8 | 'ActionType', 9 | 'ObservationType', 10 | 'ConfigType', 11 | 'AgentState', 12 | 'CancellableStream', 13 | 'StreamMixin', 14 | ] 15 | -------------------------------------------------------------------------------- /easyweb/core/schema/action.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | 3 | __all__ = ['ActionType'] 4 | 5 | 6 | class ActionTypeSchema(BaseModel): 7 | INIT: str = Field(default='initialize') 8 | """Initializes the agent. 
Only sent by client. 9 | """ 10 | 11 | MESSAGE: str = Field(default='message') 12 | """Represents a message. 13 | """ 14 | 15 | START: str = Field(default='start') 16 | """Starts a new development task OR sends a chat message from the user. Only sent by the client. 17 | """ 18 | 19 | READ: str = Field(default='read') 20 | """Reads the content of a file. 21 | """ 22 | 23 | WRITE: str = Field(default='write') 24 | """Writes the content to a file. 25 | """ 26 | 27 | RUN: str = Field(default='run') 28 | """Runs a command. 29 | """ 30 | 31 | RUN_IPYTHON: str = Field(default='run_ipython') 32 | """Runs an IPython cell. 33 | """ 34 | 35 | KILL: str = Field(default='kill') 36 | """Kills a background command. 37 | """ 38 | 39 | BROWSE: str = Field(default='browse') 40 | """Opens a web page. 41 | """ 42 | 43 | BROWSE_INTERACTIVE: str = Field(default='browse_interactive') 44 | """Interact with the browser instance. 45 | """ 46 | 47 | RECALL: str = Field(default='recall') 48 | """Searches long-term memory 49 | """ 50 | 51 | DELEGATE: str = Field(default='delegate') 52 | """Delegates a task to another agent. 53 | """ 54 | 55 | FINISH: str = Field(default='finish') 56 | """If you're absolutely certain that you've completed your task and have tested your work, 57 | use the finish action to stop working. 58 | """ 59 | 60 | REJECT: str = Field(default='reject') 61 | """If you're absolutely certain that you cannot complete the task with given requirements, 62 | use the reject action to stop working. 63 | """ 64 | 65 | NULL: str = Field(default='null') 66 | 67 | SUMMARIZE: str = Field(default='summarize') 68 | 69 | ADD_TASK: str = Field(default='add_task') 70 | 71 | MODIFY_TASK: str = Field(default='modify_task') 72 | 73 | PAUSE: str = Field(default='pause') 74 | """Pauses the task. 75 | """ 76 | 77 | RESUME: str = Field(default='resume') 78 | """Resumes the task. 79 | """ 80 | 81 | STOP: str = Field(default='stop') 82 | """Stops the task. Must send a start action to restart a new task.
83 | """ 84 | 85 | CHANGE_AGENT_STATE: str = Field(default='change_agent_state') 86 | 87 | PUSH: str = Field(default='push') 88 | """Push a branch to github.""" 89 | 90 | SEND_PR: str = Field(default='send_pr') 91 | """Send a PR to github.""" 92 | 93 | START_PLANNING: str = Field(default='start_planning') 94 | """Start planning for the next action""" 95 | 96 | FINISH_PLANNING: str = Field(default='finish_planning') 97 | """Finish planning for the next action""" 98 | 99 | 100 | ActionType = ActionTypeSchema() 101 | -------------------------------------------------------------------------------- /easyweb/core/schema/agent.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class AgentState(str, Enum): 5 | LOADING = 'loading' 6 | """The agent is loading. 7 | """ 8 | 9 | INIT = 'init' 10 | """The agent is initialized. 11 | """ 12 | 13 | RUNNING = 'running' 14 | """The agent is running. 15 | """ 16 | 17 | AWAITING_USER_INPUT = 'awaiting_user_input' 18 | """The agent is awaiting user input. 19 | """ 20 | 21 | PAUSED = 'paused' 22 | """The agent is paused. 23 | """ 24 | 25 | STOPPED = 'stopped' 26 | """The agent is stopped. 27 | """ 28 | 29 | FINISHED = 'finished' 30 | """The agent is finished with the current task. 31 | """ 32 | 33 | REJECTED = 'rejected' 34 | """The agent rejects the task. 35 | """ 36 | 37 | ERROR = 'error' 38 | """An error occurred during the task. 
39 | """ 40 | -------------------------------------------------------------------------------- /easyweb/core/schema/config.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class ConfigType(str, Enum): 5 | # For frontend 6 | LLM_CUSTOM_LLM_PROVIDER = 'LLM_CUSTOM_LLM_PROVIDER' 7 | LLM_MAX_INPUT_TOKENS = 'LLM_MAX_INPUT_TOKENS' 8 | LLM_MAX_OUTPUT_TOKENS = 'LLM_MAX_OUTPUT_TOKENS' 9 | LLM_TOP_P = 'LLM_TOP_P' 10 | LLM_TEMPERATURE = 'LLM_TEMPERATURE' 11 | LLM_TIMEOUT = 'LLM_TIMEOUT' 12 | LLM_API_KEY = 'LLM_API_KEY' 13 | LLM_BASE_URL = 'LLM_BASE_URL' 14 | AWS_ACCESS_KEY_ID = 'AWS_ACCESS_KEY_ID' 15 | AWS_SECRET_ACCESS_KEY = 'AWS_SECRET_ACCESS_KEY' 16 | AWS_REGION_NAME = 'AWS_REGION_NAME' 17 | WORKSPACE_BASE = 'WORKSPACE_BASE' 18 | WORKSPACE_MOUNT_PATH = 'WORKSPACE_MOUNT_PATH' 19 | WORKSPACE_MOUNT_REWRITE = 'WORKSPACE_MOUNT_REWRITE' 20 | WORKSPACE_MOUNT_PATH_IN_SANDBOX = 'WORKSPACE_MOUNT_PATH_IN_SANDBOX' 21 | CACHE_DIR = 'CACHE_DIR' 22 | LLM_MODEL = 'LLM_MODEL' 23 | SANDBOX_CONTAINER_IMAGE = 'SANDBOX_CONTAINER_IMAGE' 24 | RUN_AS_DEVIN = 'RUN_AS_DEVIN' 25 | LLM_EMBEDDING_MODEL = 'LLM_EMBEDDING_MODEL' 26 | LLM_EMBEDDING_BASE_URL = 'LLM_EMBEDDING_BASE_URL' 27 | LLM_EMBEDDING_DEPLOYMENT_NAME = 'LLM_EMBEDDING_DEPLOYMENT_NAME' 28 | LLM_API_VERSION = 'LLM_API_VERSION' 29 | LLM_NUM_RETRIES = 'LLM_NUM_RETRIES' 30 | LLM_RETRY_MIN_WAIT = 'LLM_RETRY_MIN_WAIT' 31 | LLM_RETRY_MAX_WAIT = 'LLM_RETRY_MAX_WAIT' 32 | AGENT_MEMORY_MAX_THREADS = 'AGENT_MEMORY_MAX_THREADS' 33 | AGENT_MEMORY_ENABLED = 'AGENT_MEMORY_ENABLED' 34 | MAX_ITERATIONS = 'MAX_ITERATIONS' 35 | MAX_CHARS = 'MAX_CHARS' 36 | AGENT = 'AGENT' 37 | E2B_API_KEY = 'E2B_API_KEY' 38 | SANDBOX_TYPE = 'SANDBOX_TYPE' 39 | SANDBOX_USER_ID = 'SANDBOX_USER_ID' 40 | SANDBOX_TIMEOUT = 'SANDBOX_TIMEOUT' 41 | USE_HOST_NETWORK = 'USE_HOST_NETWORK' 42 | SSH_HOSTNAME = 'SSH_HOSTNAME' 43 | DISABLE_COLOR = 'DISABLE_COLOR' 44 | DEBUG = 'DEBUG' 45 | 
-------------------------------------------------------------------------------- /easyweb/core/schema/observation.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | 3 | __all__ = ['ObservationType'] 4 | 5 | 6 | class ObservationTypeSchema(BaseModel): 7 | READ: str = Field(default='read') 8 | """The content of a file 9 | """ 10 | 11 | WRITE: str = Field(default='write') 12 | 13 | BROWSE: str = Field(default='browse') 14 | """The HTML content of a URL 15 | """ 16 | 17 | RUN: str = Field(default='run') 18 | """The output of a command 19 | """ 20 | 21 | RUN_IPYTHON: str = Field(default='run_ipython') 22 | """The output of an IPython cell 23 | """ 24 | 25 | RECALL: str = Field(default='recall') 26 | """The result of a search 27 | """ 28 | 29 | CHAT: str = Field(default='chat') 30 | """A message from the user 31 | """ 32 | 33 | DELEGATE: str = Field(default='delegate') 34 | """The result of a task delegated to another agent 35 | """ 36 | 37 | MESSAGE: str = Field(default='message') 38 | 39 | ERROR: str = Field(default='error') 40 | 41 | SUCCESS: str = Field(default='success') 42 | 43 | NULL: str = Field(default='null') 44 | 45 | AGENT_STATE_CHANGED: str = Field(default='agent_state_changed') 46 | 47 | 48 | ObservationType = ObservationTypeSchema() 49 | -------------------------------------------------------------------------------- /easyweb/core/schema/stream.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Union 3 | 4 | 5 | class StreamMixin: 6 | def __init__(self, generator): 7 | self.generator = generator 8 | self.closed = False 9 | 10 | def __iter__(self): 11 | return self 12 | 13 | def __next__(self): 14 | if self.closed: 15 | raise StopIteration 16 | else: 17 | return next(self.generator) 18 | 19 | 20 | class CancellableStream(StreamMixin, ABC): 21 | @abstractmethod 22 | def close(self): 23 | pass
24 | 25 | @abstractmethod 26 | def exit_code(self) -> Union[int, None]: 27 | pass 28 | -------------------------------------------------------------------------------- /easyweb/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .singleton import Singleton 2 | 3 | __all__ = ['Singleton'] 4 | -------------------------------------------------------------------------------- /easyweb/core/utils/json.py: -------------------------------------------------------------------------------- 1 | import json 2 | from datetime import datetime 3 | 4 | from json_repair import repair_json 5 | 6 | from easyweb.core.exceptions import LLMOutputError 7 | from easyweb.events.event import Event 8 | from easyweb.events.serialization import event_to_dict 9 | 10 | 11 | def my_default_encoder(obj): 12 | """ 13 | Custom JSON encoder that handles datetime and event objects 14 | """ 15 | if isinstance(obj, datetime): 16 | return obj.isoformat() 17 | if isinstance(obj, Event): 18 | return event_to_dict(obj) 19 | return json.JSONEncoder().default(obj) 20 | 21 | 22 | def dumps(obj, **kwargs): 23 | """ 24 | Serialize an object to str format 25 | """ 26 | 27 | return json.dumps(obj, default=my_default_encoder, **kwargs) 28 | 29 | 30 | def loads(json_str, **kwargs): 31 | """ 32 | Create a JSON object from str 33 | """ 34 | try: 35 | return json.loads(json_str, **kwargs) 36 | except json.JSONDecodeError: 37 | pass 38 | depth = 0 39 | start = -1 40 | for i, char in enumerate(json_str): 41 | if char == '{': 42 | if depth == 0: 43 | start = i 44 | depth += 1 45 | elif char == '}': 46 | depth -= 1 47 | if depth == 0 and start != -1: 48 | response = json_str[start : i + 1] 49 | try: 50 | json_str = repair_json(response) 51 | return json.loads(json_str, **kwargs) 52 | except (json.JSONDecodeError, ValueError, TypeError) as e: 53 | raise LLMOutputError( 54 | 'Invalid JSON in response. Please make sure the response is a valid JSON object.' 
55 | ) from e 56 | raise LLMOutputError('No valid JSON object found in response.') 57 | -------------------------------------------------------------------------------- /easyweb/core/utils/singleton.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | 3 | 4 | class Singleton(type): 5 | _instances: dict = {} 6 | 7 | def __call__(cls, *args, **kwargs): 8 | if cls not in cls._instances: 9 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 10 | else: 11 | # allow updates, just update existing instance 12 | # perhaps not the most orthodox way to do it, though it simplifies client code 13 | # useful for pre-defined groups of settings 14 | instance = cls._instances[cls] 15 | for key, value in kwargs.items(): 16 | setattr(instance, key, value) 17 | return cls._instances[cls] 18 | 19 | @classmethod 20 | def reset(cls): 21 | # used by pytest to reset the state of the singleton instances 22 | for instance_type, instance in cls._instances.items(): 23 | print('resetting... 
', instance_type) 24 | for field in dataclasses.fields(instance_type): 25 | if dataclasses.is_dataclass(field.type): 26 | setattr(instance, field.name, field.type()) 27 | else: 28 | setattr(instance, field.name, field.default) 29 | -------------------------------------------------------------------------------- /easyweb/events/__init__.py: -------------------------------------------------------------------------------- 1 | from .event import Event, EventSource 2 | from .stream import EventStream, EventStreamSubscriber 3 | 4 | __all__ = [ 5 | 'Event', 6 | 'EventSource', 7 | 'EventStream', 8 | 'EventStreamSubscriber', 9 | ] 10 | -------------------------------------------------------------------------------- /easyweb/events/action/__init__.py: -------------------------------------------------------------------------------- 1 | from .action import Action 2 | from .agent import ( 3 | AgentDelegateAction, 4 | AgentFinishAction, 5 | AgentRecallAction, 6 | AgentRejectAction, 7 | AgentSummarizeAction, 8 | ChangeAgentStateAction, 9 | ) 10 | from .browse import BrowseInteractiveAction, BrowseURLAction 11 | from .commands import CmdKillAction, CmdRunAction, IPythonRunCellAction 12 | from .empty import NullAction 13 | from .files import FileReadAction, FileWriteAction 14 | from .message import MessageAction 15 | from .planning import FinishPlanningAction, StartPlanningAction 16 | from .tasks import AddTaskAction, ModifyTaskAction 17 | 18 | __all__ = [ 19 | 'Action', 20 | 'NullAction', 21 | 'CmdRunAction', 22 | 'CmdKillAction', 23 | 'BrowseURLAction', 24 | 'BrowseInteractiveAction', 25 | 'FileReadAction', 26 | 'FileWriteAction', 27 | 'AgentRecallAction', 28 | 'AgentFinishAction', 29 | 'AgentRejectAction', 30 | 'AgentDelegateAction', 31 | 'AgentSummarizeAction', 32 | 'AddTaskAction', 33 | 'ModifyTaskAction', 34 | 'ChangeAgentStateAction', 35 | 'IPythonRunCellAction', 36 | 'MessageAction', 37 | 'StartPlanningAction', 38 | 'FinishPlanningAction', 39 | ] 40 | 
-------------------------------------------------------------------------------- /easyweb/events/action/action.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import ClassVar 3 | 4 | from easyweb.events.event import Event 5 | 6 | 7 | @dataclass 8 | class Action(Event): 9 | runnable: ClassVar[bool] = False 10 | -------------------------------------------------------------------------------- /easyweb/events/action/agent.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import ClassVar 3 | 4 | from easyweb.core.schema import ActionType 5 | 6 | from .action import Action 7 | 8 | 9 | @dataclass 10 | class ChangeAgentStateAction(Action): 11 | """Fake action, just to notify the client that a task state has changed.""" 12 | 13 | agent_state: str 14 | thought: str = '' 15 | action: str = ActionType.CHANGE_AGENT_STATE 16 | 17 | @property 18 | def message(self) -> str: 19 | return f'Agent state changed to {self.agent_state}' 20 | 21 | 22 | @dataclass 23 | class AgentRecallAction(Action): 24 | query: str 25 | thought: str = '' 26 | action: str = ActionType.RECALL 27 | runnable: ClassVar[bool] = True 28 | 29 | @property 30 | def message(self) -> str: 31 | return f"Let me dive into my memories to find what you're looking for! Searching for: '{self.query}'. This might take a moment." 
32 | 33 | 34 | @dataclass 35 | class AgentSummarizeAction(Action): 36 | summary: str 37 | action: str = ActionType.SUMMARIZE 38 | 39 | @property 40 | def message(self) -> str: 41 | return self.summary 42 | 43 | def __str__(self) -> str: 44 | ret = '**AgentSummarizeAction**\n' 45 | ret += f'SUMMARY: {self.summary}' 46 | return ret 47 | 48 | 49 | @dataclass 50 | class AgentFinishAction(Action): 51 | outputs: dict = field(default_factory=dict) 52 | thought: str = '' 53 | action: str = ActionType.FINISH 54 | 55 | @property 56 | def message(self) -> str: 57 | # return "All done! What's next on the agenda?" 58 | return 'Task complete! How can I assist you next?' 59 | 60 | 61 | @dataclass 62 | class AgentRejectAction(Action): 63 | outputs: dict = field(default_factory=dict) 64 | thought: str = '' 65 | action: str = ActionType.REJECT 66 | 67 | @property 68 | def message(self) -> str: 69 | return 'Task is rejected by the agent.' 70 | 71 | 72 | @dataclass 73 | class AgentDelegateAction(Action): 74 | agent: str 75 | inputs: dict 76 | thought: str = '' 77 | action: str = ActionType.DELEGATE 78 | 79 | @property 80 | def message(self) -> str: 81 | return f"I'm asking {self.agent} for help with this task." 
82 | -------------------------------------------------------------------------------- /easyweb/events/action/browse.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import ClassVar 3 | 4 | from easyweb.core.schema import ActionType 5 | 6 | from .action import Action 7 | 8 | 9 | @dataclass 10 | class BrowseURLAction(Action): 11 | url: str 12 | thought: str = '' 13 | action: str = ActionType.BROWSE 14 | runnable: ClassVar[bool] = True 15 | 16 | @property 17 | def message(self) -> str: 18 | return f'Browsing URL: {self.url}' 19 | 20 | def __str__(self) -> str: 21 | ret = '**BrowseURLAction**\n' 22 | if self.thought: 23 | ret += f'THOUGHT: {self.thought}\n' 24 | ret += f'URL: {self.url}' 25 | return ret 26 | 27 | 28 | @dataclass 29 | class BrowseInteractiveAction(Action): 30 | browser_actions: str 31 | thought: str = '' 32 | browsergym_send_msg_to_user: str = '' 33 | action: str = ActionType.BROWSE_INTERACTIVE 34 | runnable: ClassVar[bool] = True 35 | 36 | @property 37 | def message(self) -> str: 38 | return f'Executing browser actions: {self.browser_actions}' 39 | 40 | def __str__(self) -> str: 41 | ret = '**BrowseInteractiveAction**\n' 42 | if self.thought: 43 | ret += f'THOUGHT: {self.thought}\n' 44 | ret += f'BROWSER_ACTIONS: {self.browser_actions}' 45 | return ret 46 | -------------------------------------------------------------------------------- /easyweb/events/action/commands.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import ClassVar 3 | 4 | from easyweb.core.schema import ActionType 5 | 6 | from .action import Action 7 | 8 | 9 | @dataclass 10 | class CmdRunAction(Action): 11 | command: str 12 | background: bool = False 13 | thought: str = '' 14 | action: str = ActionType.RUN 15 | runnable: ClassVar[bool] = True 16 | 17 | @property 18 | def message(self) -> str: 19 | return 
f'Running command: {self.command}' 20 | 21 | def __str__(self) -> str: 22 | ret = '**CmdRunAction**\n' 23 | if self.thought: 24 | ret += f'THOUGHT: {self.thought}\n' 25 | ret += f'COMMAND:\n{self.command}' 26 | return ret 27 | 28 | 29 | @dataclass 30 | class CmdKillAction(Action): 31 | command_id: int 32 | thought: str = '' 33 | action: str = ActionType.KILL 34 | runnable: ClassVar[bool] = True 35 | 36 | @property 37 | def message(self) -> str: 38 | return f'Killing command: {self.command_id}' 39 | 40 | def __str__(self) -> str: 41 | return f'**CmdKillAction**\n{self.command_id}' 42 | 43 | 44 | @dataclass 45 | class IPythonRunCellAction(Action): 46 | code: str 47 | thought: str = '' 48 | action: str = ActionType.RUN_IPYTHON 49 | runnable: ClassVar[bool] = True 50 | kernel_init_code: str = '' # code to run in the kernel (if the kernel is restarted) 51 | 52 | def __str__(self) -> str: 53 | ret = '**IPythonRunCellAction**\n' 54 | if self.thought: 55 | ret += f'THOUGHT: {self.thought}\n' 56 | ret += f'CODE:\n{self.code}' 57 | return ret 58 | 59 | @property 60 | def message(self) -> str: 61 | return f'Running Python code interactively: {self.code}' 62 | -------------------------------------------------------------------------------- /easyweb/events/action/empty.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from easyweb.core.schema import ActionType 4 | 5 | from .action import Action 6 | 7 | 8 | @dataclass 9 | class NullAction(Action): 10 | """An action that does nothing.""" 11 | 12 | action: str = ActionType.NULL 13 | 14 | @property 15 | def message(self) -> str: 16 | return 'No action' 17 | -------------------------------------------------------------------------------- /easyweb/events/action/files.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import ClassVar 3 | 4 | from easyweb.core.schema 
import ActionType 5 | 6 | from .action import Action 7 | 8 | 9 | @dataclass 10 | class FileReadAction(Action): 11 | """ 12 | Reads a file from a given path. 13 | Can be set to read specific lines using start and end 14 | Default lines 0:-1 (whole file) 15 | """ 16 | 17 | path: str 18 | start: int = 0 19 | end: int = -1 20 | thought: str = '' 21 | action: str = ActionType.READ 22 | runnable: ClassVar[bool] = True 23 | 24 | @property 25 | def message(self) -> str: 26 | return f'Reading file: {self.path}' 27 | 28 | 29 | @dataclass 30 | class FileWriteAction(Action): 31 | path: str 32 | content: str 33 | start: int = 0 34 | end: int = -1 35 | thought: str = '' 36 | action: str = ActionType.WRITE 37 | runnable: ClassVar[bool] = True 38 | 39 | @property 40 | def message(self) -> str: 41 | return f'Writing file: {self.path}' 42 | -------------------------------------------------------------------------------- /easyweb/events/action/message.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from easyweb.core.schema import ActionType 4 | 5 | from .action import Action 6 | 7 | 8 | @dataclass 9 | class MessageAction(Action): 10 | content: str 11 | wait_for_response: bool = False 12 | action: str = ActionType.MESSAGE 13 | 14 | @property 15 | def message(self) -> str: 16 | return self.content 17 | 18 | def __str__(self) -> str: 19 | ret = f'**MessageAction** (source={self.source})\n' 20 | ret += f'CONTENT: {self.content}' 21 | return ret 22 | -------------------------------------------------------------------------------- /easyweb/events/action/planning.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from easyweb.core.schema import ActionType 4 | 5 | from .action import Action 6 | 7 | 8 | @dataclass 9 | class StartPlanningAction(Action): 10 | eta_seconds: float 11 | action: str = ActionType.START_PLANNING 12 | 13 | 
@dataclass
class Event:
    """Common base for events, exposing optional bookkeeping metadata.

    Every property reads an underscore-prefixed instance attribute
    (``_message``, ``_id``, ``_timestamp``, ``_source``, ``_cause``) when one
    has been set on the instance and otherwise falls back to a fixed default.
    """

    @property
    def message(self) -> str | None:
        """Return ``_message`` if it is set, else an empty string."""
        return getattr(self, '_message', '')

    @property
    def id(self) -> int | None:
        """Return ``_id`` if it is set, else ``-1``."""
        return getattr(self, '_id', -1)

    @property
    def timestamp(self) -> datetime.datetime | None:
        """Return ``_timestamp`` if it is set, else ``None``."""
        return getattr(self, '_timestamp', None)

    @property
    def source(self) -> EventSource | None:
        """Return ``_source`` if it is set, else ``None``."""
        return getattr(self, '_source', None)

    @property
    def cause(self) -> int | None:
        """Return ``_cause`` if it is set, else ``None``."""
        return getattr(self, '_cause', None)
@dataclass
class AgentStateChangedObservation(Observation):
    """
    This data class reports an agent state change
    (observation type ``ObservationType.AGENT_STATE_CHANGED``).
    """

    # presumably the state the agent changed to -- confirm against the emitter
    agent_state: str
    observation: str = ObservationType.AGENT_STATE_CHANGED

    @property
    def message(self) -> str:
        """Return an empty string; this observation has no message text."""
        return ''
@dataclass
class CmdOutputObservation(Observation):
    """
    Output of a command, together with the command's id, text and exit code.
    The output itself is held in the inherited ``content`` field.
    """

    command_id: int
    command: str
    exit_code: int = 0
    observation: str = ObservationType.RUN

    @property
    def message(self) -> str:
        """Return a sentence naming the command and its exit code."""
        return f'Command `{self.command}` executed with exit code {self.exit_code}.'

    @property
    def error(self) -> bool:
        """Return True when the exit code is non-zero."""
        failed = self.exit_code != 0
        return failed

    def __str__(self) -> str:
        """Render a header showing the exit code, followed by the content."""
        header = f'**CmdOutputObservation (exit code={self.exit_code})**'
        return f'{header}\n{self.content}'
@dataclass
class NullObservation(Observation):
    """
    This data class represents a null observation.
    This is used when the produced action is NOT executable.
    """

    # Type tag of this observation; defaults to ObservationType.NULL.
    observation: str = ObservationType.NULL

    @property
    def message(self) -> str:
        """Return the constant placeholder text for a null observation."""
        return 'No observation'
@dataclass
class FileReadObservation(Observation):
    """
    This data class represents the content of a file.
    The text itself is carried in the inherited ``content`` field.
    """

    path: str  # path of the file; echoed in `message`
    observation: str = ObservationType.READ

    @property
    def message(self) -> str:
        """Return a first-person sentence naming the file that was read."""
        return f'I read the file {self.path}.'
@dataclass
class SuccessObservation(Observation):
    """
    This data class represents the result of a successful action.
    """

    # Type tag of this observation; defaults to ObservationType.SUCCESS.
    observation: str = ObservationType.SUCCESS

    @property
    def message(self) -> str:
        """Return the inherited ``content`` text unchanged."""
        return self.content
11 | from easyweb.events.action.commands import ( 12 | CmdKillAction, 13 | CmdRunAction, 14 | IPythonRunCellAction, 15 | ) 16 | from easyweb.events.action.empty import NullAction 17 | from easyweb.events.action.files import FileReadAction, FileWriteAction 18 | from easyweb.events.action.message import MessageAction 19 | from easyweb.events.action.planning import FinishPlanningAction, StartPlanningAction 20 | from easyweb.events.action.tasks import AddTaskAction, ModifyTaskAction 21 | 22 | actions = ( 23 | NullAction, 24 | CmdKillAction, 25 | CmdRunAction, 26 | IPythonRunCellAction, 27 | BrowseURLAction, 28 | BrowseInteractiveAction, 29 | FileReadAction, 30 | FileWriteAction, 31 | AgentRecallAction, 32 | AgentFinishAction, 33 | AgentRejectAction, 34 | AgentDelegateAction, 35 | AddTaskAction, 36 | ModifyTaskAction, 37 | ChangeAgentStateAction, 38 | MessageAction, 39 | StartPlanningAction, 40 | FinishPlanningAction, 41 | ) 42 | 43 | ACTION_TYPE_TO_CLASS = {action_class.action: action_class for action_class in actions} # type: ignore[attr-defined] 44 | 45 | 46 | def action_from_dict(action: dict) -> Action: 47 | if not isinstance(action, dict): 48 | raise AgentMalformedActionError('action must be a dictionary') 49 | action = action.copy() 50 | if 'action' not in action: 51 | raise AgentMalformedActionError(f"'action' key is not found in {action=}") 52 | if not isinstance(action['action'], str): 53 | raise AgentMalformedActionError( 54 | f"'{action['action']=}' is not defined. Available actions: {ACTION_TYPE_TO_CLASS.keys()}" 55 | ) 56 | action_class = ACTION_TYPE_TO_CLASS.get(action['action']) 57 | if action_class is None: 58 | raise AgentMalformedActionError( 59 | f"'{action['action']=}' is not defined. 
def event_from_dict(data) -> 'Event':
    """Rebuild an event object from its dictionary form.

    A dictionary containing an ``'action'`` key is decoded with
    ``action_from_dict``; one containing an ``'observation'`` key is decoded
    with ``observation_from_dict``. Every key listed in ``UNDERSCORE_KEYS``
    that is present in ``data`` is then stored on the event as an
    underscore-prefixed attribute (``_id``, ``_timestamp``, ...).

    Parameters:
    - data: The serialized event

    Returns:
    - Event: The decoded action or observation

    Raises:
    - ValueError: If ``data`` has neither an ``'action'`` nor an
      ``'observation'`` key
    """
    evt: Event
    if 'action' in data:
        evt = action_from_dict(data)
    elif 'observation' in data:
        evt = observation_from_dict(data)
    else:
        # Interpolate instead of using ``str + dict``: the concatenation
        # raised TypeError and hid the ValueError that was meant to be raised.
        raise ValueError(f'Unknown event type: {data}')
    for key in UNDERSCORE_KEYS:
        if key in data:
            value = data[key]
            if key == 'timestamp':
                # timestamps are serialized as ISO-8601 strings
                value = datetime.fromisoformat(value)
            if key == 'source':
                value = EventSource(value)
            setattr(evt, '_' + key, value)
    return evt
def event_to_memory(event: 'Event') -> dict:
    """Serialize an event into the reduced dictionary kept in memory.

    Starts from ``event_to_dict(event)``, drops the ``id``, ``cause``,
    ``timestamp`` and ``message`` entries, and, when an ``extras`` entry is
    present, removes every field named in ``DELETE_FROM_MEMORY_EXTRAS`` from it.
    """
    memory = event_to_dict(event)
    for bookkeeping_key in ('id', 'cause', 'timestamp', 'message'):
        memory.pop(bookkeeping_key, None)
    if 'extras' in memory:
        remove_fields(memory['extras'], DELETE_FROM_MEMORY_EXTRAS)
    return memory
def remove_fields(obj, fields: set[str]):
    """
    Strip the named keys from a nested structure, in place.

    Dictionaries lose every key listed in `fields` and their remaining values
    are processed recursively; lists and tuples are walked element by element.
    Any other object is left untouched unless it is a dataclass instance.

    Parameters:
    - obj: The dictionary, or list of dictionaries to remove fields from
    - fields (set[str]): A set of field names to remove from the object

    Raises:
    - ValueError: If a dataclass instance is encountered
    """
    if isinstance(obj, dict):
        for name in fields:
            obj.pop(name, None)
        for nested in obj.values():
            remove_fields(nested, fields)
        return
    if isinstance(obj, (list, tuple)):
        for element in obj:
            remove_fields(element, fields)
        return
    if hasattr(obj, '__dataclass_fields__'):
        raise ValueError(
            'Object must not contain dataclass, consider converting to dict first'
        )
def remove_error_modelId(model_list):
    """Return a new list without the entries whose name starts with 'bedrock'."""
    return [model for model in model_list if not model.startswith('bedrock')]
class ShortTermHistory:
    """
    Keeps the most recent series of events, in the order they were added.
    An agent can send this in the prompt or use it for other purposes.
    """

    def __init__(self):
        """
        Start with no recorded events
        """
        self.events = []

    def add_event(self, event_dict: dict):
        """
        Appends an event to the history after checking its type.

        Parameters:
        - event_dict (dict): The event that we want to add to memory

        Raises:
        - AgentEventTypeError: If event_dict is not a dict
        """
        if isinstance(event_dict, dict):
            self.events.append(event_dict)
            return
        raise AgentEventTypeError()

    def get_events(self):
        """
        Get the events in the agent's recent history.

        Returns:
        - List: The stored list of events (the same object, not a copy).
        """
        return self.events

    def get_total_length(self):
        """
        Sums the length of the JSON serialization of every stored event.
        Events that fail to serialize with a TypeError are logged and skipped.

        Returns:
        - Int: Total number of characters of the recent history.
        """
        char_count = 0
        for event in self.events:
            try:
                event_length = len(json.dumps(event))
            except TypeError as e:
                logger.error('Error serializing event: %s', str(e), exc_info=False)
                continue
            char_count += event_length
        return char_count
class E2BFileStore(FileStore):
    """FileStore implementation that forwards every call to a filesystem object.

    The filesystem is whatever is passed to the constructor; it must provide
    `write`, `read`, `list` and `delete` methods taking the same arguments
    as the methods below.
    """

    def __init__(self, filesystem):
        # Keep the filesystem handle; all operations delegate to it.
        self.filesystem = filesystem

    def write(self, path: str, contents: str) -> None:
        """Write `contents` to `path` through the wrapped filesystem."""
        self.filesystem.write(path, contents)

    def read(self, path: str) -> str:
        """Return whatever the wrapped filesystem reads from `path`."""
        return self.filesystem.read(path)

    def list(self, path: str) -> list[str]:
        """Return the wrapped filesystem's listing for `path`."""
        return self.filesystem.list(path)

    def delete(self, path: str) -> None:
        """Delete `path` through the wrapped filesystem."""
        self.filesystem.delete(path)
class E2BRuntime(ServerRuntime):
    """ServerRuntime whose file reads and writes go through an E2BFileStore."""

    def __init__(
        self,
        event_stream: EventStream,
        sid: str = 'default',
        sandbox: Sandbox | None = None,
    ):
        """Initialize the base runtime and wrap the sandbox filesystem.

        Raises:
            ValueError: if `self.sandbox` is not an E2BSandbox once the base
                class has been initialized.
        """
        super().__init__(event_stream, sid, sandbox)
        if not isinstance(self.sandbox, E2BSandbox):
            raise ValueError('E2BRuntime requires an E2BSandbox')
        self.file_store = E2BFileStore(self.sandbox.filesystem)

    async def read(self, action: FileReadAction) -> Observation:
        """Read `action.path` and return the lines selected by start/end."""
        content = self.file_store.read(action.path)
        # NOTE(review): split('\n') drops the line terminators and ''.join()
        # below does not put them back; unless read_lines re-adds them the
        # selected lines will run together -- confirm against read_lines.
        lines = read_lines(content.split('\n'), action.start, action.end)
        code_view = ''.join(lines)
        return FileReadObservation(code_view, path=action.path)

    async def write(self, action: FileWriteAction) -> Observation:
        """Write `action.content` to `action.path`.

        With the default range (start == 0 and end == -1) the content is
        written as the whole file. Otherwise the content is merged into the
        existing file via insert_lines; if the path is not found an
        ErrorObservation is returned instead.
        """
        if action.start == 0 and action.end == -1:
            self.file_store.write(action.path, action.content)
            return FileWriteObservation(content='', path=action.path)
        # NOTE(review): list() is called with the file path itself; presumably
        # the containing directory should be listed for this membership test
        # to succeed -- verify against the E2B filesystem API.
        files = self.file_store.list(action.path)
        if action.path in files:
            all_lines = self.file_store.read(action.path).split('\n')
            new_file = insert_lines(
                action.content.split('\n'), all_lines, action.start, action.end
            )
            # NOTE(review): the existing lines were split on '\n' and are
            # joined with '' here; whether newlines survive depends on
            # insert_lines -- confirm.
            self.file_store.write(action.path, ''.join(new_file))
            return FileWriteObservation('', path=action.path)
        else:
            # FIXME: we should create a new file here
            return ErrorObservation(f'File not found: {action.path}')
7 | 8 | The skill set can contain a bunch of wrapped tools for the agent ([many examples here](https://github.com/OpenDevin/OpenDevin/pull/1914)), for example: 9 | - Audio/Video to text (these are a temporary solution, and we should switch to multimodal models when they are sufficiently cheap) 10 | - PDF to text 11 | - etc. 12 | 13 | # Inclusion Criteria 14 | 15 | We are walking a fine line here. 16 | We DON'T want to *wrap* every possible Python package and re-teach the agent its usage (e.g., the LLM already knows `pandas` pretty well, so we don't really need to create a skill that reads `csv` - it can just use `pandas`). 17 | 18 | We ONLY want to add a new skill when: 19 | - The skill is not easily achieved by the LLM writing code directly (e.g., editing code and replacing a certain line) 20 | - It involves calling an external model (e.g., you need to call a speech-to-text model, or an editor model for speculative editing) 21 | 22 | # Intended functionality 23 | 24 | - Tool/skill usage (through `IPythonRunAction`) 25 | 26 | ```python 27 | # In[1] 28 | from agentskills import open_file, edit_file 29 | open_file("/workspace/a.txt") 30 | # Out[1] 31 | [SWE-agent open output] 32 | 33 | # In[2] 34 | edit_file( 35 | "/workspace/a.txt", 36 | start=1, end=3, 37 | content=( 38 | ("REPLACE TEXT") 39 | )) 40 | # Out[2] 41 | [SWE-agent edit output] 42 | ``` 43 | 44 | - Tool/skill retrieval (through `IPythonRunAction`) 45 | 46 | ```python 47 | # In[1] 48 | from agentskills import help_me 49 | 50 | help_me("I want to solve a task that involves reading a bunch of PDFs and reasoning about them") 51 | 52 | # Out[1] 53 | "Here are the top skills that may be helpful to you: 54 | - `pdf_to_text`: [documentation about the tools] 55 | ...
@dataclass
class AgentSkillsRequirement(PluginRequirement):
    """Plugin requirement describing the ``agent_skills`` plugin."""

    name: str = 'agent_skills'
    host_src: str = os.path.dirname(
        os.path.abspath(__file__)
    )  # The directory of this file (the agent_skills plugin folder)
    # Destination of the plugin files inside the sandbox.
    sandbox_dest: str = '/opendevin/plugins/agent_skills'
    # Setup script name; the sibling setup.sh puts sandbox_dest on PATH/PYTHONPATH.
    bash_script_path: str = 'setup.sh'
    # Documentation string imported from the agentskills module.
    documentation: str = DOCUMENTATION
# Read the Python code from STDIN
code = sys.stdin.read()


def execute_code(code, print_output=True):
    """POST ``code`` to the local Jupyter execution server and print the reply.

    The server port is read from the ``JUPYTER_EXEC_SERVER_PORT`` environment
    variable. The request is attempted up to 10 times, sleeping 2 seconds
    after a connection error or a response containing
    '500: Internal Server Error'. If no attempt succeeds, a failure message is
    printed instead.

    Args:
        code: Python source to execute in the 'default' kernel.
        print_output: when False, a successful response is not printed.
    """
    port = os.environ.get('JUPYTER_EXEC_SERVER_PORT')
    post_url = f'http://localhost:{port}/execute'

    # Set the default kernel ID
    kernel_id = 'default'

    for _ in range(10):
        try:
            response = requests.post(
                post_url, json={'kernel_id': kernel_id, 'code': code}
            )
            if '500: Internal Server Error' not in response.text:
                if print_output:
                    print(response.text)
                break
        except requests.exceptions.ConnectionError:
            # Server not reachable yet; fall through to the sleep and retry.
            pass
        time.sleep(2)
    else:
        # Loop finished without a `break`: every attempt failed.
        print('Failed to connect to the Jupyter server')


if jupyter_pwd := os.environ.get('JUPYTER_PWD'):
    # `!r` renders the path as a valid Python string literal, so a directory
    # name containing quotes or backslashes cannot break (or inject into) the
    # generated snippet the way hand-written double quotes could.
    execute_code(
        f'import os\nos.environ["JUPYTER_PWD"] = {jupyter_pwd!r}\n',
        print_output=False,
    )

execute_code(code)
@dataclass
class SWEAgentCommandsRequirement(PluginRequirement):
    """Plugin requirement for the default-mode SWE-agent shell commands."""

    name: str = 'swe_agent_commands'
    # Directory containing this file.
    host_src: str = os.path.dirname(os.path.abspath(__file__))
    sandbox_dest: str = '/opendevin/plugins/swe_agent_commands'
    bash_script_path: str = 'setup_default.sh'

    # Copy the module-level list so each instance owns its own; returning
    # DEFAULT_SCRIPT_FILEPATHS itself made every instance (and the module
    # constant) share one mutable list.
    scripts_filepaths: list[str | None] = field(
        default_factory=lambda: list(DEFAULT_SCRIPT_FILEPATHS)
    )
    # Command documentation parsed from the default script files at import time.
    documentation: str = DEFAULT_DOCUMENTATION
@dataclass
class SWEAgentCursorCommandsRequirement(PluginRequirement):
    """Plugin requirement for the cursor-mode SWE-agent shell commands."""

    name: str = 'swe_agent_commands'
    # Directory containing this file.
    host_src: str = os.path.dirname(os.path.abspath(__file__))
    sandbox_dest: str = '/opendevin/plugins/swe_agent_commands'
    bash_script_path: str = 'setup_cursor_mode.sh'

    # Copy the module-level list so each instance owns its own; returning
    # CURSOR_SCRIPT_FILEPATHS itself made every instance (and the module
    # constant) share one mutable list.
    scripts_filepaths: list[str | None] = field(
        default_factory=lambda: list(CURSOR_SCRIPT_FILEPATHS)
    )
    # Command documentation parsed from the cursor-mode script files at import time.
    documentation: str = CURSOR_DOCUMENTATION
def print_flake8_output(input_string, show_line_numbers=False):
    """Print each flake8 finding in ``input_string`` as a ``- ...`` bullet.

    Every non-blank line is split on whitespace; the first token (the
    ``file:line:col:`` location) is dropped, or reduced to its ``line:col:``
    part when ``show_line_numbers`` is true.

    Args:
        input_string: raw flake8 output, one finding per line.
        show_line_numbers: keep the line/column part of the location prefix.
    """
    for value in input_string.split('\n'):
        parts = value.split()
        if not parts:
            # Blank line (e.g. empty input or a trailing newline): previously
            # this raised IndexError with show_line_numbers=True and printed an
            # empty "- " bullet otherwise.
            continue
        message = ' '.join(parts[1:])
        if not show_line_numbers:
            print(f'- {message}')
        else:
            # "path:line:col:" -> "line:col:"
            line_nums = ':'.join(parts[0].split(':')[1:])
            print(f'- {line_nums} {message}')
def parse_command_file(filepath: str) -> str:
    """Build a one-line-per-command summary from a shell command file.

    The file is scanned line by line. Lines starting with ``# `` are collected
    as YAML documentation for the next shell function (a line ending in
    ``() {``). For each function the YAML may supply ``docstring``,
    ``signature`` and ``arguments``; when no signature is given one is built
    from the arguments, rendering required ones as ``<name>`` and optional
    ones as ``[<name>]``.

    Args:
        filepath: path of the shell script to parse.

    Returns:
        ``"<signature> - <docstring>\\n"`` for every function that has a
        docstring, concatenated in file order.
    """
    # Context manager so the handle is closed even if reading fails.
    with open(filepath, 'r', encoding='utf-8') as source:
        content = source.read()
    lines = content.split('\n')
    commands: list[Command] = []
    idx = 0
    docs: list[str] = []
    while idx < len(lines):
        line = lines[idx]
        idx += 1
        if line.startswith('# '):
            docs.append(line[2:])
        elif line.strip().endswith('() {'):
            # "name() {" -> "name"
            name = line.split()[0][:-2]
            # Skip the function body; the bounds check keeps a file with a
            # missing closing brace from raising IndexError.
            while idx < len(lines) and lines[idx].strip() != '}':
                idx += 1
            docstring, signature = None, name
            docs_dict = yaml.safe_load('\n'.join(docs).replace('@yaml', ''))
            if docs_dict is not None:
                docstring = docs_dict.get('docstring')
                arguments = docs_dict.get('arguments', None)
                if 'signature' in docs_dict:
                    signature = docs_dict['signature']
                elif arguments is not None:
                    for param, settings in arguments.items():
                        # Test the flag's value, not just the key's presence,
                        # so `required: false` is rendered as optional.
                        if isinstance(settings, dict) and settings.get('required'):
                            signature += f' <{param}>'
                        else:
                            signature += f' [<{param}>]'
            commands.append(Command(name, docstring, signature))
            # Documentation belongs to exactly one function; start afresh.
            docs = []
    return ''.join(
        f'{cmd.signature or cmd.name} - {cmd.docstring}\n'
        for cmd in commands
        if cmd.docstring is not None
    )
https://github.com/princeton-nlp/SWE-agent/blob/ca54d5556b9db4f4f2be21f09530ce69a72c0305/config/configs/default_sys-env_cursors_window100-detailed_cmd_format-last_5_history-1_demos.yaml#L108-L111 8 | echo 'source /opendevin/plugins/swe_agent_commands/_setup_cursor_mode_env.sh' >> ~/.bashrc 9 | 10 | # make _split_string (py) available 11 | echo 'export PATH=$PATH:/opendevin/plugins/swe_agent_commands' >> ~/.bashrc 12 | 13 | echo 'source /opendevin/plugins/swe_agent_commands/cursors_defaults.sh' >> ~/.bashrc 14 | echo 'source /opendevin/plugins/swe_agent_commands/cursors_edit_linting.sh' >> ~/.bashrc 15 | echo 'source /opendevin/plugins/swe_agent_commands/search.sh' >> ~/.bashrc 16 | 17 | echo 'export SWE_CMD_WORK_DIR="/opendevin/plugins/swe_agent_commands/workdir"' >> ~/.bashrc 18 | sudo mkdir -p /opendevin/plugins/swe_agent_commands/workdir 19 | sudo chmod 777 /opendevin/plugins/swe_agent_commands/workdir 20 | -------------------------------------------------------------------------------- /easyweb/runtime/plugins/swe_agent_commands/setup_default.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export PIP_CACHE_DIR=$HOME/.cache/pip 4 | pip install flake8 5 | 6 | # Default Mode from SWE-Bench 7 | # https://github.com/princeton-nlp/SWE-agent/blob/ca54d5556b9db4f4f2be21f09530ce69a72c0305/config/configs/default_sys-env_window100-detailed_cmd_format-last_5_history-1_demos.yaml#L103-L106 8 | echo 'source /opendevin/plugins/swe_agent_commands/_setup_default_env.sh' >> ~/.bashrc 9 | 10 | # make _split_string (py) available 11 | echo 'export PATH=$PATH:/opendevin/plugins/swe_agent_commands' >> ~/.bashrc 12 | 13 | echo 'source /opendevin/plugins/swe_agent_commands/defaults.sh' >> ~/.bashrc 14 | echo 'source /opendevin/plugins/swe_agent_commands/search.sh' >> ~/.bashrc 15 | echo 'source /opendevin/plugins/swe_agent_commands/edit_linting.sh' >> ~/.bashrc 16 | 17 | echo 'export 
class Sandbox(ABC, PluginMixin):
    """Abstract base for sandboxes that execute commands for the runtime.

    Concrete sandboxes implement command execution, background process
    management, file copy and shutdown. The base class tracks environment
    variables exported into the sandbox and the background processes started
    in it.
    """

    # Class-level defaults kept for backward compatibility (subclasses may
    # read them before calling Sandbox.__init__); __init__ replaces them with
    # per-instance copies.
    background_commands: dict[int, Process] = {}
    _env: dict[str, str] = {}
    is_initial_session: bool = True

    def __init__(self, **kwargs):
        """Export configured environment variables and read plugin settings.

        Every host variable named ``SANDBOX_ENV_<KEY>`` is exported into the
        sandbox as ``<KEY>``; ``ENABLE_AUTO_LINT`` is exported when
        ``config.enable_auto_lint`` is set.
        """
        # Give each instance its own dicts. Mutating the class-level dicts
        # through `self` shared env vars and background processes between all
        # sandbox instances. Existing entries are copied so state a subclass
        # set up before calling this initializer is preserved.
        self.background_commands = dict(self.background_commands)
        self._env = dict(self._env)
        for key in os.environ:
            if key.startswith('SANDBOX_ENV_'):
                sandbox_key = key.removeprefix('SANDBOX_ENV_')
                self.add_to_env(sandbox_key, os.environ[key])
        if config.enable_auto_lint:
            self.add_to_env('ENABLE_AUTO_LINT', 'true')
        self.initialize_plugins: bool = config.initialize_plugins

    def add_to_env(self, key: str, value: str):
        """Record ``key=value`` and export it inside the sandbox shell."""
        self._env[key] = value
        # Note: json.dumps gives us nice escaping for free
        # NOTE(review): the result is a double-quoted string, inside which a
        # POSIX shell still expands `$` and backticks — confirm values are
        # trusted or switch to shell-specific quoting.
        self.execute(f'export {key}={json.dumps(value)}')

    @abstractmethod
    def execute(
        self, cmd: str, stream: bool = False, timeout: int | None = None
    ) -> tuple[int, str | CancellableStream]:
        """Run ``cmd`` and return an ``(int, output-or-stream)`` tuple."""
        pass

    @abstractmethod
    def execute_in_background(self, cmd: str) -> Process:
        """Start ``cmd`` in the background and return its Process."""
        pass

    @abstractmethod
    def kill_background(self, id: int) -> Process:
        """Kill the background process identified by ``id`` and return it."""
        pass

    @abstractmethod
    def read_logs(self, id: int) -> str:
        """Return the logs of the background process identified by ``id``."""
        pass

    @abstractmethod
    def close(self):
        """Shut the sandbox down."""
        pass

    @abstractmethod
    def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
        """Copy ``host_src`` from the host to ``sandbox_dest`` in the sandbox."""
        pass

    @abstractmethod
    def get_working_directory(self):
        """Return the sandbox's working directory."""
        pass
class RuntimeTool(Enum):
    """Identifiers for runtime tools; currently only the browser."""

    BROWSER = 'browser'
def find_available_tcp_port() -> int:
    """Ask the OS for a free TCP port on localhost.

    Binding to port 0 lets the operating system choose an unused port.

    Returns:
        The chosen port number, or -1 if binding or querying the socket fails.
    """
    # The context manager closes the probe socket on every exit path.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        try:
            probe.bind(('localhost', 0))
            return probe.getsockname()[1]
        except Exception:
            return -1
def store_feedback(feedback: FeedbackDataModel):
    """Log a redacted view of ``feedback`` and POST the full payload to FEEDBACK_URL.

    The trajectory and token are elided from the log line only; the request
    body carries the complete model dump.

    Raises:
        ValueError: if the server answers with a status code other than 200.
    """
    # Start logging
    display_feedback = feedback.model_dump()
    if 'trajectory' in display_feedback:
        # Closing bracket added: the message previously read "elided [length: N".
        display_feedback['trajectory'] = (
            f"elided [length: {len(display_feedback['trajectory'])}]"
        )
    if 'token' in display_feedback:
        display_feedback['token'] = 'elided'
    logger.info(f'Got feedback: {display_feedback}')
    # Start actual request
    # A fresh dump is required here: display_feedback was redacted above.
    response = requests.post(
        FEEDBACK_URL,
        headers={'Content-Type': 'application/json'},
        json=feedback.model_dump(),
    )
    logger.info(f'Stored feedback: {response.status_code} {response.text}')
    if response.status_code != 200:
        raise ValueError(f'Failed to store feedback: {response.text}')
from .manager import SessionManager
from .session import Session

# Manager instance created at import time and shared by importers of this package.
session_manager = SessionManager()

# 'message_stack' was listed here but is never defined in this module, which
# made `from easyweb.server.session import *` fail with AttributeError.
__all__ = ['Session', 'SessionManager', 'session_manager']
sid not in self._sessions: 34 | return False 35 | return await self._sessions[sid].send(data) 36 | 37 | async def send_error(self, sid: str, message: str) -> bool: 38 | """Sends an error message to the client.""" 39 | return await self.send(sid, {'error': True, 'message': message}) 40 | 41 | async def send_message(self, sid: str, message: str) -> bool: 42 | """Sends a message to the client.""" 43 | return await self.send(sid, {'message': message}) 44 | 45 | async def _cleanup_sessions(self): 46 | while True: 47 | current_time = time.time() 48 | session_ids_to_remove = [] 49 | for sid, session in list(self._sessions.items()): 50 | # if session inactive for a long time, remove it 51 | if ( 52 | not session.is_alive 53 | and current_time - session.last_active_ts > self.session_timeout 54 | ): 55 | session_ids_to_remove.append(sid) 56 | 57 | for sid in session_ids_to_remove: 58 | to_del_session: Optional[Session] = self._sessions.pop(sid, None) 59 | if to_del_session is not None: 60 | await to_del_session.close() 61 | logger.info( 62 | f'Session {sid} and related resource have been removed due to inactivity.' 
63 | ) 64 | 65 | await asyncio.sleep(self.cleanup_interval) 66 | -------------------------------------------------------------------------------- /easyweb/storage/__init__.py: -------------------------------------------------------------------------------- 1 | from easyweb.core.config import config 2 | 3 | from .files import FileStore 4 | from .local import LocalFileStore 5 | from .memory import InMemoryFileStore 6 | from .s3 import S3FileStore 7 | 8 | 9 | def _get_file_store() -> FileStore: 10 | if config.file_store == 'local': 11 | return LocalFileStore(config.file_store_path) 12 | elif config.file_store == 's3': 13 | return S3FileStore() 14 | return InMemoryFileStore() 15 | 16 | 17 | singleton = _get_file_store() 18 | 19 | 20 | def get_file_store() -> FileStore: 21 | return singleton 22 | -------------------------------------------------------------------------------- /easyweb/storage/files.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | 3 | 4 | class FileStore: 5 | @abstractmethod 6 | def write(self, path: str, contents: str) -> None: 7 | pass 8 | 9 | @abstractmethod 10 | def read(self, path: str) -> str: 11 | pass 12 | 13 | @abstractmethod 14 | def list(self, path: str) -> list[str]: 15 | pass 16 | 17 | @abstractmethod 18 | def delete(self, path: str) -> None: 19 | pass 20 | -------------------------------------------------------------------------------- /easyweb/storage/local.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from .files import FileStore 4 | 5 | 6 | class LocalFileStore(FileStore): 7 | root: str 8 | 9 | def __init__(self, root: str): 10 | self.root = root 11 | os.makedirs(self.root, exist_ok=True) 12 | 13 | def get_full_path(self, path: str) -> str: 14 | if path.startswith('/'): 15 | path = path[1:] 16 | return os.path.join(self.root, path) 17 | 18 | def write(self, path: str, contents: str) -> None: 19 | 
class InMemoryFileStore(FileStore):
    """FileStore implementation that keeps every file in a dict.

    Keys are the paths exactly as passed to ``write``; ``/`` is treated as the
    directory separator when listing.
    """

    # Maps stored path -> file contents.
    files: dict[str, str]

    def __init__(self):
        self.files = {}

    def write(self, path: str, contents: str) -> None:
        """Store ``contents`` under ``path``, overwriting any previous value."""
        self.files[path] = contents

    def read(self, path: str) -> str:
        """Return the contents stored under ``path``.

        Raises:
            FileNotFoundError: if nothing was written to ``path``.
        """
        try:
            return self.files[path]
        except KeyError:
            raise FileNotFoundError(path) from None

    def list(self, path: str) -> list[str]:
        """Return the immediate children of directory ``path``.

        Files are reported by their full stored key; sub-directories are
        reported once each, as ``<path>/<name>/`` with a trailing slash.
        Only keys that are really below ``path`` are considered, so listing
        ``foo`` does not report entries of a sibling such as ``foobar/``.
        A key that is exactly equal to ``path`` has no children and
        contributes nothing.
        """
        # A dict is used as an insertion-ordered set: O(1) de-duplication
        # while keeping the order in which entries are first seen.
        entries: dict[str, None] = {}
        # True when ``path`` already ends on a directory boundary.
        at_boundary = not path or path.endswith('/')
        base = path if at_boundary else path + '/'
        for file in self.files:
            if not file.startswith(path):
                continue
            suffix = file.removeprefix(path)
            # Reject plain string-prefix matches ('foo' vs 'foobar/x') and the
            # key equal to ``path`` itself.
            if not at_boundary and not suffix.startswith('/'):
                continue
            parts = suffix.split('/')
            if parts[0] == '':
                parts.pop(0)
            if not parts:
                continue
            if len(parts) == 1:
                entries[file] = None
            else:
                # Store keys always use '/', so join explicitly instead of
                # relying on the host OS separator.
                dir_path = base + parts[0]
                if not dir_path.endswith('/'):
                    dir_path += '/'
                entries[dir_path] = None
        return [*entries]

    def delete(self, path: str) -> None:
        """Remove the entry stored under ``path``.

        Raises:
            KeyError: if nothing was written to ``path``.
        """
        del self.files[path]
class S3FileStore(FileStore):
    """FileStore backed by a bucket reached through the MinIO client.

    Credentials and the bucket name are read from the environment variables
    ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY`` and ``AWS_S3_BUCKET``.
    """

    def __init__(self, endpoint: str = AWS_S3_ENDPOINT) -> None:
        """Create a client for ``endpoint`` using credentials from the environment."""
        access_key = os.getenv('AWS_ACCESS_KEY_ID')
        secret_key = os.getenv('AWS_SECRET_ACCESS_KEY')
        # NOTE(review): ``os.getenv`` yields None when the variable is unset;
        # nothing here validates the bucket name before it is used.
        self.bucket = os.getenv('AWS_S3_BUCKET')
        self.client = Minio(endpoint, access_key, secret_key)

    def write(self, path: str, contents: str) -> None:
        """Upload ``contents`` (encoded as UTF-8) to object ``path``."""
        import io

        # put_object needs a readable binary stream and its exact byte length;
        # passing the str directly (and no length) is rejected by the client.
        payload = contents.encode('utf-8')
        self.client.put_object(self.bucket, path, io.BytesIO(payload), len(payload))

    def read(self, path: str) -> str:
        """Download object ``path`` and return its body decoded as UTF-8."""
        response = self.client.get_object(self.bucket, path)
        try:
            return response.data.decode('utf-8')
        finally:
            # Hand the HTTP connection back to the pool once the body is read.
            response.close()
            response.release_conn()

    def list(self, path: str) -> list[str]:
        """Return the names of the objects the client lists for prefix ``path``."""
        return [obj.object_name for obj in self.client.list_objects(self.bucket, path)]

    def delete(self, path: str) -> None:
        """Remove object ``path`` from the bucket."""
        self.client.remove_object(self.bucket, path)
"https://api.openai.com/v1/", 25 | "display_name": "OpenAI o3-mini" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "easyweb" 3 | version = "0.1.0" 4 | description = "EasyWeb: UI Agents at Your Fingertips" 5 | authors = ["Maitrix Team"] 6 | license = "MIT" 7 | readme = "README.md" 8 | repository = "https://github.com/maitrix-org/easyweb" 9 | packages = [ 10 | { include = "reasoners", from = "../llm-reasoners" }, 11 | ] 12 | 13 | [tool.poetry.dependencies] 14 | python = ">=3.11,<3.13" 15 | datasets = "*" 16 | pandas = "*" 17 | litellm = "*" 18 | google-generativeai = "*" # To use litellm with Gemini Pro API 19 | termcolor = "*" 20 | seaborn = "*" 21 | docker = "*" 22 | fastapi = "*" 23 | toml = "*" 24 | uvicorn = "*" 25 | types-toml = "*" 26 | numpy = "1.26.0" 27 | json-repair = "*" 28 | browsergym = "*" # integrate browsergym as the browsing interface 29 | html2text = "*" 30 | e2b = "^0.17.1" 31 | pexpect = "*" 32 | jinja2 = "^3.1.3" 33 | python-multipart = "*" 34 | boto3 = "*" 35 | minio = "^7.2.7" 36 | gevent = "^24.2.1" 37 | pyarrow = "16.1.0" # transitive dependency, pinned here to avoid conflicts 38 | tenacity = "^8.3.0" 39 | zope-interface = "6.4.post2" 40 | gradio = "5.1.0" 41 | websocket-client = "*" 42 | bs4 = "*" 43 | 44 | [tool.poetry.group.llama-index.dependencies] 45 | llama-index = "*" 46 | llama-index-vector-stores-chroma = "*" 47 | chromadb = "*" 48 | llama-index-embeddings-huggingface = "*" 49 | torch = "2.2.2" 50 | llama-index-embeddings-azure-openai = "*" 51 | llama-index-embeddings-ollama = "*" 52 | 53 | [tool.poetry.group.dev.dependencies] 54 | ruff = "0.4.8" 55 | mypy = "1.10.0" 56 | pre-commit = "3.7.1" 57 | 58 | [tool.poetry.group.test.dependencies] 59 | pytest = "*" 60 | pytest-cov = "*" 61 | pytest-asyncio = "*" 62 | pytest-forked = "*" 63 | flake8 = "*" 64 | 
openai = "*" 65 | python-docx = "*" 66 | PyPDF2 = "*" 67 | pylatexenc = "*" 68 | python-pptx = "*" 69 | opencv-python = "*" 70 | pandas = "*" 71 | reportlab = "*" 72 | 73 | [tool.coverage.run] 74 | concurrency = ["gevent"] 75 | 76 | 77 | [tool.poetry.group.evaluation.dependencies] 78 | streamlit = "*" 79 | whatthepatch = "*" 80 | retry = "*" 81 | evaluate = "*" 82 | 83 | [build-system] 84 | build-backend = "poetry.core.masonry.api" 85 | requires = [ 86 | "poetry-core", 87 | ] 88 | 89 | [tool.autopep8] 90 | # autopep8 fights with mypy on line length issue 91 | ignore = [ "E501" ] 92 | 93 | [tool.black] 94 | # prevent black (if installed) from changing single quotes to double quotes 95 | skip-string-normalization = true 96 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = -p no:warnings 3 | -------------------------------------------------------------------------------- /tests/integration/mock/BrowsingAgent/test_browse_internet/response_001.log: -------------------------------------------------------------------------------- 1 | In order to accomplish my goal, I need to navigate to the localhost:8000 page. 2 | ```goto('http://localhost:8000')``` 3 | -------------------------------------------------------------------------------- /tests/integration/mock/BrowsingAgent/test_browse_internet/response_002.log: -------------------------------------------------------------------------------- 1 | In order to accomplish my goal, I need to click on the button with bid 10 to reveal the answer to life, the universe, and everything. 
2 | ```click("10")``` 3 | -------------------------------------------------------------------------------- /tests/integration/mock/BrowsingAgent/test_browse_internet/response_003.log: -------------------------------------------------------------------------------- 1 | In order to accomplish my goal, I need to read the static text that reveals the answer to life, the universe, and everything. The text states: "The answer is OpenDevin is all you need!" 2 | 3 | ```send_msg_to_user('The answer is OpenDevin is all you need!')``` 4 | -------------------------------------------------------------------------------- /tests/integration/mock/CodeActAgent/test_browse_internet/response_001.log: -------------------------------------------------------------------------------- 1 | Sure! Let me browse the server's homepage at http://localhost:8000 and find the ultimate answer to life. 2 | 3 | 4 | Get the content on "http://localhost:8000" 5 | -------------------------------------------------------------------------------- /tests/integration/mock/CodeActAgent/test_browse_internet/response_002.log: -------------------------------------------------------------------------------- 1 | In order to accomplish my goal, I need to get the content on the server's homepage at http://localhost:8000. 2 | ```goto('http://localhost:8000')``` 3 | -------------------------------------------------------------------------------- /tests/integration/mock/CodeActAgent/test_browse_internet/response_003.log: -------------------------------------------------------------------------------- 1 | In order to accomplish my goal, I need to click on the button labeled "Click me" with bid 10 to reveal the answer to life, the universe, and everything. 
2 | ```click("10")``` 3 | -------------------------------------------------------------------------------- /tests/integration/mock/CodeActAgent/test_browse_internet/response_004.log: -------------------------------------------------------------------------------- 1 | In order to accomplish my goal, I need to read the static text that reveals the answer to life, the universe, and everything. The text is already visible in the accessibility tree. 2 | 3 | The answer is: "OpenDevin is all you need!" 4 | 5 | ```send_msg_to_user('The answer to life, the universe, and everything is: OpenDevin is all you need!')``` 6 | -------------------------------------------------------------------------------- /tests/integration/mock/CodeActAgent/test_browse_internet/response_005.log: -------------------------------------------------------------------------------- 1 | The content of the server's homepage at localhost:8000 is: "The answer to life, the universe, and everything is: OpenDevin is all you need!" 2 | -------------------------------------------------------------------------------- /tests/integration/mock/CodeActAgent/test_edits/response_001.log: -------------------------------------------------------------------------------- 1 | Sure! Let's start by opening the `bad.txt` file to see its content. 2 | 3 | 4 | open_file('bad.txt') 5 | -------------------------------------------------------------------------------- /tests/integration/mock/CodeActAgent/test_edits/response_002.log: -------------------------------------------------------------------------------- 1 | Let's correct the typos in the `bad.txt` file. 2 | 3 | 4 | edit_file('bad.txt', start=1, end=4, content="""This is a stupid typo. 5 | Really? 6 | No more typos! 
7 | Enjoy!""") 8 | -------------------------------------------------------------------------------- /tests/integration/mock/CodeActAgent/test_edits/response_003.log: -------------------------------------------------------------------------------- 1 | The typos in `bad.txt` have been corrected. If you need any further assistance, feel free to ask! 2 | -------------------------------------------------------------------------------- /tests/integration/mock/CodeActAgent/test_ipython/response_001.log: -------------------------------------------------------------------------------- 1 | Sure! Let me write 'hello world' to a text file named 'test.txt' in the '/workspace' directory without asking for confirmation: 2 | 3 | with open('/workspace/test.txt', 'w') as f: 4 | f.write('hello world') 5 | -------------------------------------------------------------------------------- /tests/integration/mock/CodeActAgent/test_ipython/response_002.log: -------------------------------------------------------------------------------- 1 | The text file 'test.txt' containing 'hello world' has been successfully created in the '/workspace' directory. If you need any further assistance or have any more requests, feel free to let me know! 2 | -------------------------------------------------------------------------------- /tests/integration/mock/CodeActAgent/test_ipython_module/response_001.log: -------------------------------------------------------------------------------- 1 | Understood! Let's proceed by installing the `pymsgbox` package version 1.0.9: 2 | 3 | 4 | ``` 5 | pythonCopy code` 6 | # Install pymsgbox version 1.0.9 7 | %pip install pymsgbox==1.0.9 8 | ` 9 | ``` 10 | Once the installation is complete, we'll import `pymsgbox` and print its version to a file named `test.txt` in the `/workspace` directory. Let's continue! 
11 | -------------------------------------------------------------------------------- /tests/integration/mock/CodeActAgent/test_ipython_module/response_002.log: -------------------------------------------------------------------------------- 1 | Now that `pymsgbox` is installed, let's import it and print its version to a file named `test.txt`: 2 | 3 | 4 | ``` 5 | pythonCopy code` 6 | import pymsgbox 7 | 8 | # Get the version of pymsgbox 9 | pymsgbox_version = pymsgbox.__version__ 10 | 11 | # Write the version to a file 12 | with open("/workspace/test.txt", "w") as f: 13 | f.write(pymsgbox_version) 14 | ` 15 | ``` 16 | This will import `pymsgbox`, retrieve its version, and then write the version to the `test.txt` file in the `/workspace` directory. 17 | -------------------------------------------------------------------------------- /tests/integration/mock/CodeActAgent/test_ipython_module/response_003.log: -------------------------------------------------------------------------------- 1 | The `pymsgbox` package version 1.0.9 has been successfully installed and its version has been written to the `test.txt` file in the `/workspace` directory. If you need any further assistance, feel free to ask! 2 | -------------------------------------------------------------------------------- /tests/integration/mock/CodeActAgent/test_write_simple_script/response_001.log: -------------------------------------------------------------------------------- 1 | Sure! I'll create the shell script `hello.sh` for you. 2 | 3 | 4 | echo 'echo "hello"' > hello.sh 5 | chmod +x hello.sh 6 | -------------------------------------------------------------------------------- /tests/integration/mock/CodeActAgent/test_write_simple_script/response_002.log: -------------------------------------------------------------------------------- 1 | The shell script `hello.sh` has been created and made executable. Let's run it to ensure it works correctly. 
2 | 3 | 4 | ./hello.sh 5 | -------------------------------------------------------------------------------- /tests/integration/mock/CodeActAgent/test_write_simple_script/response_003.log: -------------------------------------------------------------------------------- 1 | The shell script `hello.sh` successfully prints "hello". If you have any further tasks or questions, feel free to let me know! 2 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_edits/prompt_004.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a software engineer. You've inherited an existing codebase, which you 7 | need to modify to complete this task: 8 | 9 | Fix typos in bad.txt. Do not ask me for confirmation at any point. 10 | 11 | 12 | Here's a summary of the codebase, as it relates to this task: 13 | 14 | The codebase contains a single file named 'bad.txt' with some typos. The content of 'bad.txt' is: 15 | 16 | This is a stupid typoo. 17 | Really? 18 | No mor typos! 19 | Enjoy! 20 | 21 | The engineering team needs to correct the typos in this file. 22 | 23 | 24 | ## Available Actions 25 | * `run` - runs a command on the command line in a Linux shell. Arguments: 26 | * `command` - the command to run 27 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 28 | 29 | * `write` - writes the content to a file. Arguments: 30 | * `path` - the path of the file to write 31 | * `content` - the content to write to the file 32 | 33 | * `read` - reads the content of a file. Arguments: 34 | * `path` - the path of the file to read 35 | 36 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. 
Arguments: 37 | * `content` - the thought to record 38 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 39 | 40 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: 41 | * `outputs` - a dictionary representing the outputs of your task, if any 42 | 43 | 44 | Do NOT finish until you have completed the tasks. 45 | 46 | ## History 47 | Here is a recent history of actions you've taken in service of this plan, 48 | as well as observations you've made. This only includes the MOST RECENT 49 | actions and observations--more may have happened before that. 50 | They are time-ordered, with your most recent action at the bottom. 51 | 52 | [] 53 | 54 | ## Format 55 | Your response MUST be in JSON format. It must be an object, and it must contain two fields: 56 | * `action`, which is one of the actions specified here 57 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 58 | 59 | You MUST NOT include any other text besides the JSON response 60 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_edits/prompt_005.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a software engineer. You've inherited an existing codebase, which you 7 | need to modify to complete this task: 8 | 9 | Fix typos in bad.txt. Do not ask me for confirmation at any point. 10 | 11 | 12 | Here's a summary of the codebase, as it relates to this task: 13 | 14 | The codebase contains a single file named 'bad.txt' with some typos. The content of 'bad.txt' is: 15 | 16 | This is a stupid typoo. 17 | Really? 18 | No mor typos! 19 | Enjoy! 20 | 21 | The engineering team needs to correct the typos in this file. 
22 | 23 | 24 | ## Available Actions 25 | * `run` - runs a command on the command line in a Linux shell. Arguments: 26 | * `command` - the command to run 27 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 28 | 29 | * `write` - writes the content to a file. Arguments: 30 | * `path` - the path of the file to write 31 | * `content` - the content to write to the file 32 | 33 | * `read` - reads the content of a file. Arguments: 34 | * `path` - the path of the file to read 35 | 36 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: 37 | * `content` - the thought to record 38 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 39 | 40 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: 41 | * `outputs` - a dictionary representing the outputs of your task, if any 42 | 43 | 44 | Do NOT finish until you have completed the tasks. 45 | 46 | ## History 47 | Here is a recent history of actions you've taken in service of this plan, 48 | as well as observations you've made. This only includes the MOST RECENT 49 | actions and observations--more may have happened before that. 50 | They are time-ordered, with your most recent action at the bottom. 51 | 52 | [[{"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "bad.txt"}}]] 53 | 54 | ## Format 55 | Your response MUST be in JSON format. 
It must be an object, and it must contain two fields: 56 | * `action`, which is one of the actions specified here 57 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 58 | 59 | You MUST NOT include any other text besides the JSON response 60 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_edits/prompt_006.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a software engineer. You've inherited an existing codebase, which you 7 | need to modify to complete this task: 8 | 9 | Fix typos in bad.txt. Do not ask me for confirmation at any point. 10 | 11 | 12 | Here's a summary of the codebase, as it relates to this task: 13 | 14 | The codebase contains a single file named 'bad.txt' with some typos. The content of 'bad.txt' is: 15 | 16 | This is a stupid typoo. 17 | Really? 18 | No mor typos! 19 | Enjoy! 20 | 21 | The engineering team needs to correct the typos in this file. 22 | 23 | 24 | ## Available Actions 25 | * `run` - runs a command on the command line in a Linux shell. Arguments: 26 | * `command` - the command to run 27 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 28 | 29 | * `write` - writes the content to a file. Arguments: 30 | * `path` - the path of the file to write 31 | * `content` - the content to write to the file 32 | 33 | * `read` - reads the content of a file. Arguments: 34 | * `path` - the path of the file to read 35 | 36 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. 
Arguments: 37 | * `content` - the thought to record 38 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 39 | 40 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: 41 | * `outputs` - a dictionary representing the outputs of your task, if any 42 | 43 | 44 | Do NOT finish until you have completed the tasks. 45 | 46 | ## History 47 | Here is a recent history of actions you've taken in service of this plan, 48 | as well as observations you've made. This only includes the MOST RECENT 49 | actions and observations--more may have happened before that. 50 | They are time-ordered, with your most recent action at the bottom. 51 | 52 | [[{"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "bad.txt"}}], [{"source": "agent", "action": "write", "args": {"path": "bad.txt", "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "bad.txt"}}]] 53 | 54 | ## Format 55 | Your response MUST be in JSON format. It must be an object, and it must contain two fields: 56 | * `action`, which is one of the actions specified here 57 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 58 | 59 | You MUST NOT include any other text besides the JSON response 60 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_edits/prompt_007.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a quality assurance engineer. 
Another engineer has made changes to the 7 | codebase which are supposed to solve this task: 8 | 9 | Fix typos in bad.txt. Do not ask me for confirmation at any point. 10 | 11 | Note the changes might have already been applied in-line. You should focus on 12 | validating if the task is solved, nothing else. 13 | 14 | ## Available Actions 15 | * `run` - runs a command on the command line in a Linux shell. Arguments: 16 | * `command` - the command to run 17 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 18 | 19 | * `read` - reads the content of a file. Arguments: 20 | * `path` - the path of the file to read 21 | 22 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: 23 | * `content` - the thought to record 24 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 25 | 26 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: 27 | * `outputs` - a dictionary representing the outputs of your task, if any 28 | 29 | 30 | You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts. 31 | 32 | Do NOT finish until you know whether the task is complete and correct. 33 | When you're done, add a `completed` boolean to the `outputs` of the `finish` action. 34 | If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action 35 | explaining what the problem is. 36 | 37 | ## History 38 | Here is a recent history of actions you've taken in service of this plan, 39 | as well as observations you've made. This only includes the MOST RECENT 40 | actions and observations--more may have happened before that. 
41 | They are time-ordered, with your most recent action at the bottom. 42 | 43 | [] 44 | 45 | ## Format 46 | Your response MUST be in JSON format. It must be an object, and it must contain two fields: 47 | * `action`, which is one of the actions specified here 48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 49 | 50 | You MUST NOT include any other text besides the JSON response 51 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_edits/prompt_008.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a quality assurance engineer. Another engineer has made changes to the 7 | codebase which are supposed to solve this task: 8 | 9 | Fix typos in bad.txt. Do not ask me for confirmation at any point. 10 | 11 | Note the changes might have already been applied in-line. You should focus on 12 | validating if the task is solved, nothing else. 13 | 14 | ## Available Actions 15 | * `run` - runs a command on the command line in a Linux shell. Arguments: 16 | * `command` - the command to run 17 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 18 | 19 | * `read` - reads the content of a file. Arguments: 20 | * `path` - the path of the file to read 21 | 22 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: 23 | * `content` - the thought to record 24 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 25 | 26 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. 
Arguments: 27 | * `outputs` - a dictionary representing the outputs of your task, if any 28 | 29 | 30 | You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts. 31 | 32 | Do NOT finish until you know whether the task is complete and correct. 33 | When you're done, add a `completed` boolean to the `outputs` of the `finish` action. 34 | If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action 35 | explaining what the problem is. 36 | 37 | ## History 38 | Here is a recent history of actions you've taken in service of this plan, 39 | as well as observations you've made. This only includes the MOST RECENT 40 | actions and observations--more may have happened before that. 41 | They are time-ordered, with your most recent action at the bottom. 42 | 43 | [[{"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n\n", "extras": {"path": "bad.txt"}}]] 44 | 45 | ## Format 46 | Your response MUST be in JSON format. 
It must be an object, and it must contain two fields: 47 | * `action`, which is one of the actions specified here 48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 49 | 50 | You MUST NOT include any other text besides the JSON response 51 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_edits/response_001.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "run", 3 | "args": { 4 | "command": "ls", 5 | "background": false 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_edits/response_002.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "read", 3 | "args": { 4 | "path": "bad.txt" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_edits/response_003.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "finish", 3 | "args": { 4 | "outputs": { 5 | "summary": "The codebase contains a single file named 'bad.txt' with some typos. The content of 'bad.txt' is:\n\nThis is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n\nThe engineering team needs to correct the typos in this file." 
6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_edits/response_004.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "read", 4 | "args": { 5 | "path": "bad.txt" 6 | } 7 | } 8 | ``` 9 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_edits/response_005.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "write", 4 | "args": { 5 | "path": "bad.txt", 6 | "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n" 7 | } 8 | } 9 | ``` 10 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_edits/response_006.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "finish", 4 | "args": { 5 | "outputs": {} 6 | } 7 | } 8 | ``` 9 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_edits/response_007.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "read", 4 | "args": { 5 | "path": "bad.txt" 6 | } 7 | } 8 | ``` 9 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_edits/response_008.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "finish", 4 | "args": { 5 | "outputs": { 6 | "completed": true 7 | } 8 | } 9 | } 10 | ``` 11 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_003.log: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a software engineer. You've inherited an existing codebase, which you 7 | need to modify to complete this task: 8 | 9 | Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. 10 | 11 | 12 | Here's a summary of the codebase, as it relates to this task: 13 | 14 | The codebase appears to be empty. Engineers should start everything from scratch. 15 | 16 | 17 | ## Available Actions 18 | * `run` - runs a command on the command line in a Linux shell. Arguments: 19 | * `command` - the command to run 20 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 21 | 22 | * `write` - writes the content to a file. Arguments: 23 | * `path` - the path of the file to write 24 | * `content` - the content to write to the file 25 | 26 | * `read` - reads the content of a file. Arguments: 27 | * `path` - the path of the file to read 28 | 29 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: 30 | * `content` - the thought to record 31 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 32 | 33 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: 34 | * `outputs` - a dictionary representing the outputs of your task, if any 35 | 36 | 37 | Do NOT finish until you have completed the tasks. 38 | 39 | ## History 40 | Here is a recent history of actions you've taken in service of this plan, 41 | as well as observations you've made. This only includes the MOST RECENT 42 | actions and observations--more may have happened before that. 
43 | They are time-ordered, with your most recent action at the bottom. 44 | 45 | [] 46 | 47 | ## Format 48 | Your response MUST be in JSON format. It must be an object, and it must contain two fields: 49 | * `action`, which is one of the actions specified here 50 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 51 | 52 | You MUST NOT include any other text besides the JSON response 53 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_004.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a software engineer. You've inherited an existing codebase, which you 7 | need to modify to complete this task: 8 | 9 | Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. 10 | 11 | 12 | Here's a summary of the codebase, as it relates to this task: 13 | 14 | The codebase appears to be empty. Engineers should start everything from scratch. 15 | 16 | 17 | ## Available Actions 18 | * `run` - runs a command on the command line in a Linux shell. Arguments: 19 | * `command` - the command to run 20 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 21 | 22 | * `write` - writes the content to a file. Arguments: 23 | * `path` - the path of the file to write 24 | * `content` - the content to write to the file 25 | 26 | * `read` - reads the content of a file. Arguments: 27 | * `path` - the path of the file to read 28 | 29 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. 
Arguments: 30 | * `content` - the thought to record 31 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 32 | 33 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: 34 | * `outputs` - a dictionary representing the outputs of your task, if any 35 | 36 | 37 | Do NOT finish until you have completed the tasks. 38 | 39 | ## History 40 | Here is a recent history of actions you've taken in service of this plan, 41 | as well as observations you've made. This only includes the MOST RECENT 42 | actions and observations--more may have happened before that. 43 | They are time-ordered, with your most recent action at the bottom. 44 | 45 | [[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}]] 46 | 47 | ## Format 48 | Your response MUST be in JSON format. It must be an object, and it must contain two fields: 49 | * `action`, which is one of the actions specified here 50 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 51 | 52 | You MUST NOT include any other text besides the JSON response 53 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_005.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a software engineer. You've inherited an existing codebase, which you 7 | need to modify to complete this task: 8 | 9 | Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. 10 | 11 | 12 | Here's a summary of the codebase, as it relates to this task: 13 | 14 | The codebase appears to be empty. 
Engineers should start everything from scratch. 15 | 16 | 17 | ## Available Actions 18 | * `run` - runs a command on the command line in a Linux shell. Arguments: 19 | * `command` - the command to run 20 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 21 | 22 | * `write` - writes the content to a file. Arguments: 23 | * `path` - the path of the file to write 24 | * `content` - the content to write to the file 25 | 26 | * `read` - reads the content of a file. Arguments: 27 | * `path` - the path of the file to read 28 | 29 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: 30 | * `content` - the thought to record 31 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 32 | 33 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: 34 | * `outputs` - a dictionary representing the outputs of your task, if any 35 | 36 | 37 | Do NOT finish until you have completed the tasks. 38 | 39 | ## History 40 | Here is a recent history of actions you've taken in service of this plan, 41 | as well as observations you've made. This only includes the MOST RECENT 42 | actions and observations--more may have happened before that. 43 | They are time-ordered, with your most recent action at the bottom. 
44 | 45 | [[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}], [{"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]] 46 | 47 | ## Format 48 | Your response MUST be in JSON format. It must be an object, and it must contain two fields: 49 | * `action`, which is one of the actions specified here 50 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 51 | 52 | You MUST NOT include any other text besides the JSON response 53 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_006.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a quality assurance engineer. Another engineer has made changes to the 7 | codebase which are supposed to solve this task: 8 | 9 | Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. 10 | 11 | Note the changes might have already been applied in-line. You should focus on 12 | validating if the task is solved, nothing else. 13 | 14 | ## Available Actions 15 | * `run` - runs a command on the command line in a Linux shell. Arguments: 16 | * `command` - the command to run 17 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 18 | 19 | * `read` - reads the content of a file. 
Arguments: 20 | * `path` - the path of the file to read 21 | 22 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: 23 | * `content` - the thought to record 24 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 25 | 26 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: 27 | * `outputs` - a dictionary representing the outputs of your task, if any 28 | 29 | 30 | You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts. 31 | 32 | Do NOT finish until you know whether the task is complete and correct. 33 | When you're done, add a `completed` boolean to the `outputs` of the `finish` action. 34 | If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action 35 | explaining what the problem is. 36 | 37 | ## History 38 | Here is a recent history of actions you've taken in service of this plan, 39 | as well as observations you've made. This only includes the MOST RECENT 40 | actions and observations--more may have happened before that. 41 | They are time-ordered, with your most recent action at the bottom. 42 | 43 | [] 44 | 45 | ## Format 46 | Your response MUST be in JSON format. It must be an object, and it must contain two fields: 47 | * `action`, which is one of the actions specified here 48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 49 | 50 | You MUST NOT include any other text besides the JSON response 51 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_007.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a quality assurance engineer. 
Another engineer has made changes to the 7 | codebase which are supposed to solve this task: 8 | 9 | Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. 10 | 11 | Note the changes might have already been applied in-line. You should focus on 12 | validating if the task is solved, nothing else. 13 | 14 | ## Available Actions 15 | * `run` - runs a command on the command line in a Linux shell. Arguments: 16 | * `command` - the command to run 17 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 18 | 19 | * `read` - reads the content of a file. Arguments: 20 | * `path` - the path of the file to read 21 | 22 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: 23 | * `content` - the thought to record 24 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 25 | 26 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: 27 | * `outputs` - a dictionary representing the outputs of your task, if any 28 | 29 | 30 | You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts. 31 | 32 | Do NOT finish until you know whether the task is complete and correct. 33 | When you're done, add a `completed` boolean to the `outputs` of the `finish` action. 34 | If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action 35 | explaining what the problem is. 36 | 37 | ## History 38 | Here is a recent history of actions you've taken in service of this plan, 39 | as well as observations you've made. This only includes the MOST RECENT 40 | actions and observations--more may have happened before that. 
41 | They are time-ordered, with your most recent action at the bottom. 42 | 43 | [[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\necho 'hello'\n", "extras": {"path": "hello.sh"}}]] 44 | 45 | ## Format 46 | Your response MUST be in JSON format. It must be an object, and it must contain two fields: 47 | * `action`, which is one of the actions specified here 48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 49 | 50 | You MUST NOT include any other text besides the JSON response 51 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_008.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a quality assurance engineer. Another engineer has made changes to the 7 | codebase which are supposed to solve this task: 8 | 9 | Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. 10 | 11 | Note the changes might have already been applied in-line. You should focus on 12 | validating if the task is solved, nothing else. 13 | 14 | ## Available Actions 15 | * `run` - runs a command on the command line in a Linux shell. Arguments: 16 | * `command` - the command to run 17 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 18 | 19 | * `read` - reads the content of a file. Arguments: 20 | * `path` - the path of the file to read 21 | 22 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. 
Arguments: 23 | * `content` - the thought to record 24 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 25 | 26 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: 27 | * `outputs` - a dictionary representing the outputs of your task, if any 28 | 29 | 30 | You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts. 31 | 32 | Do NOT finish until you know whether the task is complete and correct. 33 | When you're done, add a `completed` boolean to the `outputs` of the `finish` action. 34 | If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action 35 | explaining what the problem is. 36 | 37 | ## History 38 | Here is a recent history of actions you've taken in service of this plan, 39 | as well as observations you've made. This only includes the MOST RECENT 40 | actions and observations--more may have happened before that. 41 | They are time-ordered, with your most recent action at the bottom. 42 | 43 | [[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\necho 'hello'\n", "extras": {"path": "hello.sh"}}], [{"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]] 44 | 45 | ## Format 46 | Your response MUST be in JSON format. 
It must be an object, and it must contain two fields: 47 | * `action`, which is one of the actions specified here 48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 49 | 50 | You MUST NOT include any other text besides the JSON response 51 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_write_simple_script/response_001.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "run", 3 | "args": { 4 | "command": "ls", 5 | "background": false 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_write_simple_script/response_002.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "finish", 3 | "args": { 4 | "outputs": { 5 | "summary": "The codebase appears to be empty. Engineers should start everything from scratch." 
6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_write_simple_script/response_003.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "write", 3 | "args": { 4 | "path": "hello.sh", 5 | "content": "#!/bin/bash\necho 'hello'" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_write_simple_script/response_004.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "run", 3 | "args": { 4 | "command": "bash hello.sh", 5 | "background": false 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_write_simple_script/response_005.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "finish", 3 | "args": { 4 | "outputs": { 5 | "result": "hello" 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_write_simple_script/response_006.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "read", 3 | "args": { 4 | "path": "hello.sh" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_write_simple_script/response_007.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "run", 3 | "args": { 4 | "command": "bash hello.sh", 5 | "background": false 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /tests/integration/mock/DelegatorAgent/test_write_simple_script/response_008.log: 
-------------------------------------------------------------------------------- 1 | The task was to create a shell script named `hello.sh` that prints "hello" without asking for any confirmation. Based on the history: 2 | 3 | 1. The content of `hello.sh` was read and confirmed to contain: 4 | ```bash 5 | #!/bin/bash 6 | echo 'hello' 7 | ``` 8 | This script correctly prints "hello" when executed. 9 | 10 | 2. The script was executed using the command `bash hello.sh`, and the output was "hello", which matches the expected output. 11 | 12 | Since the script behaves as expected and meets the requirements of the task, the task is completed successfully. 13 | 14 | ```json 15 | { 16 | "action": "finish", 17 | "args": { 18 | "outputs": { 19 | "completed": true 20 | } 21 | } 22 | } 23 | ``` 24 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_edits/response_001.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "delegate", 4 | "args": { 5 | "agent": "TypoFixerAgent", 6 | "inputs": { 7 | "task": "Fix typos in bad.txt" 8 | } 9 | } 10 | } 11 | ``` 12 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_edits/response_002.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "read", 3 | "args": { 4 | "path": "./bad.txt" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_edits/response_003.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "message", 3 | "args": { 4 | "content": "The content of 'bad.txt' has been read. The following typos have been identified:\n1. 'typoo' should be 'typo'\n2. 
'mor' should be 'more'\nI will now proceed to fix these typos and overwrite the file with the corrected content.", 5 | "wait_for_response": false 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_edits/response_004.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "write", 3 | "args": { 4 | "path": "./bad.txt", 5 | "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_edits/response_005.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "finish", 3 | "args": { 4 | "outputs": { 5 | "summary": { 6 | "file": "bad.txt", 7 | "typos_fixed": [ 8 | { 9 | "original": "typoo", 10 | "corrected": "typo" 11 | }, 12 | { 13 | "original": "mor", 14 | "corrected": "more" 15 | } 16 | ] 17 | } 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_edits/response_006.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "finish", 4 | "args": { 5 | "outputs": { 6 | "file": "bad.txt", 7 | "typos_fixed": [ 8 | { 9 | "original": "typoo", 10 | "corrected": "typo" 11 | }, 12 | { 13 | "original": "mor", 14 | "corrected": "more" 15 | } 16 | ] 17 | } 18 | } 19 | } 20 | ``` 21 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_simple_task_rejection/prompt_002.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a responsible software engineer and always write good commit messages. 
7 | 8 | Please analyze the diff in the staging area, understand the context and content 9 | of the updates from the diff only. Identify key elements like: 10 | - Which files are affected? 11 | - What types of changes were made (e.g., new features, bug fixes, refactoring, documentation, testing)? 12 | 13 | Then you should generate a commit message that succinctly summarizes the staged 14 | changes. The commit message should include: 15 | - A summary line that clearly states the purpose of the changes. 16 | - Optionally, a detailed description if the changes are complex or need further explanation. 17 | 18 | You should find the diff using `git diff --cached`, compile a commit message, 19 | and call the `finish` action with `outputs.answer` set to the answer. If current 20 | repo is not a valid git repo, or there is no diff in the staging area, please call 21 | the `reject` action. 22 | 23 | ## History 24 | Here is a recent history of actions you've taken in service of this plan, 25 | as well as observations you've made. This only includes the MOST RECENT 26 | actions and observations--more may have happened before that. 27 | They are time-ordered, with your most recent action at the bottom. 28 | 29 | [] 30 | 31 | If the last item in the history is an error, you should try to fix it. 32 | 33 | ## Available Actions 34 | * `run` - runs a command on the command line in a Linux shell. Arguments: 35 | * `command` - the command to run 36 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 37 | 38 | * `reject` - reject the task. Arguments: 39 | * `outputs` - a dictionary with only a `reason` attribute 40 | 41 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. 
Arguments: 42 | * `outputs` - a dictionary representing the outputs of your task, if any 43 | 44 | 45 | ## Format 46 | Your response MUST be in JSON format. It must be an object, and it must contain two fields: 47 | * `action`, which is one of the actions specified here 48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 49 | 50 | You MUST NOT include any other text besides the JSON response 51 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_001.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "delegate", 4 | "args": { 5 | "agent": "CommitWriterAgent", 6 | "inputs": {} 7 | } 8 | } 9 | ``` 10 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_002.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "run", 4 | "args": { 5 | "command": "git diff --cached" 6 | } 7 | } 8 | ``` 9 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_003.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "run", 4 | "args": { 5 | "command": "git diff --cached", 6 | "background": false 7 | } 8 | } 9 | ``` 10 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_004.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "run", 4 | "args": { 5 | "command": "git diff --staged", 6 | "background": false 7 | } 8 | } 9 | ``` 10 | 
-------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_005.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "reject", 4 | "args": { 5 | "reason": "The current repository is not a valid git repository or there is no diff in the staging area." 6 | } 7 | } 8 | ``` 9 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_006.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "reject", 4 | "args": { 5 | "reason": "The current repository is not a valid git repository or there is no diff in the staging area." 6 | } 7 | } 8 | ``` 9 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_007.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "reject", 4 | "args": { 5 | "outputs": { 6 | "reason": "The current repository is not a valid git repository or there is no diff in the staging area." 7 | } 8 | } 9 | } 10 | ``` 11 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_simple_task_rejection/response_008.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "reject", 4 | "args": { 5 | "outputs": { 6 | "reason": "The current repository is not a valid git repository or there is no diff in the staging area." 
7 | } 8 | } 9 | } 10 | ``` 11 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_002.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a software engineer. You've inherited an existing codebase, which you 7 | need to modify to complete this task: 8 | 9 | Write a shell script 'hello.sh' that prints 'hello'. 10 | 11 | 12 | 13 | ## Available Actions 14 | * `run` - runs a command on the command line in a Linux shell. Arguments: 15 | * `command` - the command to run 16 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 17 | 18 | * `write` - writes the content to a file. Arguments: 19 | * `path` - the path of the file to write 20 | * `content` - the content to write to the file 21 | 22 | * `read` - reads the content of a file. Arguments: 23 | * `path` - the path of the file to read 24 | 25 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: 26 | * `content` - the thought to record 27 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 28 | 29 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: 30 | * `outputs` - a dictionary representing the outputs of your task, if any 31 | 32 | 33 | Do NOT finish until you have completed the tasks. 34 | 35 | ## History 36 | Here is a recent history of actions you've taken in service of this plan, 37 | as well as observations you've made. This only includes the MOST RECENT 38 | actions and observations--more may have happened before that. 
39 | They are time-ordered, with your most recent action at the bottom. 40 | 41 | [] 42 | 43 | ## Format 44 | Your response MUST be in JSON format. It must be an object, and it must contain two fields: 45 | * `action`, which is one of the actions specified here 46 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 47 | 48 | You MUST NOT include any other text besides the JSON response 49 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_003.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a software engineer. You've inherited an existing codebase, which you 7 | need to modify to complete this task: 8 | 9 | Write a shell script 'hello.sh' that prints 'hello'. 10 | 11 | 12 | 13 | ## Available Actions 14 | * `run` - runs a command on the command line in a Linux shell. Arguments: 15 | * `command` - the command to run 16 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 17 | 18 | * `write` - writes the content to a file. Arguments: 19 | * `path` - the path of the file to write 20 | * `content` - the content to write to the file 21 | 22 | * `read` - reads the content of a file. Arguments: 23 | * `path` - the path of the file to read 24 | 25 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: 26 | * `content` - the thought to record 27 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 28 | 29 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. 
Arguments: 30 | * `outputs` - a dictionary representing the outputs of your task, if any 31 | 32 | 33 | Do NOT finish until you have completed the tasks. 34 | 35 | ## History 36 | Here is a recent history of actions you've taken in service of this plan, 37 | as well as observations you've made. This only includes the MOST RECENT 38 | actions and observations--more may have happened before that. 39 | They are time-ordered, with your most recent action at the bottom. 40 | 41 | [[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}]] 42 | 43 | ## Format 44 | Your response MUST be in JSON format. It must be an object, and it must contain two fields: 45 | * `action`, which is one of the actions specified here 46 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 47 | 48 | You MUST NOT include any other text besides the JSON response 49 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_004.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a software engineer. You've inherited an existing codebase, which you 7 | need to modify to complete this task: 8 | 9 | Write a shell script 'hello.sh' that prints 'hello'. 10 | 11 | 12 | 13 | ## Available Actions 14 | * `run` - runs a command on the command line in a Linux shell. Arguments: 15 | * `command` - the command to run 16 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 
17 | 18 | * `write` - writes the content to a file. Arguments: 19 | * `path` - the path of the file to write 20 | * `content` - the content to write to the file 21 | 22 | * `read` - reads the content of a file. Arguments: 23 | * `path` - the path of the file to read 24 | 25 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: 26 | * `content` - the thought to record 27 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 28 | 29 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: 30 | * `outputs` - a dictionary representing the outputs of your task, if any 31 | 32 | 33 | Do NOT finish until you have completed the tasks. 34 | 35 | ## History 36 | Here is a recent history of actions you've taken in service of this plan, 37 | as well as observations you've made. This only includes the MOST RECENT 38 | actions and observations--more may have happened before that. 39 | They are time-ordered, with your most recent action at the bottom. 40 | 41 | [[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}], [{"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]] 42 | 43 | ## Format 44 | Your response MUST be in JSON format. 
It must be an object, and it must contain two fields: 45 | * `action`, which is one of the actions specified here 46 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 47 | 48 | You MUST NOT include any other text besides the JSON response 49 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_005.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a software engineer. You've inherited an existing codebase, which you 7 | need to modify to complete this task: 8 | 9 | Write a shell script 'hello.sh' that prints 'hello'. 10 | 11 | 12 | 13 | ## Available Actions 14 | * `run` - runs a command on the command line in a Linux shell. Arguments: 15 | * `command` - the command to run 16 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 17 | 18 | * `write` - writes the content to a file. Arguments: 19 | * `path` - the path of the file to write 20 | * `content` - the content to write to the file 21 | 22 | * `read` - reads the content of a file. Arguments: 23 | * `path` - the path of the file to read 24 | 25 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: 26 | * `content` - the thought to record 27 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 28 | 29 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: 30 | * `outputs` - a dictionary representing the outputs of your task, if any 31 | 32 | 33 | Do NOT finish until you have completed the tasks. 
34 | 35 | ## History 36 | Here is a recent history of actions you've taken in service of this plan, 37 | as well as observations you've made. This only includes the MOST RECENT 38 | actions and observations--more may have happened before that. 39 | They are time-ordered, with your most recent action at the bottom. 40 | 41 | [[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}], [{"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}], [{"source": "agent", "action": "run", "args": {"command": "./hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]] 42 | 43 | ## Format 44 | Your response MUST be in JSON format. It must be an object, and it must contain two fields: 45 | * `action`, which is one of the actions specified here 46 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 47 | 48 | You MUST NOT include any other text besides the JSON response 49 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_007.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a quality assurance engineer. Another engineer has made changes to the 7 | codebase which are supposed to solve this task: 8 | 9 | Verify that the shell script 'hello.sh' prints 'hello'. 10 | 11 | Note the changes might have already been applied in-line. 
You should focus on 12 | validating if the task is solved, nothing else. 13 | 14 | ## Available Actions 15 | * `run` - runs a command on the command line in a Linux shell. Arguments: 16 | * `command` - the command to run 17 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 18 | 19 | * `read` - reads the content of a file. Arguments: 20 | * `path` - the path of the file to read 21 | 22 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: 23 | * `content` - the thought to record 24 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 25 | 26 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: 27 | * `outputs` - a dictionary representing the outputs of your task, if any 28 | 29 | 30 | You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts. 31 | 32 | Do NOT finish until you know whether the task is complete and correct. 33 | When you're done, add a `completed` boolean to the `outputs` of the `finish` action. 34 | If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action 35 | explaining what the problem is. 36 | 37 | ## History 38 | Here is a recent history of actions you've taken in service of this plan, 39 | as well as observations you've made. This only includes the MOST RECENT 40 | actions and observations--more may have happened before that. 41 | They are time-ordered, with your most recent action at the bottom. 42 | 43 | [] 44 | 45 | ## Format 46 | Your response MUST be in JSON format. 
It must be an object, and it must contain two fields: 47 | * `action`, which is one of the actions specified here 48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 49 | 50 | You MUST NOT include any other text besides the JSON response 51 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_008.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a quality assurance engineer. Another engineer has made changes to the 7 | codebase which are supposed to solve this task: 8 | 9 | Verify that the shell script 'hello.sh' prints 'hello'. 10 | 11 | Note the changes might have already been applied in-line. You should focus on 12 | validating if the task is solved, nothing else. 13 | 14 | ## Available Actions 15 | * `run` - runs a command on the command line in a Linux shell. Arguments: 16 | * `command` - the command to run 17 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 18 | 19 | * `read` - reads the content of a file. Arguments: 20 | * `path` - the path of the file to read 21 | 22 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: 23 | * `content` - the thought to record 24 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 25 | 26 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: 27 | * `outputs` - a dictionary representing the outputs of your task, if any 28 | 29 | 30 | You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts. 
31 | 32 | Do NOT finish until you know whether the task is complete and correct. 33 | When you're done, add a `completed` boolean to the `outputs` of the `finish` action. 34 | If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action 35 | explaining what the problem is. 36 | 37 | ## History 38 | Here is a recent history of actions you've taken in service of this plan, 39 | as well as observations you've made. This only includes the MOST RECENT 40 | actions and observations--more may have happened before that. 41 | They are time-ordered, with your most recent action at the bottom. 42 | 43 | [[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'\n", "extras": {"path": "hello.sh"}}]] 44 | 45 | ## Format 46 | Your response MUST be in JSON format. It must be an object, and it must contain two fields: 47 | * `action`, which is one of the actions specified here 48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 49 | 50 | You MUST NOT include any other text besides the JSON response 51 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_009.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | ---------- 4 | 5 | # Task 6 | You are a quality assurance engineer. Another engineer has made changes to the 7 | codebase which are supposed to solve this task: 8 | 9 | Verify that the shell script 'hello.sh' prints 'hello'. 10 | 11 | Note the changes might have already been applied in-line. You should focus on 12 | validating if the task is solved, nothing else. 13 | 14 | ## Available Actions 15 | * `run` - runs a command on the command line in a Linux shell. 
Arguments: 16 | * `command` - the command to run 17 | * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 18 | 19 | * `read` - reads the content of a file. Arguments: 20 | * `path` - the path of the file to read 21 | 22 | * `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: 23 | * `content` - the thought to record 24 | * `wait_for_response` - set to `true` to wait for the user to respond before proceeding 25 | 26 | * `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: 27 | * `outputs` - a dictionary representing the outputs of your task, if any 28 | 29 | 30 | You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts. 31 | 32 | Do NOT finish until you know whether the task is complete and correct. 33 | When you're done, add a `completed` boolean to the `outputs` of the `finish` action. 34 | If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action 35 | explaining what the problem is. 36 | 37 | ## History 38 | Here is a recent history of actions you've taken in service of this plan, 39 | as well as observations you've made. This only includes the MOST RECENT 40 | actions and observations--more may have happened before that. 41 | They are time-ordered, with your most recent action at the bottom. 
42 | 43 | [[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'\n", "extras": {"path": "hello.sh"}}], [{"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]] 44 | 45 | ## Format 46 | Your response MUST be in JSON format. It must be an object, and it must contain two fields: 47 | * `action`, which is one of the actions specified here 48 | * `args`, which is a map of key-value pairs, specifying the arguments for that action 49 | 50 | You MUST NOT include any other text besides the JSON response 51 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/response_001.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "delegate", 4 | "args": { 5 | "agent": "CoderAgent", 6 | "inputs": { 7 | "task": "Write a shell script 'hello.sh' that prints 'hello'.", 8 | "summary": "" 9 | } 10 | } 11 | } 12 | ``` 13 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/response_002.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "write", 4 | "args": { 5 | "path": "hello.sh", 6 | "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'" 7 | } 8 | } 9 | ``` 10 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/response_003.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "run", 3 | 
"args": { 4 | "command": "chmod +x hello.sh" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/response_004.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "run", 3 | "args": { 4 | "command": "./hello.sh", 5 | "background": false 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/response_005.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "finish", 4 | "args": { 5 | "outputs": {} 6 | } 7 | } 8 | ``` 9 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/response_006.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "delegate", 4 | "args": { 5 | "agent": "VerifierAgent", 6 | "inputs": { 7 | "task": "Verify that the shell script 'hello.sh' prints 'hello'." 
8 | } 9 | } 10 | } 11 | ``` 12 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/response_007.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "read", 4 | "args": { 5 | "path": "hello.sh" 6 | } 7 | } 8 | ``` 9 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/response_008.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "run", 4 | "args": { 5 | "command": "bash hello.sh" 6 | } 7 | } 8 | ``` 9 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/response_009.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "finish", 4 | "args": { 5 | "outputs": { 6 | "completed": true 7 | } 8 | } 9 | } 10 | ``` 11 | -------------------------------------------------------------------------------- /tests/integration/mock/ManagerAgent/test_write_simple_script/response_010.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "finish", 4 | "args": { 5 | "outputs": {} 6 | } 7 | } 8 | ``` 9 | -------------------------------------------------------------------------------- /tests/integration/mock/MonologueAgent/test_write_simple_script/response_001.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "write", 4 | "args": { 5 | "path": "hello.sh", 6 | "content": "#!/bin/bash\necho 'hello'" 7 | } 8 | } 9 | ``` 10 | -------------------------------------------------------------------------------- /tests/integration/mock/MonologueAgent/test_write_simple_script/response_002.log: 
-------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "run", 4 | "args": { 5 | "command": "bash hello.sh", 6 | "background": false 7 | } 8 | } 9 | ``` 10 | -------------------------------------------------------------------------------- /tests/integration/mock/MonologueAgent/test_write_simple_script/response_003.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "message", 4 | "args": { 5 | "content": "I have successfully created and executed the 'hello.sh' script, which printed 'hello' as expected. I believe I have completed the task as specified.", 6 | "wait_for_response": false 7 | } 8 | } 9 | ``` 10 | -------------------------------------------------------------------------------- /tests/integration/mock/MonologueAgent/test_write_simple_script/response_004.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "finish", 4 | "args": {} 5 | } 6 | ``` 7 | -------------------------------------------------------------------------------- /tests/integration/mock/PlannerAgent/test_write_simple_script/response_001.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "add_task", 3 | "args": { 4 | "parent": "", 5 | "goal": "Write a shell script 'hello.sh' that prints 'hello'", 6 | "subtasks": [] 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /tests/integration/mock/PlannerAgent/test_write_simple_script/response_002.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "modify_task", 3 | "args": { 4 | "task_id": "0", 5 | "state": "in_progress" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /tests/integration/mock/PlannerAgent/test_write_simple_script/response_003.log: 
-------------------------------------------------------------------------------- 1 | { 2 | "action": "write", 3 | "args": { 4 | "path": "hello.sh", 5 | "content": "#!/bin/bash\necho 'hello'" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /tests/integration/mock/PlannerAgent/test_write_simple_script/response_004.log: -------------------------------------------------------------------------------- 1 | { 2 | "action": "run", 3 | "args": { 4 | "command": "bash hello.sh", 5 | "background": false 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /tests/integration/mock/PlannerAgent/test_write_simple_script/response_005.log: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "action": "modify_task", 4 | "args": { 5 | "task_id": "0", 6 | "state": "completed", 7 | "thought": "The script 'hello.sh' was written and executed successfully, producing the expected output 'hello'." 8 | } 9 | } 10 | ``` 11 | -------------------------------------------------------------------------------- /tests/integration/start_http_server.py: -------------------------------------------------------------------------------- 1 | import os 2 | from http.server import HTTPServer, SimpleHTTPRequestHandler 3 | 4 | web_dir = os.path.join(os.path.dirname(__file__), 'static') 5 | os.chdir(web_dir) 6 | handler = SimpleHTTPRequestHandler 7 | 8 | # Start the server 9 | server = HTTPServer(('localhost', 8000), handler) 10 | server.serve_forever() 11 | -------------------------------------------------------------------------------- /tests/integration/static/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | The Ultimate Answer 7 | 52 | 53 | 54 |
55 |

The Ultimate Answer

56 |

Click the button to reveal the answer to life, the universe, and everything.

57 | 58 |
59 |
60 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /tests/integration/workspace/test_edits/bad.txt: -------------------------------------------------------------------------------- 1 | This is a stupid typoo. 2 | Really? 3 | No mor typos! 4 | Enjoy! 5 | -------------------------------------------------------------------------------- /tests/test_fileops.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from opendevin.core.config import config 6 | from opendevin.runtime.server import files 7 | 8 | SANDBOX_PATH_PREFIX = '/workspace' 9 | 10 | 11 | def test_resolve_path(): 12 | assert ( 13 | files.resolve_path('test.txt', '/workspace') 14 | == Path(config.workspace_base) / 'test.txt' 15 | ) 16 | assert ( 17 | files.resolve_path('subdir/test.txt', '/workspace') 18 | == Path(config.workspace_base) / 'subdir' / 'test.txt' 19 | ) 20 | assert ( 21 | files.resolve_path(Path(SANDBOX_PATH_PREFIX) / 'test.txt', '/workspace') 22 | == Path(config.workspace_base) / 'test.txt' 23 | ) 24 | assert ( 25 | files.resolve_path( 26 | Path(SANDBOX_PATH_PREFIX) / 'subdir' / 'test.txt', '/workspace' 27 | ) 28 | == Path(config.workspace_base) / 'subdir' / 'test.txt' 29 | ) 30 | assert ( 31 | files.resolve_path( 32 | Path(SANDBOX_PATH_PREFIX) / 'subdir' / '..' / 'test.txt', '/workspace' 33 | ) 34 | == Path(config.workspace_base) / 'test.txt' 35 | ) 36 | with pytest.raises(PermissionError): 37 | files.resolve_path(Path(SANDBOX_PATH_PREFIX) / '..' 
/ 'test.txt', '/workspace') 38 | with pytest.raises(PermissionError): 39 | files.resolve_path(Path('..') / 'test.txt', '/workspace') 40 | with pytest.raises(PermissionError): 41 | files.resolve_path(Path('/') / 'test.txt', '/workspace') 42 | assert ( 43 | files.resolve_path('test.txt', '/workspace/test') 44 | == Path(config.workspace_base) / 'test' / 'test.txt' 45 | ) 46 | -------------------------------------------------------------------------------- /tests/unit/README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | 3 | This folder contains unit tests that can be run locally. 4 | 5 | Run all tests: 6 | 7 | ```bash 8 | poetry run pytest ./tests/unit 9 | ``` 10 | 11 | Run a specific test file: 12 | 13 | ```bash 14 | poetry run pytest ./tests/unit/test_micro_agents.py 15 | ``` 16 | 17 | Run a specific unit test: 18 | 19 | ```bash 20 | poetry run pytest ./tests/unit/test_micro_agents.py::test_coder_agent_with_summary 21 | ``` 22 | 23 | For more verbose output, add the `-v` flag to any of the above commands (`-vv` and `-vvv` are even more verbose): 24 | 25 | ```bash 26 | poetry run pytest -v ./tests/unit/test_micro_agents.py 27 | ``` 28 | 29 | For more details, see the [pytest documentation](https://docs.pytest.org/en/latest/contents.html). 30 | -------------------------------------------------------------------------------- /tests/unit/test_arg_parser.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from opendevin.core.config import get_parser 4 | 5 | 6 | def test_help_message(capsys): 7 | parser = get_parser() 8 | with pytest.raises(SystemExit): # `--help` causes SystemExit 9 | parser.parse_args(['--help']) 10 | captured = capsys.readouterr() 11 | expected_help_message = """ 12 | usage: pytest [-h] [-d DIRECTORY] [-t TASK] [-f FILE] [-c AGENT_CLS] 13 | [-m MODEL_NAME] [-i MAX_ITERATIONS] [-b MAX_BUDGET_PER_TASK] 14 | [-n MAX_CHARS] [--eval-output-dir EVAL_OUTPUT_DIR] 15 |
[--eval-n-limit EVAL_N_LIMIT] 16 | [--eval-num-workers EVAL_NUM_WORKERS] [--eval-note EVAL_NOTE] 17 | [-l LLM_CONFIG] 18 | 19 | Run an agent with a specific task 20 | 21 | options: 22 | -h, --help show this help message and exit 23 | -d DIRECTORY, --directory DIRECTORY 24 | The working directory for the agent 25 | -t TASK, --task TASK The task for the agent to perform 26 | -f FILE, --file FILE Path to a file containing the task. Overrides -t if 27 | both are provided. 28 | -c AGENT_CLS, --agent-cls AGENT_CLS 29 | The agent class to use 30 | -m MODEL_NAME, --model-name MODEL_NAME 31 | The (litellm) model name to use 32 | -i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS 33 | The maximum number of iterations to run the agent 34 | -b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK 35 | The maximum budget allowed per task, beyond which the 36 | agent will stop. 37 | -n MAX_CHARS, --max-chars MAX_CHARS 38 | The maximum number of characters to send to and 39 | receive from LLM per task 40 | --eval-output-dir EVAL_OUTPUT_DIR 41 | The directory to save evaluation output 42 | --eval-n-limit EVAL_N_LIMIT 43 | The number of instances to evaluate 44 | --eval-num-workers EVAL_NUM_WORKERS 45 | The number of workers to use for evaluation 46 | --eval-note EVAL_NOTE 47 | The note to add to the evaluation directory 48 | -l LLM_CONFIG, --llm-config LLM_CONFIG 49 | The group of llm settings, e.g. a [llama3] section in 50 | the toml file. Overrides model if both are provided. 51 | """ 52 | 53 | actual_lines = captured.out.strip().split('\n') 54 | print('\n'.join(actual_lines)) 55 | expected_lines = expected_help_message.strip().split('\n') 56 | 57 | # Ensure both outputs have the same number of lines 58 | assert len(actual_lines) == len( 59 | expected_lines 60 | ), 'The number of lines in the help message does not match.' 
61 | 62 | # Compare each line 63 | for actual, expected in zip(actual_lines, expected_lines): 64 | assert ( 65 | actual.strip() == expected.strip() 66 | ), f"Expected '{expected}', got '{actual}'" 67 | -------------------------------------------------------------------------------- /tests/unit/test_event_stream.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pytest 4 | 5 | from opendevin.events import EventSource, EventStream 6 | from opendevin.events.action import NullAction 7 | from opendevin.events.observation import NullObservation 8 | 9 | 10 | def collect_events(stream): 11 | return [event for event in stream.get_events()] 12 | 13 | 14 | @pytest.mark.asyncio 15 | async def test_basic_flow(): 16 | stream = EventStream('abc') 17 | await stream.add_event(NullAction(), EventSource.AGENT) 18 | assert len(collect_events(stream)) == 1 19 | 20 | 21 | @pytest.mark.asyncio 22 | async def test_stream_storage(): 23 | stream = EventStream('def') 24 | await stream.add_event(NullObservation(''), EventSource.AGENT) 25 | assert len(collect_events(stream)) == 1 26 | content = stream._file_store.read('sessions/def/events/0.json') 27 | assert content is not None 28 | data = json.loads(content) 29 | assert 'timestamp' in data 30 | del data['timestamp'] 31 | assert data == { 32 | 'id': 0, 33 | 'source': 'agent', 34 | 'observation': 'null', 35 | 'content': '', 36 | 'extras': {}, 37 | 'message': 'No observation', 38 | } 39 | 40 | 41 | @pytest.mark.asyncio 42 | async def test_rehydration(): 43 | stream1 = EventStream('es1') 44 | await stream1.add_event(NullObservation('obs1'), EventSource.AGENT) 45 | await stream1.add_event(NullObservation('obs2'), EventSource.AGENT) 46 | assert len(collect_events(stream1)) == 2 47 | 48 | stream2 = EventStream('es2') 49 | assert len(collect_events(stream2)) == 0 50 | 51 | stream1rehydrated = EventStream('es1') 52 | events = collect_events(stream1rehydrated) 53 | assert len(events) == 
2 54 | assert events[0].content == 'obs1' 55 | assert events[1].content == 'obs2' 56 | -------------------------------------------------------------------------------- /tests/unit/test_json.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from opendevin.core.utils import json 4 | from opendevin.events.action import MessageAction 5 | 6 | 7 | def test_event_serialization_deserialization(): 8 | message = MessageAction(content='This is a test.', wait_for_response=False) 9 | message._id = 42 10 | message._timestamp = datetime(2020, 1, 1, 0, 0, 0) 11 | serialized = json.dumps(message) 12 | deserialized = json.loads(serialized) 13 | expected = { 14 | 'id': 42, 15 | 'timestamp': '2020-01-01T00:00:00', 16 | 'action': 'message', 17 | 'message': 'This is a test.', 18 | 'args': { 19 | 'content': 'This is a test.', 20 | 'wait_for_response': False, 21 | }, 22 | } 23 | assert deserialized == expected 24 | 25 | 26 | def test_array_serialization_deserialization(): 27 | message = MessageAction(content='This is a test.', wait_for_response=False) 28 | message._id = 42 29 | message._timestamp = datetime(2020, 1, 1, 0, 0, 0) 30 | serialized = json.dumps([message]) 31 | deserialized = json.loads(serialized) 32 | expected = [ 33 | { 34 | 'id': 42, 35 | 'timestamp': '2020-01-01T00:00:00', 36 | 'action': 'message', 37 | 'message': 'This is a test.', 38 | 'args': { 39 | 'content': 'This is a test.', 40 | 'wait_for_response': False, 41 | }, 42 | } 43 | ] 44 | assert deserialized == expected 45 | -------------------------------------------------------------------------------- /tests/unit/test_observation_serialization.py: -------------------------------------------------------------------------------- 1 | from opendevin.events.observation import ( 2 | CmdOutputObservation, 3 | Observation, 4 | ) 5 | from opendevin.events.serialization import ( 6 | event_from_dict, 7 | event_to_dict, 8 | event_to_memory, 9 | ) 10 | 11 

def serialization_deserialization(original_observation_dict, cls):
    """Round-trip an observation dict through the event serializers.

    Builds an event from ``original_observation_dict`` with ``event_from_dict``
    and asserts that:

    * the result is an ``Observation`` and an instance of ``cls``;
    * ``event_to_dict`` reproduces the original dict exactly;
    * ``event_to_memory`` reproduces the original dict minus the
      ``message``, ``id`` and ``timestamp`` keys.

    Args:
        original_observation_dict: Serialized observation to round-trip.
            It is not modified by this helper.
        cls: The observation class the deserialized event must be an
            instance of.

    Raises:
        AssertionError: If any of the checks above fails.
    """
    observation_instance = event_from_dict(original_observation_dict)
    assert isinstance(
        observation_instance, Observation
    ), 'The observation instance should be an instance of Observation.'
    assert isinstance(
        observation_instance, cls
    ), f'The observation instance should be an instance of {cls.__name__}.'
    serialized_observation_dict = event_to_dict(observation_instance)
    serialized_observation_memory = event_to_memory(observation_instance)
    assert (
        serialized_observation_dict == original_observation_dict
    ), 'The serialized observation should match the original observation dict.'
    # Compare the memory form against a filtered copy rather than popping keys
    # from the caller's dict, so the argument is left untouched.
    expected_memory_dict = {
        key: value
        for key, value in original_observation_dict.items()
        if key not in ('message', 'id', 'timestamp')
    }
    assert (
        serialized_observation_memory == expected_memory_dict
    ), 'The serialized observation memory should match the original observation dict.'


# Additional tests for various observation subclasses can be included here
def test_observation_event_props_serialization_deserialization():
    """Round-trip a 'run' observation dict that carries id, source and timestamp."""
    original_observation_dict = {
        'id': 42,
        'source': 'agent',
        'timestamp': '2021-08-01T12:00:00',
        'observation': 'run',
        'message': 'Command `ls -l` executed with exit code 0.',
        'extras': {'exit_code': 0, 'command': 'ls -l', 'command_id': 3},
        'content': 'foo.txt',
    }
    serialization_deserialization(original_observation_dict, CmdOutputObservation)


def test_command_output_observation_serialization_deserialization():
    """Round-trip a 'run' observation dict without id, source or timestamp."""
    original_observation_dict = {
        'observation': 'run',
        'extras': {'exit_code': 0, 'command': 'ls -l', 'command_id': 3},
        'message': 'Command `ls -l` executed with exit code 0.',
        'content': 'foo.txt',
    }
    serialization_deserialization(original_observation_dict, CmdOutputObservation)

# --------------------------------------------------------------------------------
# /tests/unit/test_storage.py:
# --------------------------------------------------------------------------------
import os
import shutil

import pytest

from opendevin.storage.local import LocalFileStore
from opendevin.storage.memory import InMemoryFileStore


@pytest.fixture
def setup_env():
    """Create ``./_test_files_tmp`` before the test and remove it afterwards."""
    os.makedirs('./_test_files_tmp', exist_ok=True)

    yield

    shutil.rmtree('./_test_files_tmp')


def test_basic_fileops(setup_env):
    """Write, read, list and delete a single file in each store."""
    filename = 'test.txt'
    for store in [LocalFileStore('./_test_files_tmp'), InMemoryFileStore()]:
        store.write(filename, 'Hello, world!')
        assert store.read(filename) == 'Hello, world!'
        assert store.list('') == [filename]
        store.delete(filename)
        # Reading a deleted file must raise rather than return stale content.
        with pytest.raises(FileNotFoundError):
            store.read(filename)


def test_complex_path_fileops(setup_env):
    """Write, read and delete files addressed by dotted, relative and rooted paths."""
    filenames = ['foo.bar.baz', './foo/bar/baz', 'foo/bar/baz', '/foo/bar/baz']
    for store in [LocalFileStore('./_test_files_tmp'), InMemoryFileStore()]:
        for filename in filenames:
            store.write(filename, 'Hello, world!')
            assert store.read(filename) == 'Hello, world!'
            store.delete(filename)
            with pytest.raises(FileNotFoundError):
                store.read(filename)


def test_list(setup_env):
    """Listing the root returns exactly the files written, in any order."""
    for store in [LocalFileStore('./_test_files_tmp'), InMemoryFileStore()]:
        store.write('foo.txt', 'Hello, world!')
        store.write('bar.txt', 'Hello, world!')
        store.write('baz.txt', 'Hello, world!')
        # list.sort() sorts in place and returns None, so comparing the results
        # of two .sort() calls is always None == None. sorted() returns the
        # ordered lists, which makes this an order-insensitive comparison.
        assert sorted(store.list('')) == sorted(['foo.txt', 'bar.txt', 'baz.txt'])
        store.delete('foo.txt')
        store.delete('bar.txt')
        store.delete('baz.txt')


def test_deep_list(setup_env):
    """Listing shows one directory level at a time for nested files."""
    for store in [LocalFileStore('./_test_files_tmp'), InMemoryFileStore()]:
        store.write('foo/bar/baz.txt', 'Hello, world!')
        store.write('foo/bar/qux.txt', 'Hello, world!')
        store.write('foo/bar/quux.txt', 'Hello, world!')
        assert store.list('') == ['foo/'], f'for class {store.__class__}'
        assert store.list('foo') == ['foo/bar/']
        # sorted() instead of .sort(): the in-place sort returns None, which
        # would make this assertion pass no matter what list() returned.
        assert sorted(store.list('foo/bar')) == sorted(
            ['foo/bar/baz.txt', 'foo/bar/qux.txt', 'foo/bar/quux.txt']
        )
        store.delete('foo/bar/baz.txt')
        store.delete('foo/bar/qux.txt')
        store.delete('foo/bar/quux.txt')

# --------------------------------------------------------------------------------