├── .dockerignore ├── .env.template ├── .github ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug-report.md │ ├── documentation-clarification.md │ └── feature-request.md ├── PULL_REQUEST_TEMPLATE │ └── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── automation.yml │ ├── ci.yaml │ ├── pre-commit.yaml │ └── release.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── Acknowledgements.md ├── DISCLAIMER.md ├── GOVERNANCE.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── ROADMAP.md ├── TERMS_OF_USE.md ├── WINDOWS_README.md ├── citation.cff ├── docker-compose.yml ├── docker ├── Dockerfile ├── README.md └── entrypoint.sh ├── docs ├── Makefile ├── api_reference.rst ├── code_conduct_link.rst ├── conf.py ├── contributing_link.rst ├── create_api_rst.py ├── disclaimer_link.rst ├── docs_building.md ├── examples │ └── open_llms │ │ ├── README.md │ │ ├── langchain_interface.py │ │ └── openai_api_interface.py ├── index.rst ├── installation.rst ├── introduction.md ├── make.bat ├── open_models.md ├── quickstart.rst ├── roadmap_link.rst ├── terms_link.rst ├── tracing_debugging.md └── windows_readme_link.rst ├── gpt_engineer ├── __init__.py ├── applications │ ├── __init__.py │ └── cli │ │ ├── __init__.py │ │ ├── cli_agent.py │ │ ├── collect.py │ │ ├── file_selector.py │ │ ├── learning.py │ │ └── main.py ├── benchmark │ ├── __init__.py │ ├── __main__.py │ ├── bench_config.py │ ├── benchmarks │ │ ├── apps │ │ │ ├── load.py │ │ │ ├── problem.py │ │ │ └── problems.py │ │ ├── gptme │ │ │ └── load.py │ │ ├── load.py │ │ └── mbpp │ │ │ ├── load.py │ │ │ ├── problem.py │ │ │ └── problems.py │ ├── default_bench_config.toml │ ├── run.py │ └── types.py ├── core │ ├── __init__.py │ ├── ai.py │ ├── base_agent.py │ ├── base_execution_env.py │ ├── base_memory.py │ ├── chat_to_files.py │ ├── default │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── disk_execution_env.py │ │ ├── disk_memory.py │ │ ├── file_store.py │ │ ├── paths.py │ │ ├── simple_agent.py │ │ └── steps.py │ ├── diff.py │ ├── files_dict.py │ ├── git.py │ ├── linting.py │ ├── preprompts_holder.py │ ├── project_config.py │ ├── prompt.py │ ├── token_usage.py │ └── version_manager.py ├── preprompts │ ├── clarify │ ├── entrypoint │ ├── file_format │ ├── file_format_diff │ ├── file_format_fix │ ├── generate │ ├── improve │ ├── philosophy │ └── roadmap └── tools │ ├── __init__.py │ ├── custom_steps.py │ └── supported_languages.py ├── poetry.lock ├── projects ├── example-improve │ ├── README.md │ ├── controller.py │ ├── main.py │ ├── model.py │ ├── prompt │ ├── requirements.txt │ ├── run.sh │ └── view.py ├── example-vision │ ├── images │ │ └── ux_diagram.png │ ├── navigation.html │ └── prompt └── example │ └── prompt ├── pyproject.toml ├── scripts ├── clean_benchmarks.py ├── legacy_benchmark.py ├── print_chat.py └── test_api.py ├── sweep.yaml ├── tests ├── __init__.py ├── ai_cache.json ├── applications │ ├── __init__.py │ └── cli │ │ ├── __init__.py │ │ ├── test_cli_agent.py │ │ ├── test_collect.py │ │ ├── test_collection_consent.py │ │ ├── test_learning.py │ │ └── test_main.py ├── benchmark │ └── test_BenchConfig.py ├── core │ ├── __init__.py │ ├── default │ │ ├── __init__.py │ │ ├── test_disk_execution_env.py │ │ ├── test_disk_file_repository.py │ │ ├── test_simple_agent.py │ │ └── test_steps.py │ ├── improve_function_test_cases │ │ ├── apps_benchmark_6_chat │ │ ├── apps_benchmark_6_code │ │ ├── apps_benchmark_6_v2_chat │ │ ├── apps_benchmark_6_v2_code │ │ ├── controller_chat │ │ ├── 
controller_code │ │ ├── corrected_diff_from_missing_lines │ │ ├── create_two_new_files_chat │ │ ├── create_two_new_files_code │ │ ├── simple_calculator_chat │ │ ├── simple_calculator_code │ │ ├── task_master_chat │ │ ├── task_master_code │ │ ├── temperature_converter_chat │ │ ├── temperature_converter_code │ │ ├── theo_case_chat │ │ ├── theo_case_code │ │ ├── vgvishesh_example_2_chat │ │ ├── vgvishesh_example_2_code │ │ ├── vgvishesh_example_chat │ │ ├── vgvishesh_example_code │ │ ├── wheaties_example_chat │ │ ├── wheaties_example_code │ │ ├── zbf_yml_missing_chat │ │ └── zbf_yml_missing_code │ ├── test_ai.py │ ├── test_chat_to_files.py │ ├── test_file_selector_enhancements.py │ ├── test_git.py │ ├── test_salvage_correct_hunks.py │ └── test_token_usage.py ├── mock_ai.py ├── test_data │ └── mona_lisa.jpg ├── test_install.py ├── test_project_config.py └── tools │ └── example_snake_files.py └── tox.ini /.dockerignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/ignore-files/ for more about ignoring files. 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # Distribution / packaging 9 | dist/ 10 | build/ 11 | *.egg-info/ 12 | *.egg 13 | 14 | # Virtual environments 15 | .env 16 | .env.sh 17 | venv/ 18 | ENV/ 19 | 20 | # IDE-specific files 21 | .vscode/ 22 | .idea/ 23 | 24 | # Compiled Python modules 25 | *.pyc 26 | *.pyo 27 | *.pyd 28 | 29 | # Python testing 30 | .pytest_cache/ 31 | .ruff_cache/ 32 | .coverage 33 | .mypy_cache/ 34 | 35 | # macOS specific files 36 | .DS_Store 37 | 38 | # Windows specific files 39 | Thumbs.db 40 | 41 | # this application's specific files 42 | archive 43 | 44 | # any log file 45 | *log.txt 46 | todo 47 | scratchpad 48 | 49 | # Ignore GPT Engineer files 50 | projects 51 | !projects/example 52 | 53 | # Pyenv 54 | .python-version 55 | 56 | # Benchmark files 57 | benchmark 58 | !benchmark/*/prompt 59 | 60 | .gpte_consent 61 | -------------------------------------------------------------------------------- /.env.template: -------------------------------------------------------------------------------- 1 | ### OpenAI Setup ### 2 | 3 | # OPENAI_API_KEY=Your personal OpenAI API key from https://platform.openai.com/account/api-keys 4 | OPENAI_API_KEY=... 5 | ANTHROPIC_API_KEY=... 6 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | .github/workflows/ @ATheorell 2 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [antonosika] 4 | patreon: gpt_eng 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug, triage 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Policy and info 11 | - Maintainers will close issues that have been stale for 14 days if they contain relevant answers. 12 | - Adding the label "sweep" will automatically turn the issue into a coded pull request. Works best for mechanical tasks. 
More info/syntax at: https://docs.sweep.dev/ 13 | 14 | ## Expected Behavior 15 | 16 | Please describe the behavior you are expecting. 17 | 18 | ## Current Behavior 19 | 20 | What is the current behavior? 21 | 22 | ## Failure Information 23 | 24 | Information about the failure, including environment details, such as LLM used. 25 | 26 | ### Failure Logs 27 | 28 | If your project includes a debug_log_file.txt, kindly upload it from your_project/.gpteng/memory/ directory. This file encompasses all the necessary logs. Should the file prove extensive, consider utilizing GitHub's "add files" functionality. 29 | 30 | ## System Information 31 | 32 | Please copy and paste the output of the `gpte --sysinfo` command as part of your bug report. 33 | 34 | ## Installation Method 35 | 36 | Please specify whether you installed GPT-Engineer using `pip install` or by building the repository. 37 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation-clarification.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Documentation improvement 3 | about: Inaccuracies, inadequacies in the docs pages 4 | title: '' 5 | labels: documentation, triage 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Policy and info 11 | - Maintainers will close issues that have been stale for 14 days if they contain relevant answers. 12 | - Adding the label "sweep" will automatically turn the issue into a coded pull request. Works best for mechanical tasks. More info/syntax at: https://docs.sweep.dev/ 13 | 14 | 15 | ## Description 16 | A clear and concise description of how the documentation at https://gpt-engineer.readthedocs.io/en/latest/ is providing wrong/insufficient information. 17 | 18 | ## Suggestion 19 | How can it be improved 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement, triage 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Policy and info 11 | - Maintainers will close issues that have been stale for 14 days if they contain relevant answers. 12 | - Adding the label "sweep" will automatically turn the issue into a coded pull request. Works best for mechanical tasks. More info/syntax at: https://docs.sweep.dev/ 13 | - Consider adding the label "good first issue" for interesting, but easy features. 14 | 15 | ## Feature description 16 | A clear and concise description of what you would like to have 17 | 18 | ## Motivation/Application 19 | Why is this feature useful? 20 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **YOU MAY DELETE THE ENTIRE TEMPLATE BELOW.** 2 | 3 | ## How Has This Been Tested? 
4 | 5 | Please describe if you have either: 6 | 7 | - Generated the "example" project 8 | - Ran the entire benchmark suite 9 | - Something else 10 | -------------------------------------------------------------------------------- /.github/workflows/automation.yml: -------------------------------------------------------------------------------- 1 | name: Automation Workflow 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | issues: 7 | types: [opened, edited, reopened] 8 | pull_request: 9 | types: [opened, edited, reopened] 10 | 11 | jobs: 12 | mark-stale-issues: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Mark stale issues 16 | uses: actions/stale@v4 17 | with: 18 | stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs.' 19 | days-before-stale: 60 20 | 21 | # Add additional jobs as needed 22 | # job-name: 23 | # runs-on: ubuntu-latest 24 | # steps: 25 | # - name: Job step name 26 | # uses: action-name@version 27 | # with: 28 | # parameter1: value1 29 | # 30 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: Tox pytest all python versions 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | paths: 7 | - gpt_engineer/** 8 | - tests/** 9 | pull_request: 10 | branches: [ main ] 11 | 12 | concurrency: 13 | group: ${{ github.workflow }} - ${{ github.ref }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | test: 18 | runs-on: ubuntu-latest 19 | strategy: 20 | matrix: 21 | python-version: ['3.10', '3.11', '3.12'] 22 | 23 | steps: 24 | - name: Checkout repository 25 | uses: actions/checkout@v3 26 | 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v4 29 | with: 30 | python-version: ${{ matrix.python-version == '3.12' && '3.12.3' || matrix.python-version }} # Using 3.12.3 to resolve Pydantic ForwardRef issue 31 | cache: 'pip' # Note that pip is for the tox level. 
Poetry is still used for installing the specific environments (tox.ini) 32 | 33 | - name: Check Python Version 34 | run: python --version 35 | 36 | - name: Install dependencies 37 | run: | 38 | python -m pip install --upgrade pip 39 | pip install tox==4.15.0 poetry 40 | 41 | - name: Run tox 42 | env: 43 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 44 | run: tox 45 | 46 | # Temporarily disabling codecov until we resolve codecov rate limiting issue 47 | # - name: Report coverage 48 | # run: | 49 | # bash <(curl -s https://codecov.io/bash) 50 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: pre-commit 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main] 7 | 8 | jobs: 9 | pre-commit: 10 | runs-on: ubuntu-latest 11 | 12 | permissions: 13 | contents: write 14 | 15 | steps: 16 | - uses: actions/checkout@v3 17 | 18 | - uses: actions/setup-python@v4 19 | 20 | - uses: pre-commit/action@v3.0.0 21 | with: 22 | extra_args: --all-files 23 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Build and publish Python packages to PyPI 2 | 3 | on: 4 | workflow_dispatch: 5 | release: 6 | types: 7 | - published 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: 15 | - "3.10" 16 | steps: 17 | - uses: actions/checkout@v3 18 | 19 | - uses: actions/setup-python@v4 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | # Removed the cache line that was here 23 | 24 | # Install Poetry 25 | - name: Install Poetry 26 | run: | 27 | curl -sSL https://install.python-poetry.org | python3 - 28 | 29 | # Add Poetry to PATH 30 | - name: Add Poetry to PATH 31 | run: echo "$HOME/.local/bin" >> $GITHUB_PATH 32 | 33 | # Cache Poetry's dependencies based on the lock file 34 | - name: Set up Poetry cache 35 | uses: actions/cache@v3 36 | with: 37 | path: ~/.cache/pypoetry 38 | key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }} 39 | restore-keys: | 40 | ${{ runner.os }}-poetry- 41 | 42 | # Install dependencies using Poetry (if any) 43 | - name: Install dependencies 44 | run: poetry install 45 | 46 | # Build package using Poetry 47 | - name: Build package 48 | run: poetry build --format sdist 49 | 50 | # Upload package as build artifact 51 | - uses: actions/upload-artifact@v3 52 | with: 53 | name: package 54 | path: dist/ 55 | 56 | publish: 57 | runs-on: ubuntu-latest 58 | needs: build 59 | environment: 60 | name: pypi 61 | url: https://pypi.org/p/gpt-engineer 62 | permissions: 63 | id-token: write 64 | steps: 65 | - uses: actions/download-artifact@v3 66 | with: 67 | name: package 68 | path: dist/ 69 | 70 | - name: Publish packages to PyPI 71 | uses: pypa/gh-action-pypi-publish@release/v1 72 | with: 73 | password: ${{ secrets.PYPI_API_TOKEN }} 74 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/ignore-files/ for more about ignoring files. 
2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | .history/ 8 | 9 | # Distribution / packaging 10 | dist/ 11 | build/ 12 | *.egg-info/ 13 | *.egg 14 | 15 | # Virtual environments 16 | .env 17 | .env.sh 18 | venv/ 19 | ENV/ 20 | venv_test_installation/ 21 | 22 | # IDE-specific files 23 | .vscode/ 24 | .idea/ 25 | 26 | # Compiled Python modules 27 | *.pyc 28 | *.pyo 29 | *.pyd 30 | 31 | # Python testing 32 | .pytest_cache/ 33 | .ruff_cache/ 34 | .mypy_cache/ 35 | .coverage 36 | coverage.* 37 | 38 | # macOS specific files 39 | .DS_Store 40 | 41 | # Windows specific files 42 | Thumbs.db 43 | 44 | # this application's specific files 45 | archive 46 | 47 | # any log file 48 | *log.txt 49 | todo 50 | scratchpad 51 | 52 | # Pyenv 53 | .python-version 54 | 55 | .gpte_consent 56 | 57 | # projects folder apart from default prompt 58 | 59 | projects/* 60 | !projects/example/prompt 61 | !projects/example-improve 62 | !projects/example-vision 63 | 64 | # docs 65 | 66 | docs/_build 67 | docs/applications 68 | docs/benchmark 69 | docs/cli 70 | docs/core 71 | docs/intro 72 | docs/tools 73 | 74 | # coding assistants 75 | .aider* 76 | .gpteng 77 | 78 | # webapp specific 79 | webapp/node_modules 80 | webapp/package-lock.json 81 | 82 | webapp/.next/ 83 | 84 | .langchain.db 85 | 86 | # TODO files 87 | /!todo* 88 | 89 | #ignore tox files 90 | .tox 91 | 92 | # locally saved datasets 93 | gpt_engineer/benchmark/benchmarks/apps/dataset 94 | gpt_engineer/benchmark/benchmarks/mbpp/dataset 95 | 96 | gpt_engineer/benchmark/minimal_bench_config.toml 97 | 98 | test.json 99 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | fail_fast: true 4 | default_stages: [commit] 5 | 6 | repos: 7 | - repo: https://github.com/psf/black 8 | rev: 23.3.0 9 | hooks: 10 | - id: black 11 | args: [--config, pyproject.toml] 12 | types: [python] 13 | 14 | - repo: https://github.com/charliermarsh/ruff-pre-commit 15 | rev: "v0.0.272" 16 | hooks: 17 | - id: ruff 18 | args: [--fix, --exit-non-zero-on-fix] 19 | 20 | - repo: https://github.com/pre-commit/pre-commit-hooks 21 | rev: v4.4.0 22 | hooks: 23 | - id: check-toml 24 | - id: check-yaml 25 | - id: detect-private-key 26 | - id: end-of-file-fixer 27 | - id: trailing-whitespace 28 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the OS, Python version and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.11" 13 | # You can also specify other tool versions: 14 | # nodejs: "19" 15 | # rust: "1.64" 16 | # golang: "1.19" 17 | jobs: 18 | post_create_environment: 19 | - pip install poetry 20 | post_install: 21 | - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --with docs 22 | pre_build: 23 | - python docs/create_api_rst.py 24 | 25 | # Build documentation in the "docs/" directory with Sphinx 26 | sphinx: 27 | configuration: docs/conf.py 28 | 29 | # Optionally build your docs in additional formats such as PDF and ePub 30 | # formats: 31 | 
# - pdf 32 | # - epub 33 | 34 | # Optional but recommended, declare the Python requirements required 35 | # to build your documentation 36 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 37 | #python: 38 | # install: 39 | # - requirements: docs/requirements.txt 40 | -------------------------------------------------------------------------------- /Acknowledgements.md: -------------------------------------------------------------------------------- 1 | # We thank the following people for inspiration 2 | 3 | | Person | Content | File(s) | Source | 4 | |----|---|---|---| 5 | | Paul Gauthier | The prompt for the `improve code` step is strongly based on Paul's prompt in Aider | /preprompts/improve.txt | https://github.com/paul-gauthier/aider/blob/main/aider/coders/editblock_coder.py 6 | -------------------------------------------------------------------------------- /DISCLAIMER.md: -------------------------------------------------------------------------------- 1 | # Disclaimer 2 | 3 | gpt-engineer is an experimental application and is provided "as-is" without any warranty, express or implied. By using this software, you agree to assume all risks associated with its use, including but not limited to data loss, system failure, or any other issues that may arise. 4 | 5 | The developers and contributors of this project do not accept any responsibility or liability for any losses, damages, or other consequences that may occur as a result of using this software. You are solely responsible for any decisions and actions taken based on the information provided by gpt-engineer. 6 | 7 | Please note that the use of the GPT-4 language model can be expensive due to its token usage. By utilizing this project, you acknowledge that you are responsible for monitoring and managing your own token usage and the associated costs. It is highly recommended to check your OpenAI API usage regularly and set up any necessary limits or alerts to prevent unexpected charges. 8 | 9 | As an autonomous experiment, gpt-engineer may generate code or take actions that are not in line with real-world business practices or legal requirements. It is your responsibility to ensure that any actions or decisions made by the generated code comply with all applicable laws, regulations, and ethical standards. The developers and contributors of this project shall not be held responsible for any consequences arising from the use of this software. 10 | 11 | By using gpt-engineer, you agree to indemnify, defend, and hold harmless the developers, contributors, and any affiliated parties from and against any and all claims, damages, losses, liabilities, costs, and expenses (including reasonable attorneys' fees) arising from your use of this software or your violation of these terms. 
12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Anton Osika 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include gpt_engineer/preprompts * 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #Sets the default shell for executing commands as /bin/bash and specifies command should be executed in a Bash shell. 2 | SHELL := /bin/bash 3 | 4 | # Color codes for terminal output 5 | COLOR_RESET=\033[0m 6 | COLOR_CYAN=\033[1;36m 7 | COLOR_GREEN=\033[1;32m 8 | 9 | # Defines the targets help, install, dev-install, and run as phony targets. 10 | .PHONY: help install run 11 | 12 | #sets the default goal to help when no target is specified on the command line. 13 | .DEFAULT_GOAL := help 14 | 15 | #Disables echoing of commands. 16 | .SILENT: 17 | 18 | #Sets the variable name to the second word from the MAKECMDGOALS. 19 | name := $(word 2,$(MAKECMDGOALS)) 20 | 21 | #Defines a target named help. 22 | help: 23 | @echo "Please use 'make ' where is one of the following:" 24 | @echo " help Return this message with usage instructions." 25 | @echo " install Will install the dependencies using Poetry." 26 | @echo " run Runs GPT Engineer on the folder with the given name." 27 | 28 | #Defines a target named install. This target will install the project using Poetry. 29 | install: poetry-install install-pre-commit farewell 30 | 31 | #Defines a target named poetry-install. This target will install the project dependencies using Poetry. 32 | poetry-install: 33 | @echo -e "$(COLOR_CYAN)Installing project with Poetry...$(COLOR_RESET)" && \ 34 | poetry install 35 | 36 | #Defines a target named install-pre-commit. This target will install the pre-commit hooks. 37 | install-pre-commit: 38 | @echo -e "$(COLOR_CYAN)Installing pre-commit hooks...$(COLOR_RESET)" && \ 39 | poetry run pre-commit install 40 | 41 | #Defines a target named farewell. This target will print a farewell message. 42 | farewell: 43 | @echo -e "$(COLOR_GREEN)All done!$(COLOR_RESET)" 44 | 45 | #Defines a target named run. 
This target will run GPT Engineer on the folder with the given name. 46 | run: 47 | @echo -e "$(COLOR_CYAN)Running GPT Engineer on $(COLOR_GREEN)$(name)$(COLOR_CYAN) folder...$(COLOR_RESET)" && \ 48 | poetry run gpt-engineer projects/$(name) 49 | 50 | # Counts the lines of code in the project 51 | cloc: 52 | cloc . --exclude-dir=node_modules,dist,build,.mypy_cache,benchmark --exclude-list-file=.gitignore --fullpath --not-match-d='docs/_build' --by-file 53 | -------------------------------------------------------------------------------- /ROADMAP.md: -------------------------------------------------------------------------------- 1 | # Roadmap 2 | 3 | image 4 | 5 | 6 | This document is a general roadmap guide to the gpt-engineer project's strategic direction. 7 | Our goal is to continually improve by focusing on three main pillars: 8 | - User Experience, 9 | - Technical Features, and 10 | - Performance Tracking/Testing. 11 | 12 | Each pillar is supported by a set of epics, reflecting our major goals and initiatives. 13 | 14 | 15 | ## Tracking Progress with GitHub Projects 16 | 17 | We are using [GitHub Projects](https://github.com/orgs/gpt-engineer-org/projects/3) to track the progress of our roadmap. 18 | 19 | Each issue within our project is categorized under one of the main pillars and, in most cases, associated epics. You can check our [Project's README](https://github.com/orgs/gpt-engineer-org/projects/3?pane=info) section to understand better our logic and organization. 20 | 21 | 22 | 23 | # How you can help out 24 | 25 | You can: 26 | 27 | - Post a "design" as a Google Doc in our [Discord](https://discord.com/channels/1119885301872070706/1120698764445880350), and ask for feedback to address one of the items in the roadmap 28 | - Submit PRs to address one of the items in the roadmap 29 | - Do a review of someone else's PR and propose next steps (further review, merge, close) 30 | 31 | 🙌 Volunteer work in any of these will get acknowledged.🙌 32 | -------------------------------------------------------------------------------- /TERMS_OF_USE.md: -------------------------------------------------------------------------------- 1 | # Terms of Use 2 | 3 | Welcome to gpt-engineer! By utilizing this powerful tool, you acknowledge and agree to the following comprehensive Terms of Use. We also encourage you to review the linked [disclaimer of warranty](https://github.com/gpt-engineer-org/gpt-engineer/blob/main/DISCLAIMER.md) for additional information. 4 | 5 | Both OpenAI, L.L.C. and the dedicated creators behind the remarkable gpt-engineer have implemented a data collection process focused on enhancing the product's capabilities. This endeavor is undertaken with utmost care and dedication to safeguarding user privacy. Rest assured that no information that could be directly attributed to any individual is stored. 6 | 7 | It's important to be aware that the utilization of natural text inputs, including the 'prompt' and 'feedback' files, may be subject to storage. While it's theoretically possible to establish connections between a person's writing style or content within these files and their real-life identity, please note that the creators of gpt-engineer explicitly assure that such attempts will never be made. 8 | 9 | For a deeper understanding of OpenAI's overarching terms of use, we encourage you to explore the details available [here](https://openai.com/policies/terms-of-use). 10 | 11 | Optionally, gpt-engineer collects usage data for the purpose of improving gpt-engineer. 
Data collection only happens when a consent file called .gpte_consent is present in the gpt-engineer directory. Note that gpt-engineer cannot prevent data streams that pass through gpt-engineer to a third party (for example OpenAI) from being stored by that third party. 12 | 13 | Your engagement with gpt-engineer is an acknowledgment and acceptance of these terms, demonstrating your commitment to using this tool responsibly and within the bounds of ethical conduct. We appreciate your trust and look forward to the exciting possibilities that gpt-engineer can offer in your endeavors. 14 |
-------------------------------------------------------------------------------- /WINDOWS_README.md: -------------------------------------------------------------------------------- 1 | # Windows Setup 2 | ## Short version 3 | 4 | On Windows, follow the standard [README.md](https://github.com/gpt-engineer-org/gpt-engineer/blob/main/README.md), but to set the API key do one of the following: 5 | - `set OPENAI_API_KEY=[your api key]` on cmd 6 | - `$env:OPENAI_API_KEY="[your api key]"` on powershell 7 | 8 | ## Full setup guide 9 | 10 | Choose either **stable** or **development**. 11 | 12 | For the **stable** release: 13 | 14 | Run `pip install gpt-engineer` in the command line as an administrator. 15 | 16 | Or: 17 | 18 | 1. Open your web browser and navigate to the gpt-engineer page on the Python Package Index (PyPI) website: https://pypi.org/project/gpt-engineer/. 19 | 2. On the PyPI page for the gpt-engineer package, locate the "Download files" section. Here you'll find a list of available versions and their corresponding download links. 20 | 3. Identify the version of gpt-engineer you want to install and click on the associated download link. This will download the package file (usually a .tar.gz or .whl file) to your computer. 21 | 4. Once the package file is downloaded, open your Python development environment or IDE. 22 | 5. In your Python development environment, look for an option to install packages or manage dependencies. The exact location and terminology may vary depending on your IDE. For example, in PyCharm, you can go to "File" > "Settings" > "Project: \<project name\>" > "Python Interpreter" to manage packages. 23 | 6. In the package management interface, you should see a list of installed packages. Look for an option to add or install a new package. 24 | 7. Click on the "Add Package" or "Install Package" button. 25 | 8. In the package installation dialog, choose the option to install from a file or from a local source. 26 | 9. Browse and select the downloaded gpt-engineer package file from your computer. 27 | 28 | For **development**: 29 | 30 | - `git clone git@github.com:gpt-engineer-org/gpt-engineer.git` 31 | - `cd gpt-engineer` 32 | - `poetry install` 33 | - `poetry shell` to activate the virtual environment 34 | 35 | ### Setup 36 | 37 | With an API key from OpenAI: 38 | 39 | Run `set OPENAI_API_KEY=[your API key]` in the command line 40 | 41 | Or: 42 | 43 | 1. In the Start Menu, search for "Environment Variables" and click on "Edit the system environment variables". 44 | 2. In the System Properties window, click on the "Environment Variables" button. 45 | 3. In the Environment Variables window, you'll see two sections: User variables and System variables. 46 | 4. To set a user-specific environment variable, select the "New" button under the User variables section. 47 | 5. To set a system-wide environment variable, select the "New" button under the System variables section. 48 | 6. Enter the variable name "OPENAI_API_KEY" in the "Variable name" field. 49 | 7.
Enter the variable value (e.g., your API key) in the "Variable value" field. 50 | 8. Click "OK" to save the changes. 51 | 9. Close any open command prompt or application windows and reopen them for the changes to take effect. 52 | 53 | Now you can use `%OPENAI_API_KEY%` when prompted to input your key. 54 | 55 | ### Run 56 | 57 | - Create an empty folder. If inside the repo, you can: 58 | - Run `xcopy /E projects\example projects\my-new-project` in the command line 59 | - Or hold CTRL and drag the folder down to create a copy, then rename to fit your project 60 | - Fill in the `prompt` file in your new folder 61 | - `gpt-engineer projects/my-new-project` 62 | - (Note, `gpt-engineer --help` lets you see all available options. For example `--steps use_feedback` lets you improve/fix code in a project) 63 | 64 | By running gpt-engineer you agree to our [ToS](https://github.com/gpt-engineer-org/gpt-engineer/blob/main/TERMS_OF_USE.md). 65 | 66 | ### Results 67 | 68 | - Check the generated files in `projects/my-new-project/workspace` 69 | -------------------------------------------------------------------------------- /citation.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.0.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - family-names: Osika 5 | given-names: Anton 6 | title: gpt-engineer 7 | version: 0.1.0 8 | date-released: 2023-04-23 9 | repository-code: https://github.com/gpt-engineer-org/gpt-engineer 10 | url: https://gpt-engineer.readthedocs.io 11 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | gpt-engineer: 3 | build: 4 | context: . 5 | dockerfile: docker/Dockerfile 6 | stdin_open: true 7 | tty: true 8 | # Set the API key from the .env file 9 | env_file: 10 | - .env 11 | ## OR set the API key directly 12 | # environment: 13 | # - OPENAI_API_KEY=YOUR_API_KEY 14 | image: gpt-engineer 15 | volumes: 16 | - ./projects/example:/project 17 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Stage 1: Builder stage 2 | FROM python:3.11-slim AS builder 3 | 4 | RUN apt-get update && apt-get install -y --no-install-recommends \ 5 | tk \ 6 | tcl \ 7 | curl \ 8 | git \ 9 | && rm -rf /var/lib/apt/lists/* 10 | 11 | WORKDIR /app 12 | 13 | COPY . . 14 | 15 | RUN pip install --no-cache-dir -e . 16 | 17 | # Stage 2: Final stage 18 | FROM python:3.11-slim 19 | 20 | WORKDIR /app 21 | 22 | COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages 23 | COPY --from=builder /usr/local/bin /usr/local/bin 24 | COPY --from=builder /usr/bin /usr/bin 25 | COPY --from=builder /app . 26 | 27 | COPY docker/entrypoint.sh . 28 | 29 | ENTRYPOINT ["bash", "/app/entrypoint.sh"] 30 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Getting Started Using Docker 2 | 3 | This guide provides step-by-step instructions on how to set up and run the Docker environment for your GPT-Engineer project. 4 | 5 | ## Prerequisites 6 | 7 | - Docker installed on your machine. 8 | - Git (for cloning the repository). 
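Before continuing, you can confirm that both prerequisites are available from a terminal (the reported versions will vary by machine):

```bash
docker --version
git --version
```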
9 | 10 | ## Setup Instructions 11 | 12 | ### Using Docker CLI 13 | 14 | 1. **Clone the Repository** 15 | 16 | ```bash 17 | git clone https://github.com/gpt-engineer-org/gpt-engineer.git 18 | cd gpt-engineer 19 | ``` 20 | 21 | 2. **Build the Docker Image** 22 | 23 | ```bash 24 | docker build --rm -t gpt-engineer -f docker/Dockerfile . 25 | ``` 26 | 27 | 3. **Run the Docker Container** 28 | 29 | ```bash 30 | docker run -it --rm -e OPENAI_API_KEY="YOUR_OPENAI_KEY" -v ./your-project:/project gpt-engineer 31 | ``` 32 | 33 | Replace `YOUR_OPENAI_KEY` with your actual OpenAI API key. The `-v` flag mounts your local `your-project` directory inside the container. Replace this with your actual project directory. Ensure this directory contains all necessary files, including the `prompt` file. 34 | 35 | ### Using Docker Compose 36 | 37 | 1. **Clone the Repository** (if not already done) 38 | 39 | ```bash 40 | git clone https://github.com/gpt-engineer-org/gpt-engineer.git 41 | cd gpt-engineer 42 | ``` 43 | 44 | 2. **Build and Run using Docker Compose** 45 | 46 | ```bash 47 | docker-compose -f docker-compose.yml build 48 | docker-compose run --rm gpt-engineer 49 | ``` 50 | 51 | Set the `OPENAI_API_KEY` in the `docker/docker-compose.yml` using an `.env` file or as an environment variable. Mount your project directory to the container using volumes, e.g., `"./projects/example:/project"` where `./projects/example` is the path to your project directory. 52 | 53 | 3. **Another alternative using Docker Compose** 54 | 55 | Since there is only one `docker-compose.yml` file, you could run it without the -f option. 56 | - `docker compose up -d --build` - To build and start the containers defined in your `docker-compose.yml` file in detached mode 57 | - `docker compose up -d` - To start the containers defined in your `docker-compose.yml` file in detached mode 58 | - `docker compose down` - To stop and remove all containers, networks, and volumes associated with the `docker-compose.yml` 59 | - `docker compose restart` - To restart the containers defined in the `docker-compose.yml` file 60 | 61 | ## Debugging 62 | 63 | To facilitate debugging, you can run a shell inside the built Docker image: 64 | 65 | ```bash 66 | docker run -it --entrypoint /bin/bash gpt-engineer 67 | ``` 68 | 69 | This opens a shell inside the Docker container, allowing you to execute commands and inspect the environment manually. 70 | -------------------------------------------------------------------------------- /docker/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | project_dir="/project" 5 | 6 | # Run the gpt engineer script 7 | gpt-engineer $project_dir "$@" 8 | 9 | # Patch the permissions of the generated files to be owned by nobody except prompt file 10 | find "$project_dir" -mindepth 1 -maxdepth 1 ! -path "$project_dir/prompt" -exec chown -R nobody:nogroup {} + -exec chmod -R 777 {} + 11 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = gpt_engineer 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/code_conduct_link.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../.github/CODE_OF_CONDUCT.md 2 | :parser: myst_parser.sphinx_ 3 | -------------------------------------------------------------------------------- /docs/contributing_link.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../.github/CONTRIBUTING.md 2 | :parser: myst_parser.sphinx_ 3 | -------------------------------------------------------------------------------- /docs/create_api_rst.py: -------------------------------------------------------------------------------- 1 | """Script for auto-generating api_reference.rst""" 2 | import glob 3 | import re 4 | 5 | from pathlib import Path 6 | 7 | ROOT_DIR = Path(__file__).parents[1].absolute() 8 | print(ROOT_DIR) 9 | PKG_DIR = ROOT_DIR / "gpt_engineer" 10 | WRITE_FILE = Path(__file__).parent / "api_reference.rst" 11 | 12 | 13 | def load_members() -> dict: 14 | members: dict = {} 15 | for py in glob.glob(str(PKG_DIR) + "/**/*.py", recursive=True): 16 | module = py[len(str(PKG_DIR)) + 1 :].replace(".py", "").replace("/", ".") 17 | top_level = module.split(".")[0] 18 | if top_level not in members: 19 | members[top_level] = {"classes": [], "functions": []} 20 | with open(py, "r") as f: 21 | for line in f.readlines(): 22 | cls = re.findall(r"^class ([^_].*)\(", line) 23 | members[top_level]["classes"].extend([module + "." + c for c in cls]) 24 | func = re.findall(r"^def ([^_].*)\(", line) 25 | afunc = re.findall(r"^async def ([^_].*)\(", line) 26 | func_strings = [module + "." + f for f in func + afunc] 27 | members[top_level]["functions"].extend(func_strings) 28 | return members 29 | 30 | 31 | def construct_doc(members: dict) -> str: 32 | full_doc = """\ 33 | .. _api_reference: 34 | 35 | ============= 36 | API Reference 37 | ============= 38 | 39 | """ 40 | for module, _members in sorted(members.items(), key=lambda kv: kv[0]): 41 | classes = _members["classes"] 42 | functions = _members["functions"] 43 | if not (classes or functions): 44 | continue 45 | 46 | module_title = module.replace("_", " ").title() 47 | if module_title == "Llms": 48 | module_title = "LLMs" 49 | section = f":mod:`gpt_engineer.{module}`: {module_title}" 50 | full_doc += f"""\ 51 | {section} 52 | {'=' * (len(section) + 1)} 53 | 54 | .. automodule:: gpt_engineer.{module} 55 | :no-members: 56 | :no-inherited-members: 57 | 58 | """ 59 | 60 | if classes: 61 | cstring = "\n ".join(sorted(classes)) 62 | full_doc += f"""\ 63 | Classes 64 | -------------- 65 | .. currentmodule:: gpt_engineer 66 | 67 | .. autosummary:: 68 | :toctree: {module} 69 | :template: class.rst 70 | 71 | {cstring} 72 | 73 | """ 74 | if functions: 75 | fstring = "\n ".join(sorted(functions)) 76 | full_doc += f"""\ 77 | Functions 78 | -------------- 79 | .. currentmodule:: gpt_engineer 80 | 81 | .. 
autosummary:: 82 | :toctree: {module} 83 | 84 | {fstring} 85 | 86 | """ 87 | return full_doc 88 | 89 | 90 | def main() -> None: 91 | members = load_members() 92 | full_doc = construct_doc(members) 93 | with open(WRITE_FILE, "w") as f: 94 | f.write(full_doc) 95 | 96 | 97 | if __name__ == "__main__": 98 | main() 99 |
-------------------------------------------------------------------------------- /docs/disclaimer_link.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../DISCLAIMER.md 2 | :parser: myst_parser.sphinx_ 3 |
-------------------------------------------------------------------------------- /docs/docs_building.md: -------------------------------------------------------------------------------- 1 | Building Docs with Sphinx 2 | ========================= 3 | 4 | This example shows a basic Sphinx docs project with Read the Docs. This project uses `sphinx` with the `readthedocs` 5 | project template. 6 | 7 | Some useful links are given below to help you learn about and contribute to the project. 8 | 9 | 📚 [docs/](https://www.sphinx-doc.org/en/master/usage/quickstart.html)
10 | A basic Sphinx project lives in `docs/`; it was generated using Sphinx defaults. All the `*.rst` & `*.md` files make up sections in the documentation. Both `.rst` and `.md` formats are supported in this project. 11 | 12 | ⚙️ [.readthedocs.yaml](https://docs.readthedocs.io/en/stable/config-file/v2.html)
13 | Read the Docs build configuration is stored in `.readthedocs.yaml`. 14 | 15 | 16 | Example Project usage 17 | --------------------- 18 | 19 | ``Poetry`` is the package manager for ``gpt-engineer``. In order to build the documentation, we have to add the docs requirements to the 20 | development environment. 21 | 22 | This project has a standard readthedocs layout which is built by Read the Docs almost the same way that you would build it 23 | locally (on your own laptop!). 24 | 25 | You can build and view this documentation project locally - we recommend that you activate a ``poetry shell``. 26 | 27 | Update the ``repository_stats.md`` file under ``docs/intro``. 28 | 29 | ```console 30 | # Install the required Python dependencies (Sphinx etc.) 31 | poetry install 32 | cd docs/ 33 | 34 | # Create the `api_reference.rst` 35 | python create_api_rst.py 36 | 37 | # Build the docs 38 | make html 39 | 40 | ## Alternatively, to rebuild the docs on changes with live-reload in the browser 41 | sphinx-autobuild . _build/html 42 | ``` 43 | 44 | Project Docs Structure 45 | ---------------------- 46 | If you are new to Read the Docs, you may want to refer to the [Read the Docs User documentation](https://docs.readthedocs.io/). 47 | 48 | Below is the rundown of the documentation structure for `gpt-engineer`: 49 | 50 | 1. Place your `docs/` folder alongside your Python project. 51 | 2. Copy `.readthedocs.yaml` and the `docs/` folder into your project root. 52 | 3. `docs/api_reference.rst` contains the API documentation created from docstrings. Run `create_api_rst.py` to update the API reference file. 53 | 4. The project uses the standard Google docstring style. 54 | 5. Rebuild the documentation locally to see that it works. 55 | 6. Documentation is hosted on [Read the Docs](https://docs.readthedocs.io/en/stable/tutorial/). 56 | 57 | 58 | Read the Docs tutorial 59 | ---------------------- 60 | 61 | To get started with Read the Docs, you may also refer to the 62 | [Read the Docs tutorial](https://docs.readthedocs.io/en/stable/tutorial/). 63 | 64 | With every release, build the documentation manually. 65 |
-------------------------------------------------------------------------------- /docs/examples/open_llms/README.md: -------------------------------------------------------------------------------- 1 | # Test that the Open LLM is running 2 | 3 | First start the server using only the CPU: 4 | 5 | ```bash 6 | export model_path="TheBloke/CodeLlama-13B-GGUF/codellama-13b.Q8_0.gguf" 7 | python -m llama_cpp.server --model $model_path 8 | ``` 9 | 10 | Or with GPU support (recommended): 11 | 12 | ```bash 13 | python -m llama_cpp.server --model TheBloke/CodeLlama-13B-GGUF/codellama-13b.Q8_0.gguf --n_gpu_layers 1 14 | ``` 15 | 16 | If you can offload more layers to the GPU, set `--n_gpu_layers` to a higher number. 17 | 18 | To find the number of available layers, run the above command and look for `llm_load_tensors: offloaded 1/41 layers to GPU` in the output. 19 | 20 | ## Test API call 21 | 22 | Set the environment variables: 23 | 24 | ```bash 25 | export OPENAI_API_BASE="http://localhost:8000/v1" 26 | export OPENAI_API_KEY="sk-xxx" 27 | export MODEL_NAME="CodeLlama" 28 | ``` 29 | 30 | Then ping the model via `python` using the `OpenAI` API: 31 | 32 | ```bash 33 | python examples/open_llms/openai_api_interface.py 34 | ``` 35 | 36 | If you're not using `CodeLlama`, make sure to change the `MODEL_NAME` parameter.
37 | 38 | Or using `curl` (note that the `/v1/chat/completions` endpoint expects a `messages` array rather than a bare `prompt`): 39 | 40 | ```bash 41 | curl --request POST \ 42 | --url http://localhost:8000/v1/chat/completions \ 43 | --header "Content-Type: application/json" \ 44 | --data '{ "model": "CodeLlama", "messages": [{"role": "user", "content": "Who are you?"}], "max_tokens": 60}' 45 | ``` 46 | 47 | If this works, also make sure that the `langchain` interface works, since that is how `gpte` interacts with LLMs. 48 | 49 | ## Langchain test 50 | 51 | ```bash 52 | export MODEL_NAME="CodeLlama" 53 | python examples/open_llms/langchain_interface.py 54 | ``` 55 | 56 | That's it 🤓 time to go back to [the open models guide](/docs/open_models.md#running-the-example) and give `gpte` a try. 57 |
-------------------------------------------------------------------------------- /docs/examples/open_llms/langchain_interface.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler 4 | from langchain_openai import ChatOpenAI 5 | 6 | model = ChatOpenAI( 7 | model=os.getenv("MODEL_NAME"), 8 | temperature=0.1, 9 | callbacks=[StreamingStdOutCallbackHandler()], 10 | streaming=True, 11 | ) 12 | 13 | prompt = ( 14 | "Provide me with only the code for a simple python function that sums two numbers." 15 | ) 16 | 17 | model.invoke(prompt) 18 |
-------------------------------------------------------------------------------- /docs/examples/open_llms/openai_api_interface.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from openai import OpenAI 4 | 5 | client = OpenAI( 6 | base_url=os.getenv("OPENAI_API_BASE"), api_key=os.getenv("OPENAI_API_KEY") 7 | ) 8 | 9 | response = client.chat.completions.create( 10 | model=os.getenv("MODEL_NAME"), 11 | messages=[ 12 | { 13 | "role": "user", 14 | "content": "Provide me with only the code for a simple python function that sums two numbers.", 15 | }, 16 | ], 17 | temperature=0.7, 18 | max_tokens=200, 19 | ) 20 | 21 | print(response.choices[0].message.content) 22 |
-------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to GPT-ENGINEER's Documentation 2 | ======================================= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | :caption: GET STARTED: 7 | 8 | introduction.md 9 | installation 10 | quickstart 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | :caption: USER GUIDES: 15 | 16 | windows_readme_link 17 | open_models.md 18 | tracing_debugging.md 19 | 20 | .. toctree:: 21 | :maxdepth: 2 22 | :caption: CONTRIBUTE: 23 | 24 | contributing_link 25 | roadmap_link 26 | code_conduct_link 27 | disclaimer_link 28 | docs_building.md 29 | terms_link 30 | 31 | .. toctree:: 32 | :maxdepth: 2 33 | :caption: PACKAGE API: 34 | 35 | api_reference 36 | 37 | Indices and tables 38 | ================== 39 | * :ref:`genindex` 40 | * :ref:`modindex` 41 | * :ref:`search` 42 |
-------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | 8 | Stable release 9 | -------------- 10 | 11 | To install ``gpt-engineer``, run this command in your terminal: 12 | 13 | ..
code-block:: console 14 | 15 | $ python -m pip install gpt-engineer 16 | 17 | This is the preferred method to install ``gpt-engineer``, as it will always install the most recent stable release. 18 | 19 | If you don't have `pip`_ installed, this `Python installation guide`_ can guide 20 | you through the process. 21 | 22 | .. note:: 23 | 24 | When reporting bugs, please specify your installation method (using `pip install` or by building the repository) in the "Installation Method" section of the bug report template. 25 | 26 | .. _pip: https://pip.pypa.io 27 | .. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/ 28 | 29 | 30 | From sources 31 | ------------ 32 | 33 | The sources for ``gpt-engineer`` can be downloaded from the `Github repo`_. 34 | 35 | You can either clone the public repository: 36 | 37 | .. code-block:: console 38 | 39 | $ git clone https://github.com/gpt-engineer-org/gpt-engineer.git 40 | 41 | Once you have a copy of the source, you can install it with: 42 | 43 | .. code-block:: console 44 | 45 | $ cd gpt-engineer 46 | $ poetry install 47 | $ poetry shell 48 | 49 | 50 | .. _Github repo: https://github.com/gpt-engineer-org/gpt-engineer.git 51 | 52 | Troubleshooting 53 | --------------- 54 | 55 | On macOS and Linux there are sometimes slim Python installations that do not include tkinter, which ``gpt-engineer`` requires; since tkinter is part of the standard library, it is not pip-installable. 56 | 57 | To install tkinter on macOS, you can, for example, use Homebrew: 58 | 59 | .. code-block:: console 60 | 61 | $ brew install python-tk 62 | 63 | On Debian-based Linux systems you can use: 64 | 65 | .. code-block:: console 66 | 67 | $ sudo apt-get install python3-tk 68 |
-------------------------------------------------------------------------------- /docs/introduction.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | ``gpt-engineer`` is a project that uses LLMs (such as GPT-4) to automate the process of software engineering. It includes several Python scripts that interact with the LLM to generate code, clarify requirements, generate specifications, and more. 3 | 4 | <br>
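At a high level, you point ``gpt-engineer`` at a folder containing a plain-text file named `prompt` and it generates a project from those instructions. A minimal command-line session might look like this (the project path and prompt text are only illustrative):

```console
mkdir -p projects/my-new-project
echo "Build a CLI tool that converts CSV files to JSON." > projects/my-new-project/prompt
gpte projects/my-new-project
```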
5 | 6 | ## Get started 7 | [Here’s](/en/latest/installation.html) how to install ``gpt-engineer``, set up your environment, and start building. 8 | 9 | We recommend following our [Quickstart](/en/latest/quickstart.html) guide to familiarize yourself with the framework by building your first application with ``gpt-engineer``. 10 | 11 |
12 | 13 | ## Example 14 | You can find an example of the project in action here. 15 | 16 | <!-- embedded demo video (markup not reproduced here) --> 21 |
-------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=python -msphinx 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=gpt_engineer 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed, 20 | echo.then set the SPHINXBUILD environment variable to point to the full 21 | echo.path of the 'sphinx-build' executable. Alternatively you may add the 22 | echo.Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 |
-------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | Quickstart 3 | ========== 4 | 5 | Installation 6 | ============ 7 | 8 | To install ``gpt-engineer`` run: 9 | 10 | .. code-block:: console 11 | 12 | $ python -m pip install gpt-engineer 13 | 14 | For more details, see our `Installation guide <./installation.html>`_. 15 | 16 | Setup API Key 17 | ============= 18 | 19 | Choose one of the following: 20 | 21 | - Export env variable (you can add this to ``.bashrc`` so that you don't have to do it each time you start the terminal) 22 | 23 | .. code-block:: console 24 | 25 | $ export OPENAI_API_KEY=[your api key] 26 | 27 | - Add it to the ``.env`` file: 28 | 29 | - Create a copy of ``.env.template`` named ``.env`` 30 | - Add your ``OPENAI_API_KEY`` in .env 31 | 32 | - If you want to use a custom model, visit our docs on `using open models and azure models <./open_models.html>`_. 33 | 34 | - To set the API key on Windows, check the `Windows README <./windows_readme_link.html>`_. 35 | 36 | Building with ``gpt-engineer`` 37 | ============================== 38 | 39 | Create new code (default usage) 40 | ------------------------------- 41 | 42 | - Create an empty folder for your project anywhere on your computer 43 | - Create a file called ``prompt`` (no extension) inside your new folder and fill it with instructions 44 | - Run ``gpte <project_dir>`` with a relative path to your folder 45 | - For example, if you create a new project inside the gpt-engineer ``/projects`` directory: 46 | 47 | .. code-block:: console 48 | 49 | $ gpte projects/my-new-project 50 | 51 | Improve Existing Code 52 | --------------------- 53 | 54 | - Locate a folder with code which you want to improve anywhere on your computer 55 | - Create a file called ``prompt`` (no extension) inside that folder and fill it with instructions for how you want to improve the code 56 | - Run ``gpte <project_dir> -i`` with a relative path to your folder 57 | - For example, if you want to run it against an existing project inside the gpt-engineer ``/projects`` directory: 58 | 59 | .. code-block:: console 60 | 61 | $ gpte projects/my-old-project -i 62 | 63 | By running ``gpt-engineer`` you agree to our `terms <./terms_link.html>`_.
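The ``prompt`` file is plain natural language with no required format. As an illustration, a minimal ``prompt`` for a new project could contain something like the following (the task itself is hypothetical, not part of the project):

.. code-block:: text

    Write a command-line todo application in Python.
    It should support adding, listing and removing tasks,
    and persist the tasks to a JSON file.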
64 | 65 | To **run in the browser**, you can simply open the repository in GitHub Codespaces: 66 | 67 | .. image:: https://github.com/codespaces/badge.svg 68 | :target: https://github.com/gpt-engineer-org/gpt-engineer/codespaces 69 | -------------------------------------------------------------------------------- /docs/roadmap_link.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../ROADMAP.md 2 | :parser: myst_parser.sphinx_ 3 | -------------------------------------------------------------------------------- /docs/terms_link.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../TERMS_OF_USE.md 2 | :parser: myst_parser.sphinx_ 3 | -------------------------------------------------------------------------------- /docs/tracing_debugging.md: -------------------------------------------------------------------------------- 1 | Tracing and Debugging with Weights and Biases 2 | ============================ 3 | 4 | ## How to store results in Weights & Biases 5 | 6 | W&B Prompts is a suite of LLMOps tools built for the development of LLM-powered applications. Use W&B Prompts to visualize and inspect the execution flow of your LLMs, analyze the inputs and outputs of your LLMs, view intermediate results, and securely store and manage your prompts and LLM chain configurations. Read more at https://docs.wandb.ai/guides/prompts 7 | 8 | ```shell 9 | $ export WANDB_API_KEY="YOUR-KEY" 10 | $ export LANGCHAIN_WANDB_TRACING=true 11 | ``` 12 | 13 | Sign up for free at https://wandb.ai 14 | 15 | 16 | Debug and trace the execution of the AI-generated code to compare across different experiments with `gpt-engineer` and related prompts. 17 | ![](https://drive.google.com/uc?id=10wuLwyPbH00CoESsS2Q2q6mkdrtS91jd) 18 | 19 | 20 | Automatically capture and save terminal `stdout` to one easily accessible and shareable webpage. 21 | ![](https://drive.google.com/uc?id=1gVva7ZfpwbTSBsnNvId6iq09Gw5ETOks) 22 | -------------------------------------------------------------------------------- /docs/windows_readme_link.rst: -------------------------------------------------------------------------------- 1 | ..
include:: ../WINDOWS_README.md 2 | :parser: myst_parser.sphinx_ 3 | -------------------------------------------------------------------------------- /gpt_engineer/__init__.py: -------------------------------------------------------------------------------- 1 | # Adding convenience imports to the package 2 | 3 | # from gpt_engineer.tools import code_vector_repository 4 | # from gpt_engineer.core.default import on_disk_repository 5 | -------------------------------------------------------------------------------- /gpt_engineer/applications/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AntonOsika/gpt-engineer/a90fcd543eedcc0ff2c34561bc0785d2ba83c47e/gpt_engineer/applications/__init__.py -------------------------------------------------------------------------------- /gpt_engineer/applications/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AntonOsika/gpt-engineer/a90fcd543eedcc0ff2c34561bc0785d2ba83c47e/gpt_engineer/applications/cli/__init__.py -------------------------------------------------------------------------------- /gpt_engineer/benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AntonOsika/gpt-engineer/a90fcd543eedcc0ff2c34561bc0785d2ba83c47e/gpt_engineer/benchmark/__init__.py -------------------------------------------------------------------------------- /gpt_engineer/benchmark/bench_config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from pathlib import Path 3 | 4 | from tomlkit.items import Integer 5 | 6 | from gpt_engineer.core.project_config import read_config 7 | 8 | 9 | @dataclass 10 | class AppsConfig: 11 | active: bool | None = True 12 | test_start_index: int | None = 0 13 | test_end_index: int | None = 1 14 | train_start_index: int | None = 0 15 | train_end_index: int | None = 0 16 | examples_per_problem: int | None = 10 17 | 18 | 19 | @dataclass 20 | class MbppConfig: 21 | active: bool | None = True 22 | test_len: int | None = 1 23 | train_len: int | None = 0 24 | 25 | 26 | @dataclass 27 | class GptmeConfig: 28 | active: bool | None = True 29 | 30 | 31 | @dataclass 32 | class BenchConfig: 33 | """Configuration for the gpt-engineer benchmark runs, loaded from a TOML file (e.g. `default_bench_config.toml`).""" 34 | 35 | apps: AppsConfig = field(default_factory=AppsConfig) 36 | mbpp: MbppConfig = field(default_factory=MbppConfig) 37 | gptme: GptmeConfig = field(default_factory=GptmeConfig) 38 | 39 | @classmethod 40 | def from_toml(cls, config_file: Path | str): 41 | if isinstance(config_file, str): 42 | config_file = Path(config_file) 43 | config_dict = read_config(config_file) 44 | return cls.from_dict(config_dict) 45 | 46 | @classmethod 47 | def from_dict(cls, config_dict: dict): 48 | return cls( 49 | apps=AppsConfig(**config_dict.get("apps", {})), 50 | mbpp=MbppConfig(**config_dict.get("mbpp", {})), 51 | gptme=GptmeConfig(**config_dict.get("gptme", {})), 52 | ) 53 | 54 | @staticmethod 55 | def recursive_resolve(data_dict): 56 | for key, value in data_dict.items(): 57 | if isinstance(value, Integer): 58 | data_dict[key] = int(value) 59 | elif isinstance(value, dict): 60 | BenchConfig.recursive_resolve(value) 61 | 62 | def to_dict(self): 63 | dict_config = { 64 | benchmark_name: {key: val for key, val in spec_config.__dict__.items()} 65 | for benchmark_name,
spec_config in self.__dict__.items() 66 | } 67 | BenchConfig.recursive_resolve(dict_config) 68 | 69 | return dict_config 70 | -------------------------------------------------------------------------------- /gpt_engineer/benchmark/benchmarks/apps/load.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for loading APPS evaluation tasks. 3 | 4 | This module provides functionality to load tasks for evaluating GPT-based models 5 | on smaller, more focused tasks. It defines a set of tasks with predefined prompts 6 | and assertions to benchmark the performance of AI models. 7 | 8 | Functions 9 | --------- 10 | load_apps : function 11 | Loads the APPS benchmark, which consists of a series of coding problems. 12 | """ 13 | from pathlib import Path 14 | from subprocess import TimeoutExpired 15 | from typing import Union 16 | 17 | from datasets import Dataset, DatasetDict, load_dataset, load_from_disk 18 | 19 | from gpt_engineer.benchmark.bench_config import AppsConfig 20 | from gpt_engineer.benchmark.benchmarks.apps.problem import Problem 21 | from gpt_engineer.benchmark.types import Assertable, Benchmark, Task 22 | from gpt_engineer.core.default.disk_execution_env import DiskExecutionEnv 23 | from gpt_engineer.core.files_dict import FilesDict 24 | from gpt_engineer.core.prompt import Prompt 25 | 26 | DATASET_PATH = Path(__file__).parent / "dataset" 27 | 28 | 29 | class AppsAssertion: 30 | def __init__(self, expected: str, command: str): 31 | self.expected_output = self._format(expected) 32 | self.command = command 33 | 34 | def evaluate(self, assertable: Assertable) -> bool: 35 | # Create new execution environment for every run to avoid side effects 36 | env = DiskExecutionEnv() 37 | env.upload(assertable.files) 38 | pro = env.popen(self.command) 39 | try: 40 | stdout, stderr = pro.communicate(timeout=2) 41 | stdout, stderr = stdout.decode("utf-8"), stderr.decode("utf-8") 42 | except TimeoutExpired: 43 | print("Execution Timeout") 44 | return False 45 | 46 | return self.expected_output in self._format(stdout) 47 | 48 | def _format(self, string: str) -> str: 49 | return string.replace(" ", "").replace("\n", "") 50 | 51 | 52 | def _get_dataset() -> Union[Dataset, DatasetDict]: 53 | try: 54 | return load_from_disk(str(DATASET_PATH)) 55 | except FileNotFoundError: 56 | print("Dataset not found locally, downloading...") 57 | 58 | dataset = load_dataset("codeparrot/apps", trust_remote_code=True) 59 | dataset.save_to_disk(str(DATASET_PATH)) 60 | 61 | return dataset 62 | 63 | 64 | def load_apps(config: AppsConfig) -> Benchmark: 65 | """ 66 | Loads the APPS benchmark, which consists of a series of coding problems. 67 | 68 | Returns 69 | ------- 70 | Benchmark 71 | A Benchmark object containing a list of Task objects for the APPS evaluation.
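    Examples
    --------
    An illustrative sketch of calling the loader (not a doctest; the APPS
    dataset is downloaded from Hugging Face on first use)::

        config = AppsConfig(test_start_index=0, test_end_index=1)
        benchmark = load_apps(config)
        assert benchmark.name == "apps"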
72 | """ 73 | dataset = _get_dataset() 74 | tasks = [] 75 | problems = list() 76 | for dataset_type in ["test", "train"]: 77 | problems += [ 78 | Problem( 79 | id=problem["problem_id"], 80 | question=problem["question"], 81 | input_output=problem["input_output"], 82 | starter_code=problem["starter_code"], 83 | ) 84 | for index, problem in enumerate(dataset[dataset_type]) 85 | if (index < config.__getattribute__(dataset_type + "_end_index")) 86 | and (index >= config.__getattribute__(dataset_type + "_start_index")) 87 | ] 88 | 89 | for problem in problems: 90 | prompt = Prompt( 91 | problem.question 92 | + "\nThe program, including its inputs, should be run from the command " 93 | "line like 'python main \"input1 input2 etc \"', with all inputs inside " 94 | "the quotation marks. The program should not read inputs from stdin." 95 | ) 96 | 97 | tasks.append( 98 | Task( 99 | name=str(problem.id), 100 | initial_code=FilesDict({"main.py": problem.starter_code}), 101 | command=None, # Explicitly setting `None` because each assertion specifies its command 102 | prompt=prompt, 103 | assertions={ 104 | f"correct output {i}": AppsAssertion( 105 | expected=problem.outputs[i], 106 | command="python main.py" + ' "' + problem.inputs[i] + '"', 107 | ).evaluate 108 | for i in range( 109 | min(len(problem.outputs), config.examples_per_problem) 110 | ) 111 | }, 112 | ) 113 | ) 114 | 115 | return Benchmark( 116 | name="apps", 117 | tasks=tasks, 118 | ) 119 | -------------------------------------------------------------------------------- /gpt_engineer/benchmark/benchmarks/apps/problem.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from dataclasses import dataclass 4 | from functools import cached_property 5 | from typing import List 6 | 7 | 8 | @dataclass(frozen=True) 9 | class Problem: 10 | id: int 11 | question: str 12 | input_output: str 13 | starter_code: str 14 | 15 | @property 16 | def inputs(self) -> List[str]: 17 | return self._parsed_inputs_outputs["inputs"] 18 | 19 | @property 20 | def outputs(self) -> List[str]: 21 | return self._parsed_inputs_outputs["outputs"] 22 | 23 | @cached_property 24 | def _parsed_inputs_outputs(self): 25 | return json.loads(self.input_output.replace("\n", "")) 26 | -------------------------------------------------------------------------------- /gpt_engineer/benchmark/benchmarks/apps/problems.py: -------------------------------------------------------------------------------- 1 | # TODO: Pick problems 2 | # Temporary testing against these problems 3 | PROBLEM_IDS = list(range(0, 50)) 4 | -------------------------------------------------------------------------------- /gpt_engineer/benchmark/benchmarks/gptme/load.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for loading GPT-Me evaluation tasks. 3 | 4 | This module provides functionality to load tasks for evaluating GPT-based models 5 | on smaller, more focused tasks. It defines a set of tasks with predefined prompts 6 | and assertions to benchmark the performance of AI models. 7 | 8 | Functions 9 | --------- 10 | load_gptme : function 11 | Loads the GPT-Me benchmark, which consists of a series of tasks for evaluation. 
12 | """ 13 | from gpt_engineer.benchmark.bench_config import GptmeConfig 14 | from gpt_engineer.benchmark.types import Benchmark, Task 15 | from gpt_engineer.core.files_dict import FilesDict 16 | from gpt_engineer.core.prompt import Prompt 17 | 18 | 19 | def load_gptme(config: GptmeConfig) -> Benchmark: 20 | """ 21 | Loads the GPT-Me benchmark, which consists of a series of tasks for evaluation. 22 | 23 | Returns 24 | ------- 25 | Benchmark 26 | A Benchmark object containing a list of Task objects for the GPT-Me evaluation. 27 | """ 28 | return Benchmark( 29 | name="gptme", 30 | tasks=[ 31 | Task( 32 | name="hello", 33 | initial_code=FilesDict({"hello.py": "print('Hello, world!')"}), 34 | command="python hello.py", 35 | prompt=Prompt("Change the code in hello.py to print 'Hello, human!'"), 36 | assertions={ 37 | "correct output": lambda assertable: assertable.stdout 38 | == "Hello, human!\n", 39 | "correct file": lambda assertable: assertable.files[ 40 | "hello.py" 41 | ].strip() 42 | == "print('Hello, human!')", 43 | }, 44 | ), 45 | Task( 46 | name="hello-patch", 47 | initial_code=FilesDict({"hello.py": "print('Hello, world!')"}), 48 | command="python hello.py", 49 | prompt=Prompt("Patch the code in hello.py to print 'Hello, human!'"), 50 | assertions={ 51 | "correct output": lambda assertable: assertable.stdout 52 | == "Hello, human!\n", 53 | "correct file": lambda assertable: assertable.files[ 54 | "hello.py" 55 | ].strip() 56 | == "print('Hello, human!')", 57 | }, 58 | ), 59 | Task( 60 | name="hello-ask", 61 | initial_code=FilesDict({"hello.py": "print('Hello, world!')"}), 62 | command="echo 'Erik' | python hello.py", 63 | prompt=Prompt( 64 | "modify hello.py to ask the user for their name and print 'Hello, !'. don't try to execute it" 65 | ), 66 | assertions={ 67 | "correct output": lambda assertable: "Hello, Erik!" 68 | in assertable.stdout, 69 | }, 70 | ), 71 | Task( 72 | name="prime100", 73 | initial_code=FilesDict( 74 | {} 75 | ), # Empty dictionary since no initial code is provided 76 | command="python prime.py", 77 | prompt=Prompt( 78 | "write a script prime.py that computes and prints the 100th prime number" 79 | ), 80 | assertions={ 81 | "correct output": lambda assertable: "541" 82 | in assertable.stdout.split(), 83 | }, 84 | ), 85 | Task( 86 | name="init-git", 87 | initial_code=FilesDict( 88 | {} 89 | ), # Empty dictionary since no initial code is provided 90 | command="git status", 91 | prompt=Prompt( 92 | "initialize a git repository, write a main.py file, and commit it" 93 | ), 94 | assertions={ 95 | "clean exit": lambda assertable: assertable.process.returncode == 0, 96 | "clean working tree": lambda assertable: "nothing to commit, working tree clean" 97 | in assertable.stdout, 98 | "main.py exists": lambda assertable: "main.py" in assertable.files, 99 | "we have a commit": lambda assertable: "No commits yet" 100 | not in assertable.stdout, 101 | }, 102 | ), 103 | ], 104 | ) 105 | -------------------------------------------------------------------------------- /gpt_engineer/benchmark/benchmarks/load.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for loading benchmarks. 3 | 4 | This module provides a central point to access different benchmarks by name. 5 | It maps benchmark names to their respective loading functions. 6 | 7 | Functions 8 | --------- 9 | get_benchmark : function 10 | Retrieves a Benchmark object by name. Raises ValueError if the benchmark is unknown. 
11 | """ 12 | from gpt_engineer.benchmark.bench_config import BenchConfig 13 | from gpt_engineer.benchmark.benchmarks.apps.load import load_apps 14 | from gpt_engineer.benchmark.benchmarks.gptme.load import load_gptme 15 | from gpt_engineer.benchmark.benchmarks.mbpp.load import load_mbpp 16 | from gpt_engineer.benchmark.types import Benchmark 17 | 18 | BENCHMARKS = { 19 | "gptme": load_gptme, 20 | "apps": load_apps, 21 | "mbpp": load_mbpp, 22 | } 23 | 24 | 25 | def get_benchmark(name: str, config: BenchConfig) -> Benchmark: 26 | """ 27 | Retrieves a Benchmark object by name. Raises ValueError if the benchmark is unknown. 28 | 29 | Parameters 30 | ---------- 31 | name : str 32 | The name of the benchmark to retrieve. 33 | config : BenchConfig 34 | Configuration object for the benchmarks. 35 | 36 | Returns 37 | ------- 38 | Benchmark 39 | The Benchmark object corresponding to the given name. 40 | 41 | Raises 42 | ------ 43 | ValueError 44 | If the benchmark name is not found in the BENCHMARKS mapping. 45 | """ 46 | if name not in BENCHMARKS: 47 | raise ValueError(f"Unknown benchmark {name}.") 48 | return BENCHMARKS[name](config.__getattribute__(name)) 49 | -------------------------------------------------------------------------------- /gpt_engineer/benchmark/benchmarks/mbpp/load.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for loading MBPP evaluation tasks. 3 | 4 | This module provides functionality to load tasks for evaluating GPT-based models 5 | on smaller, more focused tasks. It defines a set of tasks with predefined prompts 6 | and assertions to benchmark the performance of AI models. 7 | 8 | Functions 9 | --------- 10 | load_mbpp : function 11 | Loads the MBPP benchmark, which consists of a series coding problems. 
12 | """ 13 | from pathlib import Path 14 | from subprocess import TimeoutExpired 15 | from typing import Union 16 | 17 | from datasets import Dataset, DatasetDict, load_dataset, load_from_disk 18 | 19 | from gpt_engineer.benchmark.bench_config import MbppConfig 20 | from gpt_engineer.benchmark.benchmarks.mbpp.problem import Problem 21 | from gpt_engineer.benchmark.types import Assertable, Benchmark, Task 22 | from gpt_engineer.core.default.disk_execution_env import DiskExecutionEnv 23 | from gpt_engineer.core.files_dict import FilesDict 24 | from gpt_engineer.core.prompt import Prompt 25 | 26 | DATASET_PATH = Path(__file__).parent / "dataset" 27 | 28 | 29 | class MbppAssertion: 30 | def __init__(self, assertion: str): 31 | self.assertion = assertion 32 | 33 | def evaluate(self, assertable: Assertable) -> bool: 34 | generated_code = assertable.files["main.py"] 35 | code_with_assertion = f"{generated_code}\n{self.assertion}" 36 | 37 | # Create new execution environment for every run to avoid side effects 38 | env = DiskExecutionEnv() 39 | env.upload(FilesDict({"main.py": code_with_assertion})) 40 | pro = env.popen("python main.py") 41 | 42 | try: 43 | stdout, stderr = pro.communicate(timeout=2) 44 | stdout, stderr = stdout.decode("utf-8"), stderr.decode("utf-8") 45 | except TimeoutExpired: 46 | print("Execution Timeout") 47 | return False 48 | 49 | return not stderr 50 | 51 | 52 | def _get_dataset() -> Union[Dataset, DatasetDict]: 53 | try: 54 | return load_from_disk(str(DATASET_PATH)) 55 | except FileNotFoundError: 56 | print("Dataset not found locally, downloading...") 57 | 58 | dataset = load_dataset("mbpp", "sanitized", trust_remote_code=True) 59 | dataset.save_to_disk(str(DATASET_PATH)) 60 | 61 | return dataset 62 | 63 | 64 | def load_mbpp(config: MbppConfig) -> Benchmark: 65 | """ 66 | Loads the MBPP benchmark, which consists of a series coding problems. 67 | 68 | Returns 69 | ------- 70 | Benchmark 71 | A Benchmark object containing a list of Task objects for the MBPP evaluation. 72 | """ 73 | dataset = _get_dataset() 74 | tasks = [] 75 | problems = [] 76 | for dataset_type in ["test", "train"]: 77 | problems += [ 78 | Problem( 79 | source_file=problem["source_file"], 80 | task_id=problem["task_id"], 81 | prompt=problem["prompt"], 82 | code=problem["code"], 83 | test_imports=problem["test_imports"], 84 | test_list=problem["test_list"], 85 | ) 86 | for index, problem in enumerate(dataset[dataset_type]) 87 | if index < config.__getattribute__(dataset_type + "_len") 88 | ] 89 | 90 | for problem in problems: 91 | prompt = Prompt( 92 | problem.prompt 93 | + "Please extend given function without changing it's declaration including arguments." 
94 | ) 95 | 96 | tasks.append( 97 | Task( 98 | name=str(problem.task_id), 99 | initial_code=FilesDict({"main.py": problem.starting_code}), 100 | command=None, # Explicitly setting `None` because each assertion runs code 101 | prompt=prompt, 102 | assertions={ 103 | f"correct assertion {i}": MbppAssertion( 104 | assertion=assertion 105 | ).evaluate 106 | for i, assertion in enumerate(problem.test_list) 107 | }, 108 | ) 109 | ) 110 | 111 | return Benchmark( 112 | name="mbpp", 113 | tasks=tasks, 114 | ) 115 | -------------------------------------------------------------------------------- /gpt_engineer/benchmark/benchmarks/mbpp/problem.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List 3 | 4 | 5 | @dataclass(frozen=True) 6 | class Problem: 7 | source_file: int 8 | task_id: str 9 | prompt: str 10 | code: str 11 | test_imports: str 12 | test_list: List[str] 13 | 14 | @property 15 | def starting_code(self) -> str: 16 | lines: List[str] = [] 17 | 18 | for line in self.code.split("\n"): 19 | lines.append(line) 20 | 21 | if line.startswith("def "): 22 | lines.append("pass # TODO: Implement method\n") 23 | break 24 | 25 | return "\n".join(lines) 26 | -------------------------------------------------------------------------------- /gpt_engineer/benchmark/benchmarks/mbpp/problems.py: -------------------------------------------------------------------------------- 1 | # TODO: Pick problems 2 | # Temporary testing against these problems 3 | PROBLEM_IDS = range(0, 100) 4 | -------------------------------------------------------------------------------- /gpt_engineer/benchmark/default_bench_config.toml: -------------------------------------------------------------------------------- 1 | # For apps, the maximal range is 0:5000 for both train and test 2 | [apps] 3 | active = true 4 | test_start_index = 0 5 | test_end_index = 2 6 | train_start_index = 0 7 | train_end_index = 2 8 | 9 | # For mbpp, the maximal range is 0:47 10 | [mbpp] 11 | active = true 12 | test_len = 2 13 | train_len = 2 14 | 15 | [gptme] 16 | active = true 17 | -------------------------------------------------------------------------------- /gpt_engineer/benchmark/types.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module defining types used in benchmarking. 3 | 4 | This module contains dataclass definitions for various types used throughout the 5 | benchmarking process, such as Assertable, Task, Benchmark, and TaskResult. 6 | 7 | Classes: 8 | Assertable: 9 | Represents an object that can be asserted against in a benchmark task. 10 | 11 | Assertion: 12 | Type alias for a callable that takes an Assertable and returns a boolean. 13 | 14 | Task: 15 | Represents a single task within a benchmark, including its assertions. 16 | 17 | Benchmark: 18 | Represents a collection of tasks used to evaluate a model's performance. 19 | 20 | TaskResult: 21 | Represents the result of running a single task within a benchmark. 22 | """ 23 | from dataclasses import dataclass 24 | from subprocess import Popen 25 | from typing import Callable, Dict, Optional 26 | 27 | from gpt_engineer.core.base_execution_env import BaseExecutionEnv 28 | from gpt_engineer.core.files_dict import FilesDict 29 | from gpt_engineer.core.prompt import Prompt 30 | 31 | 32 | @dataclass 33 | class Assertable: 34 | """ 35 | A class representing an object which can be asserted against. 
36 | 37 | Attributes: 38 | files (FilesDict): The code files involved in the assertion. 39 | env (BaseExecutionEnv): The execution environment in which the code is run. 40 | process (Popen): The subprocess in which the code is run. 41 | stdout (str): The standard output from the code execution. 42 | stderr (str): The standard error from the code execution. 43 | """ 44 | 45 | files: FilesDict 46 | env: BaseExecutionEnv 47 | process: Optional[Popen] 48 | stdout: Optional[str] 49 | stderr: Optional[str] 50 | 51 | 52 | Assertion = Callable[[Assertable], bool] 53 | 54 | 55 | @dataclass 56 | class Task: 57 | name: str 58 | initial_code: Optional[FilesDict] 59 | command: Optional[str] 60 | prompt: Prompt 61 | assertions: Optional[Dict[str, Assertion]] 62 | 63 | 64 | @dataclass 65 | class Benchmark: 66 | """A benchmark is a collection of tasks that evaluate a model's performance.""" 67 | 68 | name: str 69 | tasks: list[Task] 70 | timeout: Optional[int] = None 71 | 72 | 73 | @dataclass 74 | class TaskResult: 75 | task_name: str 76 | assertion_results: dict[str, bool] 77 | duration: float 78 | 79 | # Returns success rate from 0.00 up to 1.00 80 | @property 81 | def success_rate(self) -> float: 82 | if not self.assertion_results: 83 | return 0.0 84 | 85 | succeeded = len( 86 | [result for result in self.assertion_results.values() if result is True] 87 | ) 88 | 89 | return succeeded / len(self.assertion_results) 90 | 91 | def to_dict(self) -> dict: 92 | out_dict = {key: value for key, value in self.__dict__.items()} 93 | out_dict["solved"] = self.success_rate 94 | return out_dict 95 | -------------------------------------------------------------------------------- /gpt_engineer/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AntonOsika/gpt-engineer/a90fcd543eedcc0ff2c34561bc0785d2ba83c47e/gpt_engineer/core/__init__.py -------------------------------------------------------------------------------- /gpt_engineer/core/base_agent.py: -------------------------------------------------------------------------------- 1 | """ 2 | Base Agent Module 3 | 4 | This module provides an abstract base class for an agent that interacts with code. It defines the interface 5 | for agents capable of initializing and improving code based on a given prompt. Implementations of this class 6 | are expected to provide concrete methods for these actions. 7 | 8 | Classes: 9 | BaseAgent: Abstract base class for an agent that interacts with code. 10 | """ 11 | from abc import ABC, abstractmethod 12 | 13 | from gpt_engineer.core.files_dict import FilesDict 14 | from gpt_engineer.core.prompt import Prompt 15 | 16 | 17 | class BaseAgent(ABC): 18 | """ 19 | Abstract base class for an agent that interacts with code. 20 | 21 | Defines the interface for agents capable of initializing and improving code based on a given prompt. 22 | Implementations of this class are expected to provide concrete methods for these actions. 
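    Example
    -------
    A do-nothing concrete agent, sketched only to illustrate the required
    interface::

        class EchoAgent(BaseAgent):
            def init(self, prompt: Prompt) -> FilesDict:
                return FilesDict({"main.py": "# " + prompt.text})

            def improve(self, files_dict: FilesDict, prompt: Prompt) -> FilesDict:
                return files_dict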
23 | """ 24 | 25 | @abstractmethod 26 | def init(self, prompt: Prompt) -> FilesDict: 27 | pass 28 | 29 | @abstractmethod 30 | def improve(self, files_dict: FilesDict, prompt: Prompt) -> FilesDict: 31 | pass 32 | -------------------------------------------------------------------------------- /gpt_engineer/core/base_execution_env.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from subprocess import Popen 3 | from typing import Optional, Tuple 4 | 5 | from gpt_engineer.core.files_dict import FilesDict 6 | 7 | 8 | class BaseExecutionEnv(ABC): 9 | """ 10 | Abstract base class for an execution environment capable of running code. 11 | 12 | This class defines the interface for execution environments that can execute commands, 13 | handle processes, and manage file uploads and downloads. 14 | """ 15 | 16 | @abstractmethod 17 | def run(self, command: str, timeout: Optional[int] = None) -> Tuple[str, str, int]: 18 | """ 19 | Runs a command in the execution environment. 20 | """ 21 | raise NotImplementedError 22 | 23 | @abstractmethod 24 | def popen(self, command: str) -> Popen: 25 | """ 26 | Runs a command in the execution environment. 27 | """ 28 | raise NotImplementedError 29 | 30 | @abstractmethod 31 | def upload(self, files: FilesDict) -> "BaseExecutionEnv": 32 | """ 33 | Uploads files to the execution environment. 34 | """ 35 | raise NotImplementedError 36 | 37 | @abstractmethod 38 | def download(self) -> FilesDict: 39 | """ 40 | Downloads files from the execution environment. 41 | """ 42 | raise NotImplementedError 43 | -------------------------------------------------------------------------------- /gpt_engineer/core/base_memory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Base Memory Module 3 | 4 | This module provides a type alias for a mutable mapping that represents the base memory structure 5 | used in the GPT Engineer project. The base memory is a mapping from file names (as strings or Path objects) 6 | to their corresponding code content (as strings). 7 | 8 | Type Aliases: 9 | BaseMemory: A mutable mapping from file names to code content. 10 | """ 11 | 12 | from pathlib import Path 13 | from typing import MutableMapping, Union 14 | 15 | BaseMemory = MutableMapping[Union[str, Path], str] 16 | -------------------------------------------------------------------------------- /gpt_engineer/core/default/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AntonOsika/gpt-engineer/a90fcd543eedcc0ff2c34561bc0785d2ba83c47e/gpt_engineer/core/default/__init__.py -------------------------------------------------------------------------------- /gpt_engineer/core/default/constants.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module defining constants used throughout the application. 3 | 4 | This module contains definitions of constants that are used across various 5 | components of the application to maintain consistency and ease of configuration. 6 | 7 | Constants 8 | --------- 9 | MAX_EDIT_REFINEMENT_STEPS : int 10 | The maximum number of refinement steps allowed when generating edit blocks. 
11 | """ 12 | MAX_EDIT_REFINEMENT_STEPS = 2 13 | -------------------------------------------------------------------------------- /gpt_engineer/core/default/disk_execution_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for managing the execution environment on the local disk. 3 | 4 | This module provides a class that handles the execution of code stored on the local 5 | file system. It includes methods for uploading files to the execution environment, 6 | running commands, and capturing the output. 7 | 8 | Classes 9 | ------- 10 | DiskExecutionEnv 11 | An execution environment that runs code on the local file system and captures 12 | the output of the execution. 13 | 14 | Imports 15 | ------- 16 | - subprocess: For running shell commands. 17 | - time: For timing the execution of commands. 18 | - Path: For handling file system paths. 19 | - Optional, Tuple, Union: For type annotations. 20 | - BaseExecutionEnv: For inheriting the base execution environment interface. 21 | - FileStore: For managing file storage. 22 | - FilesDict: For handling collections of files. 23 | """ 24 | 25 | import subprocess 26 | import time 27 | 28 | from pathlib import Path 29 | from typing import Optional, Tuple, Union 30 | 31 | from gpt_engineer.core.base_execution_env import BaseExecutionEnv 32 | from gpt_engineer.core.default.file_store import FileStore 33 | from gpt_engineer.core.files_dict import FilesDict 34 | 35 | 36 | class DiskExecutionEnv(BaseExecutionEnv): 37 | """ 38 | An execution environment that runs code on the local file system and captures 39 | the output of the execution. 40 | 41 | This class is responsible for executing code that is stored on disk. It ensures that 42 | the necessary entrypoint file exists and then runs the code using a subprocess. If the 43 | execution is interrupted by the user, it handles the interruption gracefully. 44 | 45 | Attributes 46 | ---------- 47 | store : FileStore 48 | An instance of FileStore that manages the storage of files in the execution 49 | environment. 
50 | """ 51 | 52 | def __init__(self, path: Union[str, Path, None] = None): 53 | self.files = FileStore(path) 54 | 55 | def upload(self, files: FilesDict) -> "DiskExecutionEnv": 56 | self.files.push(files) 57 | return self 58 | 59 | def download(self) -> FilesDict: 60 | return self.files.pull() 61 | 62 | def popen(self, command: str) -> subprocess.Popen: 63 | p = subprocess.Popen( 64 | command, 65 | shell=True, 66 | cwd=self.files.working_dir, 67 | stdout=subprocess.PIPE, 68 | stderr=subprocess.PIPE, 69 | ) 70 | return p 71 | 72 | def run(self, command: str, timeout: Optional[int] = None) -> Tuple[str, str, int]: 73 | start = time.time() 74 | print("\n--- Start of run ---") 75 | # while running, also print the stdout and stderr 76 | p = subprocess.Popen( 77 | command, 78 | stdout=subprocess.PIPE, 79 | stderr=subprocess.PIPE, 80 | cwd=self.files.working_dir, 81 | text=True, 82 | shell=True, 83 | ) 84 | print("$", command) 85 | stdout_full, stderr_full = "", "" 86 | 87 | try: 88 | while p.poll() is None: 89 | assert p.stdout is not None 90 | assert p.stderr is not None 91 | stdout = p.stdout.readline() 92 | stderr = p.stderr.readline() 93 | if stdout: 94 | print(stdout, end="") 95 | stdout_full += stdout 96 | if stderr: 97 | print(stderr, end="") 98 | stderr_full += stderr 99 | if timeout and time.time() - start > timeout: 100 | print("Timeout!") 101 | p.kill() 102 | raise TimeoutError() 103 | except KeyboardInterrupt: 104 | print() 105 | print("Stopping execution.") 106 | print("Execution stopped.") 107 | p.kill() 108 | print() 109 | print("--- Finished run ---\n") 110 | 111 | return stdout_full, stderr_full, p.returncode 112 | -------------------------------------------------------------------------------- /gpt_engineer/core/default/file_store.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | 3 | from pathlib import Path 4 | from typing import Union 5 | 6 | from gpt_engineer.core.files_dict import FilesDict 7 | from gpt_engineer.core.linting import Linting 8 | 9 | 10 | class FileStore: 11 | """ 12 | Module for managing file storage in a temporary directory. 13 | 14 | This module provides a class that manages the storage of files in a temporary directory. 15 | It includes methods for uploading files to the directory and downloading them as a 16 | collection of files. 17 | 18 | Classes 19 | ------- 20 | FileStore 21 | Manages file storage in a temporary directory, allowing for upload and download of files. 22 | 23 | Imports 24 | ------- 25 | - tempfile: For creating temporary directories. 26 | - Path: For handling file system paths. 27 | - Union: For type annotations. 28 | - FilesDict: For handling collections of files. 
29 | """ 30 | 31 | def __init__(self, path: Union[str, Path, None] = None): 32 | if path is None: 33 | path = Path(tempfile.mkdtemp(prefix="gpt-engineer-")) 34 | 35 | self.working_dir = Path(path) 36 | self.working_dir.mkdir(parents=True, exist_ok=True) 37 | self.id = self.working_dir.name.split("-")[-1] 38 | 39 | def push(self, files: FilesDict): 40 | for name, content in files.items(): 41 | path = self.working_dir / name 42 | path.parent.mkdir(parents=True, exist_ok=True) 43 | with open(path, "w") as f: 44 | f.write(content) 45 | return self 46 | 47 | def linting(self, files: FilesDict) -> FilesDict: 48 | # lint the code 49 | linting = Linting() 50 | return linting.lint_files(files) 51 | 52 | def pull(self) -> FilesDict: 53 | files = {} 54 | for path in self.working_dir.glob("**/*"): 55 | if path.is_file(): 56 | with open(path, "r") as f: 57 | try: 58 | content = f.read() 59 | except UnicodeDecodeError: 60 | content = "binary file" 61 | files[str(path.relative_to(self.working_dir))] = content 62 | return FilesDict(files) 63 | -------------------------------------------------------------------------------- /gpt_engineer/core/default/paths.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module defining file system paths used by the application. 3 | 4 | This module contains definitions of file system paths that are used throughout the 5 | application to locate and manage various files and directories, such as logs, memory, 6 | and preprompts. 7 | 8 | Constants 9 | --------- 10 | META_DATA_REL_PATH : str 11 | The relative path to the directory where metadata is stored. 12 | 13 | MEMORY_REL_PATH : str 14 | The relative path to the directory where memory-related files are stored. 15 | 16 | CODE_GEN_LOG_FILE : str 17 | The filename for the log file that contains all output from code generation. 18 | 19 | DEBUG_LOG_FILE : str 20 | The filename for the log file that contains debug information. 21 | 22 | ENTRYPOINT_FILE : str 23 | The filename for the entrypoint script that is executed to run the application. 24 | 25 | ENTRYPOINT_LOG_FILE : str 26 | The filename for the log file that contains the chat related to entrypoint generation. 27 | 28 | PREPROMPTS_PATH : Path 29 | The file system path to the directory containing preprompt files. 30 | 31 | Functions 32 | --------- 33 | memory_path : function 34 | Constructs the full path to the memory directory based on a given base path. 35 | 36 | metadata_path : function 37 | Constructs the full path to the metadata directory based on a given base path. 38 | """ 39 | import os 40 | 41 | from pathlib import Path 42 | 43 | META_DATA_REL_PATH = ".gpteng" 44 | MEMORY_REL_PATH = os.path.join(META_DATA_REL_PATH, "memory") 45 | CODE_GEN_LOG_FILE = "all_output.txt" 46 | IMPROVE_LOG_FILE = "improve.txt" 47 | DIFF_LOG_FILE = "diff_errors.txt" 48 | DEBUG_LOG_FILE = "debug_log_file.txt" 49 | ENTRYPOINT_FILE = "run.sh" 50 | ENTRYPOINT_LOG_FILE = "gen_entrypoint_chat.txt" 51 | ENTRYPOINT_FILE = "run.sh" 52 | PREPROMPTS_PATH = Path(__file__).parent.parent.parent / "preprompts" 53 | 54 | 55 | def memory_path(path): 56 | """ 57 | Constructs the full path to the memory directory based on a given base path. 58 | 59 | Parameters 60 | ---------- 61 | path : str 62 | The base path to append the memory directory to. 63 | 64 | Returns 65 | ------- 66 | str 67 | The full path to the memory directory. 
68 | """ 69 | return os.path.join(path, MEMORY_REL_PATH) 70 | 71 | 72 | def metadata_path(path): 73 | """ 74 | Constructs the full path to the metadata directory based on a given base path. 75 | 76 | Parameters 77 | ---------- 78 | path : str 79 | The base path to append the metadata directory to. 80 | 81 | Returns 82 | ------- 83 | str 84 | The full path to the metadata directory. 85 | """ 86 | return os.path.join(path, META_DATA_REL_PATH) 87 | -------------------------------------------------------------------------------- /gpt_engineer/core/default/simple_agent.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for defining a simple agent that uses AI to manage code generation and improvement. 3 | 4 | This module provides a class that represents an agent capable of initializing and improving 5 | a codebase using AI. It handles interactions with the AI model, memory, and execution 6 | environment to generate and refine code based on user prompts. 7 | 8 | """ 9 | 10 | import tempfile 11 | 12 | from typing import Optional 13 | 14 | from gpt_engineer.core.ai import AI 15 | from gpt_engineer.core.base_agent import BaseAgent 16 | from gpt_engineer.core.base_execution_env import BaseExecutionEnv 17 | from gpt_engineer.core.base_memory import BaseMemory 18 | from gpt_engineer.core.default.disk_execution_env import DiskExecutionEnv 19 | from gpt_engineer.core.default.disk_memory import DiskMemory 20 | from gpt_engineer.core.default.paths import PREPROMPTS_PATH, memory_path 21 | from gpt_engineer.core.default.steps import gen_code, gen_entrypoint, improve_fn 22 | from gpt_engineer.core.files_dict import FilesDict 23 | from gpt_engineer.core.preprompts_holder import PrepromptsHolder 24 | from gpt_engineer.core.prompt import Prompt 25 | 26 | 27 | class SimpleAgent(BaseAgent): 28 | """ 29 | An agent that uses AI to generate and improve code based on a given prompt. 30 | 31 | This agent is capable of initializing a codebase from a prompt and improving an existing 32 | codebase based on user input. It uses an AI model to generate and refine code, and it 33 | interacts with a repository and an execution environment to manage and execute the code. 34 | 35 | Attributes 36 | ---------- 37 | memory : BaseMemory 38 | The memory interface where the code and related data are stored. 39 | execution_env : BaseExecutionEnv 40 | The execution environment in which the code is executed. 41 | ai : AI 42 | The AI model used for generating and improving code. 43 | preprompts_holder : PrepromptsHolder 44 | The holder for preprompt messages that guide the AI model. 
45 | """ 46 | 47 | def __init__( 48 | self, 49 | memory: BaseMemory, 50 | execution_env: BaseExecutionEnv, 51 | ai: AI = None, 52 | preprompts_holder: PrepromptsHolder = None, 53 | ): 54 | self.preprompts_holder = preprompts_holder or PrepromptsHolder(PREPROMPTS_PATH) 55 | self.memory = memory 56 | self.execution_env = execution_env 57 | self.ai = ai or AI() 58 | 59 | @classmethod 60 | def with_default_config( 61 | cls, path: str, ai: AI = None, preprompts_holder: PrepromptsHolder = None 62 | ): 63 | return cls( 64 | memory=DiskMemory(memory_path(path)), 65 | execution_env=DiskExecutionEnv(), 66 | ai=ai, 67 | preprompts_holder=preprompts_holder or PrepromptsHolder(PREPROMPTS_PATH), 68 | ) 69 | 70 | def init(self, prompt: Prompt) -> FilesDict: 71 | files_dict = gen_code(self.ai, prompt, self.memory, self.preprompts_holder) 72 | entrypoint = gen_entrypoint( 73 | self.ai, prompt, files_dict, self.memory, self.preprompts_holder 74 | ) 75 | combined_dict = {**files_dict, **entrypoint} 76 | files_dict = FilesDict(combined_dict) 77 | return files_dict 78 | 79 | def improve( 80 | self, 81 | files_dict: FilesDict, 82 | prompt: Prompt, 83 | execution_command: Optional[str] = None, 84 | ) -> FilesDict: 85 | files_dict = improve_fn( 86 | self.ai, prompt, files_dict, self.memory, self.preprompts_holder 87 | ) 88 | return files_dict 89 | 90 | 91 | def default_config_agent(): 92 | """ 93 | Creates an instance of SimpleAgent with default configuration. 94 | 95 | Returns 96 | ------- 97 | SimpleAgent 98 | An instance of SimpleAgent with a temporary directory as its base path. 99 | """ 100 | return SimpleAgent.with_default_config(tempfile.mkdtemp()) 101 | -------------------------------------------------------------------------------- /gpt_engineer/core/files_dict.py: -------------------------------------------------------------------------------- 1 | """ 2 | FilesDict Module 3 | 4 | This module provides a FilesDict class which is a dictionary-based container for managing code files. 5 | It extends the standard dictionary to enforce string keys and values, representing filenames and their 6 | corresponding code content. It also provides methods to format its contents for chat-based interaction 7 | with an AI agent and to enforce type checks on keys and values. 8 | 9 | Classes: 10 | FilesDict: A dictionary-based container for managing code files. 11 | """ 12 | from collections import OrderedDict 13 | from pathlib import Path 14 | from typing import Union 15 | 16 | 17 | # class Code(MutableMapping[str | Path, str]): 18 | # ToDo: implement as mutable mapping, potentially holding a dict instead of being a dict. 19 | class FilesDict(dict): 20 | """ 21 | A dictionary-based container for managing code files. 22 | 23 | This class extends the standard dictionary to enforce string keys and values, 24 | representing filenames and their corresponding code content. It provides methods 25 | to format its contents for chat-based interaction with an AI agent and to enforce 26 | type checks on keys and values. 27 | """ 28 | 29 | def __setitem__(self, key: Union[str, Path], value: str): 30 | """ 31 | Set the code content for the given filename, enforcing type checks on the key and value. 32 | 33 | Overrides the dictionary's __setitem__ to enforce type checks on the key and value. 34 | The key must be a string or a Path object, and the value must be a string representing 35 | the code content. 36 | 37 | Parameters 38 | ---------- 39 | key : Union[str, Path] 40 | The filename as a key for the code content. 
41 | value : str 42 | The code content to associate with the filename. 43 | 44 | Raises 45 | ------ 46 | TypeError 47 | If the key is not a string or Path, or if the value is not a string. 48 | """ 49 | if not isinstance(key, (str, Path)): 50 | raise TypeError("Keys must be strings or Path's") 51 | if not isinstance(value, str): 52 | raise TypeError("Values must be strings") 53 | super().__setitem__(key, value) 54 | 55 | def to_chat(self): 56 | """ 57 | Formats the items of the object (assuming file name and content pairs) 58 | into a string suitable for chat display. 59 | 60 | Returns 61 | ------- 62 | str 63 | A string representation of the files. 64 | """ 65 | chat_str = "" 66 | for file_name, file_content in self.items(): 67 | lines_dict = file_to_lines_dict(file_content) 68 | chat_str += f"File: {file_name}\n" 69 | for line_number, line_content in lines_dict.items(): 70 | chat_str += f"{line_number} {line_content}\n" 71 | chat_str += "\n" 72 | return f"```\n{chat_str}```" 73 | 74 | def to_log(self): 75 | """ 76 | Formats the items of the object (assuming file name and content pairs) 77 | into a string suitable for log display. 78 | 79 | Returns 80 | ------- 81 | str 82 | A string representation of the files. 83 | """ 84 | log_str = "" 85 | for file_name, file_content in self.items(): 86 | log_str += f"File: {file_name}\n" 87 | log_str += file_content 88 | log_str += "\n" 89 | return log_str 90 | 91 | 92 | def file_to_lines_dict(file_content: str) -> dict: 93 | """ 94 | Converts file content into a dictionary where each line number is a key 95 | and the corresponding line content is the value. 96 | 97 | Parameters 98 | ---------- 99 | file_content : str 100 | The content of the file. 101 | 102 | Returns 103 | ------- 104 | dict 105 | A dictionary mapping line numbers (starting at 1) to the corresponding line contents.
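    Example
    -------
    >>> two_lines = "first" + chr(10) + "second"  # chr(10) is a newline
    >>> dict(file_to_lines_dict(two_lines))
    {1: 'first', 2: 'second'}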
108 | """ 109 | lines_dict = OrderedDict( 110 | { 111 | line_number: line_content 112 | for line_number, line_content in enumerate(file_content.split("\n"), 1) 113 | } 114 | ) 115 | return lines_dict 116 | -------------------------------------------------------------------------------- /gpt_engineer/core/git.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import subprocess 3 | 4 | from pathlib import Path 5 | from typing import List 6 | 7 | from gpt_engineer.core.files_dict import FilesDict 8 | 9 | 10 | def is_git_installed(): 11 | return shutil.which("git") is not None 12 | 13 | 14 | def is_git_repo(path: Path): 15 | return ( 16 | subprocess.run( 17 | ["git", "rev-parse", "--is-inside-work-tree"], 18 | cwd=path, 19 | stdout=subprocess.PIPE, 20 | stderr=subprocess.PIPE, 21 | ).returncode 22 | == 0 23 | ) 24 | 25 | 26 | def init_git_repo(path: Path): 27 | subprocess.run(["git", "init"], cwd=path) 28 | 29 | 30 | def has_uncommitted_changes(path: Path): 31 | return bool( 32 | subprocess.run( 33 | ["git", "diff", "--exit-code"], 34 | cwd=path, 35 | stdout=subprocess.PIPE, 36 | stderr=subprocess.PIPE, 37 | ).returncode 38 | ) 39 | 40 | 41 | def filter_files_with_uncommitted_changes( 42 | basepath: Path, files_dict: FilesDict 43 | ) -> List[Path]: 44 | files_with_diff = ( 45 | subprocess.run( 46 | ["git", "diff", "--name-only"], cwd=basepath, stdout=subprocess.PIPE 47 | ) 48 | .stdout.decode() 49 | .splitlines() 50 | ) 51 | return [f for f in files_dict.keys() if f in files_with_diff] 52 | 53 | 54 | def stage_files(path: Path, files: List[str]): 55 | subprocess.run(["git", "add", *files], cwd=path) 56 | 57 | 58 | def filter_by_gitignore(path: Path, file_list: List[str]) -> List[str]: 59 | out = subprocess.run( 60 | ["git", "-C", ".", "check-ignore", "--no-index", "--stdin"], 61 | cwd=path, 62 | input="\n".join(file_list).encode(), 63 | stdout=subprocess.PIPE, 64 | stderr=subprocess.PIPE, 65 | ) 66 | paths = out.stdout.decode().splitlines() 67 | # return file_list but filter out the results from git check-ignore 68 | return [f for f in file_list if f not in paths] 69 | 70 | 71 | def stage_uncommitted_to_git(path, files_dict, improve_mode): 72 | # Check if there's a git repo and verify that there aren't any uncommitted changes 73 | if is_git_installed() and not improve_mode: 74 | if not is_git_repo(path): 75 | print("\nInitializing an empty git repository") 76 | init_git_repo(path) 77 | 78 | if is_git_repo(path): 79 | modified_files = filter_files_with_uncommitted_changes(path, files_dict) 80 | if modified_files: 81 | print( 82 | "Staging the following uncommitted files before overwriting: ", 83 | ", ".join(modified_files), 84 | ) 85 | stage_files(path, modified_files) 86 | -------------------------------------------------------------------------------- /gpt_engineer/core/linting.py: -------------------------------------------------------------------------------- 1 | import black 2 | 3 | from gpt_engineer.core.files_dict import FilesDict 4 | 5 | 6 | class Linting: 7 | def __init__(self): 8 | # Dictionary to hold linting methods for different file types 9 | self.linters = {".py": self.lint_python} 10 | 11 | import black 12 | 13 | def lint_python(self, content, config): 14 | """Lint Python files using the `black` library, handling all exceptions silently and logging them. 15 | This function attempts to format the code and returns the formatted code if successful. 
16 | If any error occurs during formatting, it logs the error and returns the original content. 17 | """ 18 | try: 19 | # Try to format the content using black 20 | linted_content = black.format_str(content, mode=black.FileMode(**config)) 21 | except black.NothingChanged: 22 | # If nothing changed, log the info and return the original content 23 | print("\nInfo: No changes were made during formatting.\n") 24 | linted_content = content 25 | except Exception as error: 26 | # If any other exception occurs, log the error and return the original content 27 | print(f"\nError: Could not format due to {error}\n") 28 | linted_content = content 29 | return linted_content 30 | 31 | def lint_files(self, files_dict: FilesDict, config: dict = None) -> FilesDict: 32 | """ 33 | Lints files based on their extension using registered linting functions. 34 | 35 | Parameters 36 | ---------- 37 | files_dict : FilesDict 38 | The dictionary of file names to their respective source code content. 39 | config : dict, optional 40 | A dictionary of configuration options for the linting tools. 41 | 42 | Returns 43 | ------- 44 | FilesDict 45 | The dictionary of file names to their respective source code content after linting. 46 | """ 47 | if config is None: 48 | config = {} 49 | 50 | for filename, content in files_dict.items(): 51 | extension = filename[ 52 | filename.rfind(".") : 53 | ].lower() # Ensure case insensitivity 54 | if extension in self.linters: 55 | original_content = content 56 | linted_content = self.linters[extension](content, config) 57 | if linted_content != original_content: 58 | print(f"Linted {filename}.") 59 | else: 60 | print(f"No changes made for {filename}.") 61 | files_dict[filename] = linted_content 62 | else: 63 | print(f"No linter registered for {filename}.") 64 | return files_dict 65 | -------------------------------------------------------------------------------- /gpt_engineer/core/preprompts_holder.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Dict 3 | 4 | from gpt_engineer.core.default.disk_memory import DiskMemory 5 | 6 | 7 | class PrepromptsHolder: 8 | """ 9 | A holder for preprompt texts that are stored on disk. 10 | 11 | This class provides methods to retrieve preprompt texts from a specified directory. 12 | 13 | Attributes 14 | ---------- 15 | preprompts_path : Path 16 | The file path to the directory containing preprompt texts. 17 | 18 | Methods 19 | ------- 20 | get_preprompts() -> Dict[str, str] 21 | Retrieve all preprompt texts from the directory and return them as a dictionary. 
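    Example
    -------
    A sketch of loading the preprompts bundled with the package::

        from gpt_engineer.core.default.paths import PREPROMPTS_PATH

        holder = PrepromptsHolder(PREPROMPTS_PATH)
        texts = holder.get_preprompts()  # e.g. texts["generate"], texts["improve"]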
22 | """ 23 | 24 | def __init__(self, preprompts_path: Path): 25 | self.preprompts_path = preprompts_path 26 | 27 | def get_preprompts(self) -> Dict[str, str]: 28 | preprompts_repo = DiskMemory(self.preprompts_path) 29 | return {file_name: preprompts_repo[file_name] for file_name in preprompts_repo} 30 | -------------------------------------------------------------------------------- /gpt_engineer/core/prompt.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from typing import Dict, Optional 4 | 5 | 6 | class Prompt: 7 | def __init__( 8 | self, 9 | text: str, 10 | image_urls: Optional[Dict[str, str]] = None, 11 | entrypoint_prompt: str = "", 12 | ): 13 | self.text = text 14 | self.image_urls = image_urls 15 | self.entrypoint_prompt = entrypoint_prompt 16 | 17 | def __repr__(self): 18 | return f"Prompt(text={self.text!r}, image_urls={self.image_urls!r})" 19 | 20 | def to_langchain_content(self): 21 | content = [{"type": "text", "text": f"Request: {self.text}"}] 22 | 23 | if self.image_urls: 24 | for name, url in self.image_urls.items(): 25 | image_content = { 26 | "type": "image_url", 27 | "image_url": { 28 | "url": url, 29 | "detail": "low", 30 | }, 31 | } 32 | content.append(image_content) 33 | 34 | return content 35 | 36 | def to_dict(self): 37 | return { 38 | "text": self.text, 39 | "image_urls": self.image_urls, 40 | "entrypoint_prompt": self.entrypoint_prompt, 41 | } 42 | 43 | def to_json(self): 44 | return json.dumps(self.to_dict()) 45 | -------------------------------------------------------------------------------- /gpt_engineer/core/version_manager.py: -------------------------------------------------------------------------------- 1 | """ 2 | Version Manager Module 3 | 4 | This module provides an abstract base class for a version manager that handles the creation of snapshots 5 | for code. Implementations of this class are expected to provide methods to create a snapshot of the given 6 | code and return a reference to it. 7 | """ 8 | from abc import ABC, abstractmethod 9 | from pathlib import Path 10 | from typing import Union 11 | 12 | from gpt_engineer.core.files_dict import FilesDict 13 | 14 | 15 | class BaseVersionManager(ABC): 16 | """ 17 | Abstract base class for a version manager. 18 | 19 | Defines the interface for version managers that handle the creation of snapshots for code. 20 | Implementations of this class are expected to provide methods to create a snapshot of the given 21 | code and return a reference to it. 22 | """ 23 | 24 | @abstractmethod 25 | def __init__(self, path: Union[str, Path]): 26 | pass 27 | 28 | @abstractmethod 29 | def snapshot(self, files_dict: FilesDict) -> str: 30 | pass 31 | -------------------------------------------------------------------------------- /gpt_engineer/preprompts/clarify: -------------------------------------------------------------------------------- 1 | Given some instructions, determine if anything needs to be clarified, do not carry them out. 2 | You can make reasonable assumptions, but if you are unsure, ask a single clarification question. 3 | Otherwise state: "Nothing to clarify" 4 | -------------------------------------------------------------------------------- /gpt_engineer/preprompts/entrypoint: -------------------------------------------------------------------------------- 1 | You will get information about a codebase that is currently on disk in the current folder. 2 | The user will ask you to write a script that runs the code in a specific way. 
3 | You will answer with code blocks that include all the necessary terminal commands. 4 | Do not install globally. Do not use sudo. 5 | Do not explain the code, just give the commands. 6 | Do not use placeholders, use example values (like . for a folder argument) if necessary. 7 | -------------------------------------------------------------------------------- /gpt_engineer/preprompts/file_format: -------------------------------------------------------------------------------- 1 | You will output the content of each file necessary to achieve the goal, including ALL code. 2 | Represent files like so: 3 | 4 | FILENAME 5 | ``` 6 | CODE 7 | ``` 8 | 9 | The following tokens must be replaced like so: 10 | FILENAME is the lowercase combined path and file name including the file extension 11 | CODE is the code in the file 12 | 13 | Example representation of a file: 14 | 15 | src/hello_world.py 16 | ``` 17 | print("Hello World") 18 | ``` 19 | 20 | Do not comment on what every file does. Please note that the code should be fully functional. No placeholders. 21 | -------------------------------------------------------------------------------- /gpt_engineer/preprompts/file_format_diff: -------------------------------------------------------------------------------- 1 | You will output the content of each file necessary to achieve the goal, including ALL code. 2 | Output requested code changes and new code in the unified "git diff" syntax. Example: 3 | 4 | ```diff 5 | --- example.txt 6 | +++ example.txt 7 | @@ -6,3 +6,4 @@ 8 | line content A 9 | line content B 10 | + new line added 11 | - original line X 12 | + modified line X with changes 13 | @@ -26,4 +27,5 @@ 14 | condition check: 15 | - action for condition A 16 | + if certain condition is met: 17 | + alternative action for condition A 18 | another condition check: 19 | - action for condition B 20 | + modified action for condition B 21 | ``` 22 | 23 | Example of a git diff creating a new file: 24 | 25 | ```diff 26 | --- /dev/null 27 | +++ new_file.txt 28 | @@ -0,0 +1,3 @@ 29 | +First example line 30 | + 31 | +Last example line 32 | ``` 33 | 34 | RULES: 35 | -A program will apply the diffs you generate exactly to the code, so diffs must be precise and unambiguous! 36 | -Every diff must be fenced with triple backtick ```. 37 | -The file names at the beginning of a diff (lines starting with --- and +++) are the relative paths to the file before and after the diff. 38 | -LINES TO BE REMOVED (starting with single -) AND LINES TO BE RETAINED (no starting symbol) HAVE TO REPLICATE THE DIFFED HUNK OF THE CODE EXACTLY LINE BY LINE. KEEP THE NUMBER OF RETAINED LINES SMALL IF POSSIBLE. 39 | -EACH LINE IN THE SOURCE FILES STARTS WITH A LINE NUMBER, WHICH IS NOT PART OF THE SOURCE CODE. NEVER TRANSFER THESE LINE NUMBERS TO THE DIFF HUNKS. 40 | -AVOID STARTING A HUNK WITH AN EMPTY LINE. 41 | -ENSURE ALL CHANGES ARE PROVIDED IN A SINGLE DIFF CHUNK PER FILE TO PREVENT MULTIPLE DIFFS ON THE SAME FILE. 42 | -------------------------------------------------------------------------------- /gpt_engineer/preprompts/file_format_fix: -------------------------------------------------------------------------------- 1 | Please fix any errors in the code above. 2 | 3 | You will output the content of each new or changed file.
4 | Represent files like so: 5 | 6 | FILENAME 7 | ``` 8 | CODE 9 | ``` 10 | 11 | The following tokens must be replaced like so: 12 | FILENAME is the lowercase combined path and file name including the file extension 13 | CODE is the code in the file 14 | 15 | Example representation of a file: 16 | 17 | src/hello_world.py 18 | ``` 19 | print("Hello World") 20 | ``` 21 | 22 | Do not comment on what every file does. Please note that the code should be fully functional. No placeholders. 23 | -------------------------------------------------------------------------------- /gpt_engineer/preprompts/generate: -------------------------------------------------------------------------------- 1 | Think step by step and reason yourself to the correct decisions to make sure we get it right. 2 | First lay out the names of the core classes, functions, and methods that will be necessary, as well as a quick comment on their purpose. 3 | 4 | FILE_FORMAT 5 | 6 | You will start with the "entrypoint" file, then go to the ones that are imported by that file, and so on. 7 | Please note that the code should be fully functional. No placeholders. 8 | 9 | Follow a language and framework appropriate best practice file naming convention. 10 | Make sure that files contain all imports, types etc. The code should be fully functional. Make sure that code in different files is compatible with each other. 11 | Make sure to implement all code; if you are unsure, write a plausible implementation. 12 | Include a module dependency or package manager dependency definition file. 13 | Before you finish, double check that all parts of the architecture are present in the files. 14 | 15 | When you are done, finish by writing "this concludes a fully working implementation". 16 | -------------------------------------------------------------------------------- /gpt_engineer/preprompts/improve: -------------------------------------------------------------------------------- 1 | Think step by step and reason yourself to the correct decisions to make sure we get it right. 2 | Make changes to existing code and implement new code in the unified git diff syntax. When implementing new code, first lay out the names of the core classes, functions, and methods that will be necessary, as well as a quick comment on their purpose. 3 | 4 | FILE_FORMAT 5 | 6 | As far as is compatible with the user request, start with the "entrypoint" file, then go to the ones that are imported by that file, and so on. 7 | Please note that the code should be fully functional. No placeholders. 8 | 9 | Follow a language and framework appropriate best practice file naming convention. 10 | Make sure that files contain all imports, types etc. The code should be fully functional. Make sure that code in different files is compatible with each other. 11 | Make sure to implement all code; if you are unsure, write a plausible implementation. 12 | Include a module dependency or package manager dependency definition file. 13 | Before you finish, double check that all parts of the architecture are present in the files. 14 | 15 | When you are done, finish by writing "this concludes a fully working implementation". 16 | -------------------------------------------------------------------------------- /gpt_engineer/preprompts/philosophy: -------------------------------------------------------------------------------- 1 | Almost always put different classes in different files. 2 | Always use the programming language the user asks for. 3 | For Python, you always create an appropriate requirements.txt file.
4 | For NodeJS, you always create an appropriate package.json file. 5 | Always add a comment briefly describing the purpose of the function definition. 6 | Add comments explaining very complex bits of logic. 7 | Always follow the best practices for the requested languages for folder/file structure and how to package the project. 8 | 9 | 10 | Python toolbelt preferences: 11 | - pytest 12 | - dataclasses 13 | -------------------------------------------------------------------------------- /gpt_engineer/preprompts/roadmap: -------------------------------------------------------------------------------- 1 | You will get instructions for code to write. 2 | You will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code. 3 | -------------------------------------------------------------------------------- /gpt_engineer/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AntonOsika/gpt-engineer/a90fcd543eedcc0ff2c34561bc0785d2ba83c47e/gpt_engineer/tools/__init__.py -------------------------------------------------------------------------------- /gpt_engineer/tools/supported_languages.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module defines the supported programming languages for document chunking. 3 | 4 | Variables: 5 | SUPPORTED_LANGUAGES (list): A list of dictionaries defining supported languages. 6 | """ 7 | 8 | SUPPORTED_LANGUAGES = [ 9 | {"name": "Python", "extensions": [".py"], "tree_sitter_name": "python"}, 10 | { 11 | "name": "JavaScript", 12 | "extensions": [".js", ".mjs"], 13 | "tree_sitter_name": "javascript", 14 | }, 15 | {"name": "HTML", "extensions": [".html", ".htm"], "tree_sitter_name": "html"}, 16 | {"name": "CSS", "extensions": [".css"], "tree_sitter_name": "css"}, 17 | {"name": "Java", "extensions": [".java"], "tree_sitter_name": "java"}, 18 | {"name": "C#", "extensions": [".cs"], "tree_sitter_name": "c_sharp"}, 19 | { 20 | "name": "TypeScript", 21 | "extensions": [".ts", ".tsx"], 22 | "tree_sitter_name": "typescript", 23 | }, 24 | {"name": "Ruby", "extensions": [".rb", ".erb"], "tree_sitter_name": "ruby"}, 25 | { 26 | "name": "PHP", 27 | "extensions": [ 28 | ".php", 29 | ".phtml", 30 | ".php3", 31 | ".php4", 32 | ".php5", 33 | ".php7", 34 | ".phps", 35 | ".php-s", 36 | ".pht", 37 | ".phar", 38 | ], 39 | "tree_sitter_name": "php", 40 | }, 41 | {"name": "Go", "extensions": [".go"], "tree_sitter_name": "go"}, 42 | {"name": "Kotlin", "extensions": [".kt", ".kts"], "tree_sitter_name": "kotlin"}, 43 | {"name": "Rust", "extensions": [".rs"], "tree_sitter_name": "rust"}, 44 | { 45 | "name": "C++", 46 | "extensions": [".cpp", ".cc", ".cxx", ".h", ".hpp", ".hxx"], 47 | "tree_sitter_name": "cpp", 48 | }, 49 | {"name": "C", "extensions": [".c", ".h"], "tree_sitter_name": "c"}, 50 | {"name": "Markdown", "extensions": [".md"], "tree_sitter_name": "md"}, 51 | {"name": "Arduino C", "extensions": [".ino"], "tree_sitter_name": "ino"} 52 | # ---- the following are not supported by the current code chunker implementation ---- 53 | # { 54 | # "name": "Swift", 55 | # "extensions": [".swift"], 56 | # "tree_sitter_name": "swift" 57 | # }, 58 | ] 59 | -------------------------------------------------------------------------------- /projects/example-improve/README.md: -------------------------------------------------------------------------------- 1 | To implement the game Snake in Python using the 
Model-View-Controller (MVC) design pattern, we will need several classes and files. The game will be controlled using the keyboard. 2 | 3 | Here are the core classes and their purposes: 4 | 5 | 1. `Game`: This is the main class that will control the game flow. It will be responsible for starting the game, handling user input, and updating the game state. 6 | 7 | 2. `Snake`: This class will represent the snake in the game. It will have methods to move the snake, grow the snake, and check if the snake has collided with itself. 8 | 9 | 3. `Food`: This class will represent the food in the game. It will have methods to place the food at a random location on the game board. 10 | 11 | 4. `Board`: This class will represent the game board. It will have methods to draw the board, the snake, and the food. 12 | 13 | 5. `Controller`: This class will handle user input. It will have methods to listen for keyboard events and update the direction of the snake accordingly. 14 | 15 | Now, let's start with the entry point file, `main.py`, and then go to the ones that are imported by that file. 16 | 17 | src/main.py 18 | -------------------------------------------------------------------------------- /projects/example-improve/controller.py: -------------------------------------------------------------------------------- 1 | import keyboard 2 | 3 | 4 | class Controller: 5 | def __init__(self, game, view): 6 | self.game = game 7 | self.view = view 8 | 9 | def handle_input(self): 10 | if keyboard.is_pressed("up") and not hasattr(self, "last_key_pressed"): 11 | self.game.move("down") 12 | self.last_key_pressed = "up" 13 | elif hasattr(self, "last_key_pressed") and self.last_key_pressed == "up": 14 | self.game.move("right") 15 | del self.last_key_pressed 16 | elif keyboard.is_pressed("down"): 17 | self.game.move("up") 18 | elif keyboard.is_pressed("left"): 19 | self.game.move("right") 20 | elif keyboard.is_pressed("right"): 21 | self.game.move("left") 22 | -------------------------------------------------------------------------------- /projects/example-improve/main.py: -------------------------------------------------------------------------------- 1 | from controller import Controller 2 | from model import Game 3 | from view import View 4 | 5 | 6 | def main(): 7 | game = Game() 8 | view = View(game) 9 | controller = Controller(game, view) 10 | 11 | while game.is_running: 12 | controller.handle_input() 13 | game.update() 14 | view.render() 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /projects/example-improve/model.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from dataclasses import dataclass 4 | 5 | 6 | @dataclass 7 | class Point: 8 | x: int 9 | y: int 10 | 11 | 12 | class Game: 13 | def __init__(self): 14 | self.snake = [Point(5, 5)] 15 | self.food = self.generate_food() 16 | self.is_running = True 17 | 18 | def generate_food(self): 19 | return Point(random.randint(0, 10), random.randint(0, 10)) 20 | 21 | def update(self): 22 | # Move the snake 23 | self.snake.move() 24 | 25 | # Check for collision with food 26 | if self.snake.head == self.food: 27 | self.snake.grow() 28 | self.food = self.generate_food() 29 | 30 | # Check for collision with boundaries 31 | if not (0 <= self.snake.head.x < 10 and 0 <= self.snake.head.y < 10): 32 | self.is_running = False 33 | -------------------------------------------------------------------------------- 
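A note on the example-improve project above: in model.py, `Game.update` calls `self.snake.move()`, `self.snake.head`, and `self.snake.grow()`, yet `self.snake` is initialised as a plain list of `Point`s, so the example as committed raises `AttributeError` when run; defects like this are precisely what the improve workflow is meant to patch. Purely as an illustration (this class is hypothetical and not part of the repository, and the `direction` default is an assumption), a minimal `Snake` that would satisfy those three calls could look like:

```python
# Hypothetical sketch, not repository code: one way to provide the
# move()/grow()/head interface that Game.update in model.py expects.
from dataclasses import dataclass


@dataclass(frozen=True)
class Point:
    x: int
    y: int


class Snake:
    def __init__(self, start: Point):
        self.body = [start]            # body[0] is the head
        self.direction = Point(1, 0)   # assumed initial direction: right

    @property
    def head(self) -> Point:
        return self.body[0]

    def move(self) -> None:
        # Advance the head one cell in the current direction and drop the tail.
        new_head = Point(self.head.x + self.direction.x,
                         self.head.y + self.direction.y)
        self.body.insert(0, new_head)
        self.body.pop()

    def grow(self) -> None:
        # Duplicate the tail segment; the next move() then lengthens the body.
        self.body.append(self.body[-1])
```

With such a class, `Game.__init__` would construct `self.snake = Snake(Point(5, 5))`, and `View.render` in view.py further below would test membership against `self.game.snake.body` rather than the bare list.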
/projects/example-improve/prompt: -------------------------------------------------------------------------------- 1 | If up is pressed, the snake should turn down and after one step to the right. If down is pressed it should go up. Also, make the snake go left when right is pressed and right when left is pressed. 2 | -------------------------------------------------------------------------------- /projects/example-improve/requirements.txt: -------------------------------------------------------------------------------- 1 | keyboard==0.13.5 2 | -------------------------------------------------------------------------------- /projects/example-improve/run.sh: -------------------------------------------------------------------------------- 1 | # a) Install dependencies 2 | python3 -m venv venv 3 | source venv/bin/activate 4 | pip install -r requirements.txt 5 | 6 | # b) Run all necessary parts of the codebase 7 | python main.py 8 | -------------------------------------------------------------------------------- /projects/example-improve/view.py: -------------------------------------------------------------------------------- 1 | from model import Point 2 | 3 | 4 | class View: 5 | def __init__(self, game): 6 | self.game = game 7 | 8 | def render(self): 9 | # Print the game state 10 | for y in range(10): 11 | for x in range(10): 12 | if Point(x, y) in self.game.snake: 13 | print("S", end="") 14 | elif Point(x, y) == self.game.food: 15 | print("F", end="") 16 | else: 17 | print(".", end="") 18 | print() 19 | print() 20 | -------------------------------------------------------------------------------- /projects/example-vision/images/ux_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AntonOsika/gpt-engineer/a90fcd543eedcc0ff2c34561bc0785d2ba83c47e/projects/example-vision/images/ux_diagram.png -------------------------------------------------------------------------------- /projects/example-vision/navigation.html: -------------------------------------------------------------------------------- [HTML markup stripped in extraction; content not recoverable] -------------------------------------------------------------------------------- /projects/example-vision/prompt: -------------------------------------------------------------------------------- 1 | Alter the nav so it looks like the ux diagram provided 2 | -------------------------------------------------------------------------------- /projects/example/prompt: -------------------------------------------------------------------------------- 1 | We are writing snake in python. MVC components split in separate files. Keyboard control. 2 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "gpt-engineer" 3 | version = "0.3.1" 4 | description = "Specify what you want it to build, the AI asks for clarification, and then builds it."
5 | authors = ["Anton Osika "] 6 | license = "MIT" 7 | readme = "README.md" 8 | homepage = "https://github.com/gpt-engineer-org/gpt-engineer" 9 | repository = "https://github.com/gpt-engineer-org/gpt-engineer" 10 | documentation = "https://gpt-engineer.readthedocs.io/en/latest/" 11 | classifiers = [ 12 | "Development Status :: 4 - Beta", 13 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 14 | ] 15 | 16 | [build-system] 17 | requires = ["poetry-core>=1.0.0"] 18 | build-backend = "poetry.core.masonry.api" 19 | 20 | [tool.poetry.dependencies] 21 | python = ">=3.10,<3.13" 22 | openai = "^1.0" 23 | termcolor = "2.3.0" 24 | typer = ">=0.3.2" 25 | rudder-sdk-python = ">=2.0.2" 26 | dataclasses-json = "0.5.7" 27 | tiktoken = ">=0.0.4" 28 | tabulate = "0.9.0" 29 | python-dotenv = ">=0.21.0" 30 | langchain = ">=0.1.2" 31 | langchain_openai = "*" 32 | toml = ">=0.10.2" 33 | tomlkit = "^0.12.4" 34 | pyperclip = "^1.8.2" 35 | langchain-anthropic = "^0.1.1" 36 | regex = "^2023.12.25" 37 | pillow = "^10.2.0" 38 | datasets = "^2.17.1" 39 | black = "23.3.0" 40 | langchain-community = "^0.2.0" 41 | 42 | [tool.poetry.group.dev.dependencies] 43 | pytest = ">=7.3.1" 44 | pytest-cov = "^4.1.0" 45 | mypy = "1.3.0" 46 | ruff = ">=0.0.272" 47 | pre-commit = "3.3.3" 48 | tox = ">=3.0.0" 49 | 50 | [tool.poetry.group.docs.dependencies] 51 | autodoc_pydantic = ">=1.8.0" 52 | myst_parser = ">=0.18.1" 53 | nbsphinx = ">=0.8.9" 54 | sphinx = ">=5.0.0" 55 | sphinx-autobuild = ">=2021.3.14" 56 | sphinx_book_theme = ">=0.3.3" 57 | sphinx_rtd_theme = ">=1.0.0" 58 | sphinx-typlog-theme = ">=0.8.0" 59 | toml = ">=0.10.2" 60 | myst-nb = ">=0.17.1" 61 | linkchecker = ">=10.2.1" 62 | sphinx-copybutton = ">=0.5.1" 63 | markdown-include = ">=0.6.0" 64 | sphinx_copybutton = ">=0.5.2" 65 | 66 | [tool.poetry.scripts] 67 | gpt-engineer = 'gpt_engineer.applications.cli.main:app' 68 | ge = 'gpt_engineer.applications.cli.main:app' 69 | gpte = 'gpt_engineer.applications.cli.main:app' 70 | bench = 'gpt_engineer.benchmark.__main__:app' 71 | gpte_test_application = 'tests.caching_main:app' 72 | 73 | [tool.poetry.extras] 74 | test = ["pytest", "pytest-cov"] 75 | doc = [ 76 | "autodoc_pydantic", 77 | "myst_parser", 78 | "nbsphinx", 79 | "sphinx", 80 | "sphinx-autobuild", 81 | "sphinx_book_theme", 82 | "sphinx_rtd_theme", 83 | "sphinx-typlog-theme", 84 | "myst-nb", 85 | "linkchecker", 86 | "sphinx-copybutton", 87 | "markdown-include", 88 | "sphinx_copybutton", 89 | ] 90 | 91 | [tool.ruff] 92 | select = ["F", "E", "W", "I001"] 93 | show-fixes = false 94 | target-version = "py310" 95 | task-tags = ["TODO", "FIXME"] 96 | extend-ignore = ["E501", "E722"] 97 | 98 | [tool.black] 99 | target-version = ["py310"] 100 | 101 | [tool.ruff.isort] 102 | known-first-party = [] 103 | known-third-party = [] 104 | section-order = [ 105 | "future", 106 | "standard-library", 107 | "third-party", 108 | "first-party", 109 | "local-folder", 110 | ] 111 | combine-as-imports = true 112 | split-on-trailing-comma = false 113 | lines-between-types = 1 114 | 115 | [tool.pytest.ini_options] 116 | markers = [ 117 | "requires_key: marks tests as requiring access to a valid OPENAI_API_KEY (deselect with '-m \"not requires_key\"')", 118 | ] 119 | -------------------------------------------------------------------------------- /scripts/clean_benchmarks.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides functionality to clean up benchmark directories by removing 3 | all files and folders except for 
'prompt' and 'main_prompt'. 4 | """ 5 | 6 | # list all folders in benchmark folder 7 | # for each folder, run the benchmark 8 | 9 | import os 10 | import shutil 11 | 12 | from pathlib import Path 13 | 14 | from typer import run 15 | 16 | 17 | def main(): 18 | """ 19 | Main function that iterates through all directories in the 'benchmark' folder 20 | and cleans them by removing all files and directories except for 'prompt' and 21 | 'main_prompt'. 22 | """ 23 | 24 | benchmarks = Path("benchmark") 25 | 26 | for benchmark in benchmarks.iterdir(): 27 | if benchmark.is_dir(): 28 | print(f"Cleaning {benchmark}") 29 | for path in benchmark.iterdir(): 30 | if path.name in ["prompt", "main_prompt"]: 31 | continue 32 | 33 | # Get filename of Path object 34 | if path.is_dir(): 35 | # delete the entire directory 36 | shutil.rmtree(path) 37 | else: 38 | # delete the file 39 | os.remove(path) 40 | 41 | 42 | if __name__ == "__main__": 43 | run(main) 44 | -------------------------------------------------------------------------------- /scripts/print_chat.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides functionality to print a conversation with messages 3 | colored according to the role of the speaker. 4 | """ 5 | 6 | import json 7 | 8 | import typer 9 | 10 | from termcolor import colored 11 | 12 | app = typer.Typer() 13 | 14 | 15 | def pretty_print_conversation(messages): 16 | """ 17 | Prints a conversation with messages formatted and colored by role. 18 | 19 | Parameters 20 | ---------- 21 | messages : list 22 | A list of message dictionaries, each containing 'role', 'name', and 'content' keys. 23 | 24 | """ 25 | 26 | role_to_color = { 27 | "system": "red", 28 | "user": "green", 29 | "assistant": "blue", 30 | "function": "magenta", 31 | } 32 | formatted_messages = [] 33 | for message in messages: 34 | if message["role"] == "function": 35 | formatted_messages.append( 36 | f"function ({message['name']}): {message['content']}\n" 37 | ) 38 | else: 39 | assistant_content = ( 40 | message["function_call"] 41 | if message.get("function_call") 42 | else message["content"] 43 | ) 44 | role_to_message = { 45 | "system": f"system: {message['content']}\n", 46 | "user": f"user: {message['content']}\n", 47 | "assistant": f"assistant: {assistant_content}\n", 48 | } 49 | formatted_messages.append(role_to_message[message["role"]]) 50 | 51 | for formatted_message in formatted_messages: 52 | role = messages[formatted_messages.index(formatted_message)]["role"] 53 | color = role_to_color[role] 54 | print(colored(formatted_message, color)) 55 | 56 | 57 | @app.command() 58 | def main( 59 | messages_path: str, 60 | ): 61 | """ 62 | Main function that loads messages from a JSON file and prints them using pretty formatting. 63 | 64 | Parameters 65 | ---------- 66 | messages_path : str 67 | The file path to the JSON file containing the messages. 68 | 69 | """ 70 | with open(messages_path) as f: 71 | messages = json.load(f) 72 | 73 | pretty_print_conversation(messages) 74 | 75 | 76 | if __name__ == "__main__": 77 | app() 78 | -------------------------------------------------------------------------------- /scripts/test_api.py: -------------------------------------------------------------------------------- 1 | """This is just a demo to test api.py.""" 2 | 3 | from time import sleep 4 | 5 | import requests 6 | 7 | 8 | def post_data(url, extra_arguments): 9 | """ 10 | Make an HTTP POST request with extra_arguments as data. 
11 | 12 | Parameters 13 | ---------- 14 | url : str 15 | The URL to which the POST request should be sent. 16 | extra_arguments : dict 17 | A dictionary of data that needs to be sent in the POST request. 18 | 19 | Returns 20 | ------- 21 | response 22 | The response from the server. 23 | """ 24 | 25 | response = requests.post(url, json=extra_arguments) 26 | return response 27 | 28 | 29 | if __name__ == "__main__": 30 | URL_BASE = "http://127.0.0.1:8000" 31 | 32 | arguments = { 33 | "input": "We are writing snake in python. MVC components split \ 34 | in separate files. Keyboard control.", # our prompt 35 | "additional_input": {"improve_option": False}, 36 | } 37 | 38 | # create a task 39 | response = post_data(f"{URL_BASE}/agent/tasks", arguments) 40 | print(response.json()) 41 | task_id = response.json()["task_id"] 42 | 43 | sleep(1) # this is not needed 44 | 45 | # execute the step for our task 46 | response = post_data(f"{URL_BASE}/agent/tasks/{task_id}/steps", {}) 47 | print(response.json()) 48 | -------------------------------------------------------------------------------- /sweep.yaml: -------------------------------------------------------------------------------- 1 | # Sweep AI turns bug fixes & feature requests into code changes (https://sweep.dev) 2 | # For details on our config file, check out our docs at https://docs.sweep.dev 3 | 4 | # If you use this be sure to frequently sync your default branch(main, master) to dev. 5 | branch: 'main' 6 | # By default Sweep will read the logs and outputs from your existing Github Actions. To disable this, set this to false. 7 | gha_enabled: True 8 | description: 'Python project for AI code generation with next token prediction LLMs. Various AI steps are carried out in steps.py. Disk access via DB objects.' 9 | 10 | # Default Values: https://github.com/sweepai/sweep/blob/main/sweep.yaml 11 | sandbox: 12 | install: 13 | - pre-commit install 14 | check: 15 | - pre-commit run --files {file_path} 16 | 17 | rules: 18 | - Ensure all new functions and classes have very clear, concise and up-to-date docstrings. Take gpt_engineer/ai.py as a good example. 19 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AntonOsika/gpt-engineer/a90fcd543eedcc0ff2c34561bc0785d2ba83c47e/tests/__init__.py -------------------------------------------------------------------------------- /tests/applications/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AntonOsika/gpt-engineer/a90fcd543eedcc0ff2c34561bc0785d2ba83c47e/tests/applications/__init__.py -------------------------------------------------------------------------------- /tests/applications/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AntonOsika/gpt-engineer/a90fcd543eedcc0ff2c34561bc0785d2ba83c47e/tests/applications/cli/__init__.py -------------------------------------------------------------------------------- /tests/applications/cli/test_collect.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests the collect_learnings function in the cli/collect module. 
3 | """ 4 | 5 | import pytest 6 | 7 | # def test_collect_learnings(monkeypatch): 8 | # monkeypatch.setattr(rudder_analytics, "track", MagicMock()) 9 | # 10 | # model = "test_model" 11 | # temperature = 0.5 12 | # steps = [simple_gen] 13 | # dbs = FileRepositories( 14 | # OnDiskRepository("/tmp"), 15 | # OnDiskRepository("/tmp"), 16 | # OnDiskRepository("/tmp"), 17 | # OnDiskRepository("/tmp"), 18 | # OnDiskRepository("/tmp"), 19 | # OnDiskRepository("/tmp"), 20 | # OnDiskRepository("/tmp"), 21 | # ) 22 | # dbs.input = { 23 | # "prompt": "test prompt\n with newlines", 24 | # "feedback": "test feedback", 25 | # } 26 | # code = "this is output\n\nit contains code" 27 | # dbs.logs = {steps[0].__name__: json.dumps([{"role": "system", "content": code}])} 28 | # dbs.memory = {"all_output.txt": "test workspace\n" + code} 29 | # 30 | # collect_learnings(model, temperature, steps, dbs) 31 | # 32 | # learnings = extract_learning( 33 | # model, temperature, steps, dbs, steps_file_hash=steps_file_hash() 34 | # ) 35 | # assert rudder_analytics.track.call_count == 1 36 | # assert rudder_analytics.track.call_args[1]["event"] == "learning" 37 | # a = { 38 | # k: v 39 | # for k, v in rudder_analytics.track.call_args[1]["properties"].items() 40 | # if k != "timestamp" 41 | # } 42 | # b = {k: v for k, v in learnings.to_dict().items() if k != "timestamp"} 43 | # assert a == b 44 | # 45 | # assert json.dumps(code) in learnings.logs 46 | # assert code in learnings.workspace 47 | 48 | 49 | if __name__ == "__main__": 50 | pytest.main(["-v"]) 51 | -------------------------------------------------------------------------------- /tests/applications/cli/test_collection_consent.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for the revised data collection consent mechanism in the cli/learning module. 3 | """ 4 | 5 | from pathlib import Path 6 | from unittest.mock import patch 7 | 8 | import pytest 9 | 10 | from gpt_engineer.applications.cli.learning import ( 11 | ask_collection_consent, 12 | check_collection_consent, 13 | ) 14 | 15 | 16 | # Use a fixture to clean up created files after each test 17 | @pytest.fixture 18 | def cleanup(): 19 | yield 20 | if Path(".gpte_consent").exists(): 21 | Path(".gpte_consent").unlink() 22 | 23 | 24 | """ 25 | Test the following 4 scenarios for check_collection_consent(): 26 | * The .gpte_consent file exists and its content is "true". 27 | * The .gpte_consent file exists but its content is not "true". 28 | * The .gpte_consent file does not exist and the user gives consent when asked. 29 | * The .gpte_consent file does not exist and the user does not give consent when asked. 
30 | """ 31 | 32 | 33 | def test_check_consent_file_exists_and_true(cleanup): 34 | Path(".gpte_consent").write_text("true") 35 | assert check_collection_consent() is True 36 | 37 | 38 | def test_check_consent_file_exists_and_false(cleanup): 39 | Path(".gpte_consent").write_text("false") 40 | with patch("builtins.input", side_effect=["n"]): 41 | assert check_collection_consent() is False 42 | 43 | 44 | def test_check_consent_file_not_exists_and_user_says_yes(cleanup): 45 | with patch("builtins.input", side_effect=["y"]): 46 | assert check_collection_consent() is True 47 | assert Path(".gpte_consent").exists() 48 | assert Path(".gpte_consent").read_text() == "true" 49 | 50 | 51 | def test_check_consent_file_not_exists_and_user_says_no(cleanup): 52 | with patch("builtins.input", side_effect=["n"]): 53 | assert check_collection_consent() is False 54 | assert not Path(".gpte_consent").exists() 55 | 56 | 57 | """ 58 | Test the following 4 scenarios for ask_collection_consent(): 59 | 1. The user immediately gives consent with "y": 60 | * The .gpte_consent file is created with content "true". 61 | * The function returns True. 62 | 2. The user immediately denies consent with "n": 63 | * The .gpte_consent file is not created. 64 | * The function returns False. 65 | 3. The user first provides an invalid response, then gives consent with "y": 66 | * The user is re-prompted after the invalid input. 67 | * The .gpte_consent file is created with content "true". 68 | * The function returns True. 69 | 4. The user first provides an invalid response, then denies consent with "n": 70 | * The user is re-prompted after the invalid input. 71 | * The .gpte_consent file is not created. 72 | * The function returns False. 73 | """ 74 | 75 | 76 | def test_ask_collection_consent_yes(cleanup): 77 | with patch("builtins.input", side_effect=["y"]): 78 | result = ask_collection_consent() 79 | assert Path(".gpte_consent").exists() 80 | assert Path(".gpte_consent").read_text() == "true" 81 | assert result is True 82 | 83 | 84 | def test_ask_collection_consent_no(cleanup): 85 | with patch("builtins.input", side_effect=["n"]): 86 | result = ask_collection_consent() 87 | assert not Path(".gpte_consent").exists() 88 | assert result is False 89 | 90 | 91 | def test_ask_collection_consent_invalid_then_yes(cleanup): 92 | with patch("builtins.input", side_effect=["invalid", "y"]): 93 | result = ask_collection_consent() 94 | assert Path(".gpte_consent").exists() 95 | assert Path(".gpte_consent").read_text() == "true" 96 | assert result is True 97 | 98 | 99 | def test_ask_collection_consent_invalid_then_no(cleanup): 100 | with patch("builtins.input", side_effect=["invalid", "n"]): 101 | result = ask_collection_consent() 102 | assert not Path(".gpte_consent").exists() 103 | assert result is False 104 | -------------------------------------------------------------------------------- /tests/applications/cli/test_learning.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | from gpt_engineer.applications.cli import learning 4 | from gpt_engineer.applications.cli.learning import Learning 5 | from gpt_engineer.core.default.disk_memory import DiskMemory 6 | from gpt_engineer.core.prompt import Prompt 7 | 8 | 9 | def test_human_review_input_no_concent_returns_none(): 10 | with mock.patch.object(learning, "check_collection_consent", return_value=False): 11 | result = learning.human_review_input() 12 | 13 | assert result is None 14 | 15 | 16 | def 
test_human_review_input_consent_code_ran_no_comments(): 17 | with ( 18 | mock.patch.object(learning, "check_collection_consent", return_value=True), 19 | mock.patch("builtins.input", return_value="y"), 20 | ): 21 | result = learning.human_review_input() 22 | 23 | assert result.raw == "y, y, " 24 | assert result.ran is True 25 | assert result.works is None 26 | assert result.comments == "" 27 | 28 | 29 | def test_human_review_input_consent_code_ran_not_perfect_but_useful_no_comments(): 30 | with ( 31 | mock.patch.object(learning, "check_collection_consent", return_value=True), 32 | mock.patch("builtins.input", side_effect=["y", "n", "y", ""]), 33 | ): 34 | result = learning.human_review_input() 35 | 36 | assert result.raw == "y, n, y" 37 | assert result.ran is True 38 | assert result.works is True 39 | assert result.comments == "" 40 | 41 | 42 | def test_check_collection_consent_yes(): 43 | gpte_consent_mock = mock.Mock() 44 | gpte_consent_mock.exists.return_value = True 45 | gpte_consent_mock.read_text.return_value = "true" 46 | 47 | with mock.patch.object(learning, "Path", return_value=gpte_consent_mock): 48 | result = learning.check_collection_consent() 49 | 50 | assert result is True 51 | 52 | 53 | def test_check_collection_consent_no_ask_collection_consent(): 54 | with mock.patch.object(learning, "Path") as gpte_consent_mock: 55 | gpte_consent_mock.exists.return_value = True 56 | gpte_consent_mock.read_text.return_value = "false" 57 | 58 | with mock.patch.object(learning, "ask_collection_consent", return_value=True): 59 | result = learning.check_collection_consent() 60 | 61 | assert result is True 62 | 63 | 64 | def test_ask_collection_consent_yes(): 65 | with mock.patch("builtins.input", return_value="y"): 66 | result = learning.ask_collection_consent() 67 | 68 | assert result is True 69 | 70 | 71 | def test_ask_collection_consent_no(): 72 | with mock.patch("builtins.input", return_value="n"): 73 | result = learning.ask_collection_consent() 74 | 75 | assert result is False 76 | 77 | 78 | def test_extract_learning(): 79 | review = learning.Review( 80 | raw="y, n, y", 81 | ran=True, 82 | works=True, 83 | perfect=False, 84 | comments="The code is not perfect", 85 | ) 86 | memory = mock.Mock(spec=DiskMemory) 87 | memory.to_json.return_value = {"prompt": "prompt"} 88 | 89 | result = learning.extract_learning( 90 | Prompt("prompt"), 91 | "model_name", 92 | 0.01, 93 | ("prompt_tokens", "completion_tokens"), 94 | memory, 95 | review, 96 | ) 97 | 98 | assert isinstance(result, Learning) 99 | 100 | 101 | def test_get_session(): 102 | with mock.patch.object(learning, "Path") as path_mock: 103 | # can be better tested with pyfakefs. 104 | path_mock.return_value.__truediv__.return_value.exists.return_value = False 105 | 106 | with mock.patch.object(learning, "random") as random_mock: 107 | random_mock.randint.return_value = 42 108 | result = learning.get_session() 109 | 110 | assert result == "42" 111 | -------------------------------------------------------------------------------- /tests/benchmark/test_BenchConfig.py: -------------------------------------------------------------------------------- 1 | # Generated by CodiumAI 2 | 3 | import pytest 4 | 5 | from gpt_engineer.benchmark.bench_config import ( 6 | AppsConfig, 7 | BenchConfig, 8 | GptmeConfig, 9 | MbppConfig, 10 | ) 11 | 12 | 13 | class TestBenchConfig: 14 | # Creating a BenchConfig object with default values should return an instance of BenchConfig with all attributes set to their default values. 
15 | def test_default_values(self): 16 | config = BenchConfig() 17 | assert isinstance(config.apps, AppsConfig) 18 | assert isinstance(config.mbpp, MbppConfig) 19 | assert isinstance(config.gptme, GptmeConfig) 20 | assert config.apps.active is True 21 | assert config.apps.test_start_index == 0 22 | assert config.apps.test_end_index == 1 23 | assert config.apps.train_start_index == 0 24 | assert config.apps.train_end_index == 0 25 | assert config.mbpp.active is True 26 | assert config.mbpp.test_len == 1 27 | assert config.mbpp.train_len == 0 28 | assert config.gptme.active is True 29 | 30 | # Creating a BenchConfig object with specific values should return an instance of BenchConfig with the specified attributes set to the specified values. 31 | def test_specific_values(self): 32 | config = BenchConfig( 33 | apps=AppsConfig( 34 | active=False, 35 | test_start_index=1, 36 | test_end_index=2, 37 | train_start_index=3, 38 | train_end_index=4, 39 | ), 40 | mbpp=MbppConfig(active=False, test_len=5, train_len=6), 41 | gptme=GptmeConfig(active=False), 42 | ) 43 | assert isinstance(config.apps, AppsConfig) 44 | assert isinstance(config.mbpp, MbppConfig) 45 | assert isinstance(config.gptme, GptmeConfig) 46 | assert config.apps.active is False 47 | assert config.apps.test_start_index == 1 48 | assert config.apps.test_end_index == 2 49 | assert config.apps.train_start_index == 3 50 | assert config.apps.train_end_index == 4 51 | assert config.mbpp.active is False 52 | assert config.mbpp.test_len == 5 53 | assert config.mbpp.train_len == 6 54 | assert config.gptme.active is False 55 | 56 | # Calling the from_dict method with a valid dictionary should return an instance of BenchConfig with attributes set according to the values in the dictionary. 57 | def test_from_dict_valid_dict(self): 58 | config_dict = { 59 | "apps": { 60 | "active": False, 61 | "test_start_index": 1, 62 | "test_end_index": 2, 63 | "train_start_index": 3, 64 | "train_end_index": 4, 65 | }, 66 | "mbpp": {"active": False, "test_len": 5, "train_len": 6}, 67 | "gptme": {"active": False}, 68 | } 69 | config = BenchConfig.from_dict(config_dict) 70 | assert isinstance(config.apps, AppsConfig) 71 | assert isinstance(config.mbpp, MbppConfig) 72 | assert isinstance(config.gptme, GptmeConfig) 73 | assert config.apps.active is False 74 | assert config.apps.test_start_index == 1 75 | assert config.apps.test_end_index == 2 76 | assert config.apps.train_start_index == 3 77 | assert config.apps.train_end_index == 4 78 | assert config.mbpp.active is False 79 | assert config.mbpp.test_len == 5 80 | assert config.mbpp.train_len == 6 81 | assert config.gptme.active is False 82 | 83 | # Calling the from_toml method with an invalid path to a TOML file should raise an appropriate exception. 
84 | def test_from_toml_invalid_path(self): 85 | config_file = "invalid_config.toml" 86 | with pytest.raises(Exception): 87 | BenchConfig.from_toml(config_file) 88 | -------------------------------------------------------------------------------- /tests/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AntonOsika/gpt-engineer/a90fcd543eedcc0ff2c34561bc0785d2ba83c47e/tests/core/__init__.py -------------------------------------------------------------------------------- /tests/core/default/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AntonOsika/gpt-engineer/a90fcd543eedcc0ff2c34561bc0785d2ba83c47e/tests/core/default/__init__.py -------------------------------------------------------------------------------- /tests/core/default/test_disk_execution_env.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import unittest 3 | 4 | from unittest.mock import MagicMock, patch 5 | 6 | from gpt_engineer.core.default.disk_execution_env import DiskExecutionEnv 7 | 8 | # from gpt_engineer.core.default.git_version_manager import GitVersionManager 9 | from gpt_engineer.core.default.paths import ENTRYPOINT_FILE 10 | from gpt_engineer.core.files_dict import FilesDict 11 | 12 | 13 | class TestOnDiskExecutionEnv(unittest.TestCase): 14 | def setUp(self): 15 | self.temp_dir = tempfile.TemporaryDirectory() 16 | self.env = DiskExecutionEnv() 17 | 18 | def tearDown(self): 19 | self.temp_dir.cleanup() 20 | 21 | def test_successful_execution(self): 22 | entrypoint_content = """ 23 | python -m venv venv 24 | source venv/bin/activate 25 | python script.py 26 | """ 27 | code = { 28 | ENTRYPOINT_FILE: entrypoint_content, 29 | "script.py": "print('This is a test script')", 30 | } 31 | with patch("subprocess.Popen") as mock_popen: 32 | mock_popen.return_value.wait.return_value = 0 33 | process = self.env.upload(FilesDict(code)).popen(f"bash {ENTRYPOINT_FILE}") 34 | self.assertIsNotNone(process) 35 | mock_popen.assert_called_once() 36 | 37 | def test_missing_entrypoint(self): 38 | code = {"script.py": "print('This is a test script')"} 39 | p = self.env.upload(FilesDict(code)).popen(f"bash {ENTRYPOINT_FILE}") 40 | p.communicate() 41 | assert p.returncode != 0 42 | 43 | def test_keyboard_interrupt_handling(self): 44 | entrypoint_content = """ 45 | python script.py 46 | """ 47 | code = { 48 | ENTRYPOINT_FILE: entrypoint_content, 49 | "script.py": "print('This is a test script')", 50 | } 51 | with patch("subprocess.Popen") as mock_popen: 52 | mock_process = MagicMock() 53 | mock_process.poll.side_effect = KeyboardInterrupt 54 | mock_popen.return_value = mock_process 55 | stdout_full, stderr_full, returncode = self.env.upload(FilesDict(code)).run( 56 | f"bash {ENTRYPOINT_FILE}" 57 | ) 58 | mock_process.kill.assert_called_once() 59 | 60 | def test_execution_with_output(self): 61 | entrypoint_content = """ 62 | python script.py 63 | """ 64 | code = { 65 | ENTRYPOINT_FILE: entrypoint_content, 66 | "script.py": "import sys; print('Out'); sys.stderr.write('Error')", 67 | } 68 | with patch("subprocess.Popen") as mock_popen: 69 | process = MagicMock() 70 | process.wait.return_value = 0 71 | process.communicate.return_value = (b"Out\n", b"Error\n") 72 | mock_popen.return_value = process 73 | process = self.env.upload(FilesDict(code)).popen(f"bash {ENTRYPOINT_FILE}") 74 | stdout, stderr = process.communicate() 75 | 
self.assertEqual(stdout, b"Out\n") 76 | self.assertEqual(stderr, b"Error\n") 77 | 78 | 79 | if __name__ == "__main__": 80 | unittest.main() 81 | -------------------------------------------------------------------------------- /tests/core/default/test_simple_agent.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | 3 | import pytest 4 | 5 | from langchain.schema import AIMessage 6 | 7 | from gpt_engineer.core.default.disk_execution_env import DiskExecutionEnv 8 | from gpt_engineer.core.default.paths import ENTRYPOINT_FILE 9 | from gpt_engineer.core.default.simple_agent import SimpleAgent 10 | from gpt_engineer.core.files_dict import FilesDict 11 | from gpt_engineer.core.prompt import Prompt 12 | from tests.mock_ai import MockAI 13 | 14 | 15 | def test_init(): 16 | temp_dir = tempfile.mkdtemp() 17 | mock_ai = MockAI( 18 | [ 19 | AIMessage( 20 | "hello_world.py\n```\nwith open('output.txt', 'w') as file:\n file.write('Hello World!')\n```" 21 | ), 22 | AIMessage("```run.sh\npython3 hello_world.py\n```"), 23 | ], 24 | ) 25 | lean_agent = SimpleAgent.with_default_config(temp_dir, mock_ai) 26 | outfile = "output.txt" 27 | code = lean_agent.init( 28 | Prompt( 29 | f"Make a program that prints 'Hello World!' to a file called '{outfile}'" 30 | ) 31 | ) 32 | 33 | env = DiskExecutionEnv() 34 | env.upload(code).run(f"bash {ENTRYPOINT_FILE}") 35 | code = env.download() 36 | 37 | assert outfile in code 38 | assert code[outfile] == "Hello World!" 39 | 40 | 41 | def test_improve(): 42 | temp_dir = tempfile.mkdtemp() 43 | code = FilesDict( 44 | { 45 | "main.py": "def write_hello_world_to_file(filename):\n \"\"\"\n Writes 'Hello World!' to the specified file.\n \n :param filename: The name of the file to write to.\n \"\"\"\n with open(filename, 'w') as file:\n file.write('Hello World!')\n\nif __name__ == \"__main__\":\n output_filename = 'output.txt'\n write_hello_world_to_file(output_filename)", 46 | "requirements.txt": "# No dependencies required", 47 | "run.sh": "python3 main.py\n", 48 | } 49 | ) 50 | mock_ai = MockAI( 51 | [ 52 | AIMessage( 53 | "```diff\n--- main.py\n+++ main.py\n@@ -7,3 +7,3 @@\n with open(filename, 'w') as file:\n- file.write('Hello World!')\n+ file.write('!dlroW olleH')\n```" 54 | ) 55 | ] 56 | ) 57 | lean_agent = SimpleAgent.with_default_config(temp_dir, mock_ai) 58 | code = lean_agent.improve( 59 | code, 60 | Prompt( 61 | "Change the program so that it prints '!dlroW olleH' instead of 'Hello World!' " 62 | ), 63 | f"bash {ENTRYPOINT_FILE}", 64 | ) 65 | 66 | env = DiskExecutionEnv() 67 | env.upload(code).run(f"bash {ENTRYPOINT_FILE}") 68 | code = env.download() 69 | 70 | outfile = "output.txt" 71 | assert outfile in code 72 | assert code[outfile] == "!dlroW olleH" 73 | 74 | 75 | if __name__ == "__main__": 76 | pytest.main() 77 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/apps_benchmark_6_chat: -------------------------------------------------------------------------------- 1 | To implement this program, we will follow a structured approach. We will create a Python script that takes command-line arguments for the inputs, parses them, and then processes each query to determine the minimum number of blows required to defeat Zmei Gorynich. The program will consist of a main function that handles input parsing and output formatting, and a separate function that calculates the minimum number of blows for each query. 2 | 3 | We will need to: 4 | 1. 
Parse the command-line arguments to extract the queries. 5 | 2. For each query, calculate the minimum number of blows required. 6 | 3. Print the result for each query. 7 | 8 | Let's start by creating the `main.py` file with the necessary imports and the skeleton of our program. 9 | 10 | ```diff 11 | --- /dev/null 12 | +++ main.py 13 | @@ -0,0 +1,36 @@ 14 | +import sys 15 | + 16 | +def calculate_min_blows(n, x, blows): 17 | + """ 18 | + Calculate the minimum number of blows to defeat Zmei Gorynich. 19 | + :param n: Number of types of blows. 20 | + :param x: Initial number of heads. 21 | + :param blows: List of tuples (d_i, h_i) for each blow type. 22 | + :return: Minimum number of blows or -1 if impossible. 23 | + """ 24 | + # Implementation will be added here 25 | + pass 26 | + 27 | +def main(): 28 | + # Parse command-line arguments 29 | + args = sys.argv[1].split() 30 | + t = int(args[0]) 31 | + queries = [] 32 | + idx = 1 33 | + for _ in range(t): 34 | + n, x = map(int, args[idx:idx+2]) 35 | + idx += 2 36 | + blows = [tuple(map(int, args[idx:idx+2])) for _ in range(n)] 37 | + idx += 2*n 38 | + queries.append((n, x, blows)) 39 | + 40 | + # Process each query 41 | + for n, x, blows in queries: 42 | + result = calculate_min_blows(n, x, blows) 43 | + print(result) 44 | + 45 | +if __name__ == "__main__": 46 | + main() 47 | ``` 48 | Now, let's implement the logic in calculate_min_blows function to determine the minimum number of blows required to defeat Zmei Gorynich. 49 | ```diff 50 | --- main.py 51 | +++ main.py 52 | @@ -8,7 +8,25 @@ 53 | + # Implementation will be added here 54 | + max_damage = max(blows, key=lambda x: x[0]) 55 | + max_diff = max(blows, key=lambda x: x[0] - x[1]) 56 | + if max_damage[0] >= x: 57 | + return 1 58 | + if max_diff[0] <= max_diff[1] and max_damage[0] < x: 59 | + return -1 60 | + effective_damage = max_diff[0] - max_diff[1] 61 | + remaining_heads = x - max_damage[0] 62 | + blows_required = 1 + (remaining_heads + effective_damage - 1) // effective_damage 63 | + return blows_required 64 | - pass 65 | ``` 66 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/apps_benchmark_6_code: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AntonOsika/gpt-engineer/a90fcd543eedcc0ff2c34561bc0785d2ba83c47e/tests/core/improve_function_test_cases/apps_benchmark_6_code -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/apps_benchmark_6_v2_chat: -------------------------------------------------------------------------------- 1 | ```diff 2 | --- main.py 3 | +++ main.py 4 | @@ -8,7 +8,25 @@ 5 | + # Implementation will be added here 6 | + max_damage = max(blows, key=lambda x: x[0]) 7 | + max_diff = max(blows, key=lambda x: x[0] - x[1]) 8 | + if max_damage[0] >= x: 9 | + return 1 10 | + if max_diff[0] <= max_diff[1] and max_damage[0] < x: 11 | + return -1 12 | + effective_damage = max_diff[0] - max_diff[1] 13 | + remaining_heads = x - max_damage[0] 14 | + blows_required = 1 + (remaining_heads + effective_damage - 1) // effective_damage 15 | + return blows_required 16 | - pass 17 | ``` 18 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/apps_benchmark_6_v2_code: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | def calculate_min_blows(n, x, blows): 4 | """ 5 
| Calculate the minimum number of blows to defeat Zmei Gorynich. 6 | :param n: Number of types of blows. 7 | :param x: Initial number of heads. 8 | :param blows: List of tuples (d_i, h_i) for each blow type. 9 | :return: Minimum number of blows or -1 if impossible. 10 | """ 11 | # Implementation will be added here 12 | pass 13 | 14 | def main(): 15 | # Parse command-line arguments 16 | args = sys.argv[1].split() 17 | t = int(args[0]) 18 | queries = [] 19 | idx = 1 20 | for _ in range(t): 21 | n, x = map(int, args[idx:idx+2]) 22 | idx += 2 23 | blows = [tuple(map(int, args[idx:idx+2])) for _ in range(n)] 24 | idx += 2*n 25 | queries.append((n, x, blows)) 26 | 27 | # Process each query 28 | for n, x, blows in queries: 29 | result = calculate_min_blows(n, x, blows) 30 | print(result) 31 | 32 | if __name__ == "__main__": 33 | main()""" 34 | 35 | print(code) 36 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/controller_chat: -------------------------------------------------------------------------------- 1 | Planning: 2 | 3 | 1. Modify the `handle_input` method within the `Controller` class to change the behavior of the snake when certain keys are pressed. 4 | 2. When the "up" key is pressed, we need to queue two actions: first to move "down" and then to move "right" after one step. This will require storing the next move in the `Controller` class and checking for it on subsequent calls to `handle_input`. 5 | 3. When the "down" key is pressed, the snake should move "up". 6 | 4. When the "left" key is pressed, the snake should move "right". 7 | 5. When the "right" key is pressed, the snake should move "left". 8 | 6. Remove the long, unnecessary comment block. 9 | 7. Since we need to queue actions, we will introduce a new attribute `next_move` to the `Controller` class to store the next move. 10 | 8. Update the `__init__` method to initialize `next_move` as `None`. 11 | 9. Modify the `handle_input` method to check if there is a `next_move` queued before checking the keyboard input. If there is a `next_move`, execute it and clear the `next_move`. 12 | 10. Update the `handle_input` method to set the `next_move` to "right" when the "up" key is pressed, after moving "down". 13 | 11. Update the `handle_input` method to handle the reversed directions for "down", "left", and "right" keys. 
14 | 15 | Generate patches: 16 | 17 | ```diff 18 | --- controller.py 19 | +++ controller.py 20 | @@ -4,6 +4,7 @@ 21 | class Controller: 22 | def __init__(self, game, view): 23 | self.game = game 24 | self.view = view 25 | + self.next_move = None 26 | 27 | def handle_input(self): 28 | + if self.next_move: 29 | + self.game.move(self.next_move) 30 | + self.next_move = None 31 | + else: 32 | - if keyboard.is_pressed("up"): 33 | - self.game.move("up") 34 | + if keyboard.is_pressed("up"): 35 | + self.game.move("down") 36 | + self.next_move = "right" 37 | - elif keyboard.is_pressed("down"): 38 | + elif keyboard.is_pressed("down"): 39 | - """ 40 | - long 41 | - goring comment 42 | - 43 | - 44 | - 45 | - 46 | - 47 | - 48 | - 49 | - 50 | - """ 51 | - self.game.move("down") 52 | + self.game.move("up") 53 | - elif keyboard.is_pressed("left"): 54 | - self.game.move("left") 55 | + elif keyboard.is_pressed("left"): 56 | + self.game.move("right") 57 | - elif keyboard.is_pressed("right"): 58 | - self.game.move("right") 59 | + elif keyboard.is_pressed("right"): 60 | + self.game.move("left") 61 | ``` 62 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/controller_code: -------------------------------------------------------------------------------- 1 | import keyboard 2 | 3 | 4 | class Controller: 5 | def __init__(self, game, view): 6 | self.game = game 7 | self.view = view 8 | 9 | def handle_input(self): 10 | if keyboard.is_pressed("up"): 11 | self.game.move("up") 12 | elif keyboard.is_pressed("down"): 13 | """ 14 | long 15 | goring comment 16 | 17 | 18 | 19 | 20 | 21 | 22 | """ 23 | self.game.move("down") 24 | elif keyboard.is_pressed("left"): 25 | self.game.move("left") 26 | elif keyboard.is_pressed("right"): 27 | self.game.move("right") 28 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/corrected_diff_from_missing_lines: -------------------------------------------------------------------------------- 1 | --- example.txt 2 | +++ example.txt 3 | @@ -12,7 +12,8 @@ 4 | sample text 1 5 | sample text 2 6 | + added extra line here 7 | # 8 | #comment 9 | # 10 | # 11 | - original text A 12 | + updated original text A with changes 13 | @@ -39,4 +40,5 @@ 14 | checking status: 15 | - perform operation X 16 | + perform operation X only if specific condition holds 17 | + new operation related to condition 18 | evaluating next step: 19 | - execute step Y 20 | + revised execution of step Y 21 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/create_two_new_files_chat: -------------------------------------------------------------------------------- 1 | To implement the requested functionality, we'll create a Python program that processes the input, evaluates the service history of a car, and outputs the appropriate message based on the criteria provided. We'll structure the program into several parts: 2 | 3 | 1. **Parsing Input**: A function to parse the input string into a structured format. 4 | 2. **Service History Evaluation**: A class to evaluate the service history entries and determine the status of the odometer and service frequency. 5 | 3. **Main Logic**: The main logic that ties everything together and outputs the final result. 6 | 7 | Let's start by creating the necessary files and writing the initial code. 
8 | 9 | ### File: requirements.txt 10 | This file will list the dependencies required for the project. Since the project does not require external libraries, this file will be empty but included for completeness. 11 | 12 | ```diff 13 | ``` 14 | 15 | ### File: service_history_evaluator.py 16 | This file will contain the class responsible for evaluating the service history. 17 | 18 | ```diff 19 | --- /dev/null 20 | +++ service_history_evaluator.py 21 | @@ -0,0 +1,47 @@ 22 | +from datetime import datetime, timedelta 23 | + 24 | +class ServiceHistoryEvaluator: 25 | + """ 26 | + Evaluates the service history of a car to determine if the odometer has been tampered with 27 | + or if the service frequency is insufficient. 28 | + """ 29 | + def __init__(self, entries): 30 | + self.entries = entries 31 | + 32 | + def evaluate(self): 33 | + # Placeholder for the evaluation logic 34 | + pass 35 | + 36 | + def _parse_date(self, year, month): 37 | + """ 38 | + Parses the year and month into a datetime object. 39 | + """ 40 | + return datetime(year, month, 1) 41 | + 42 | + def _calculate_distance(self, start, end): 43 | + """ 44 | + Calculates the minimum and maximum possible distance traveled between two services. 45 | + """ 46 | + months_diff = (end.year - start.year) * 12 + end.month - start.month 47 | + min_distance = 2000 * months_diff 48 | + max_distance = 20000 * months_diff 49 | + return min_distance, max_distance 50 | + 51 | + def _check_odometer_rollover(self, prev_reading, current_reading, max_distance): 52 | + """ 53 | + Checks if the odometer could have rolled over between two readings. 54 | + """ 55 | + if prev_reading > current_reading: 56 | + return current_reading + (99999 - prev_reading) <= max_distance 57 | + else: 58 | + return current_reading - prev_reading <= max_distance 59 | + 60 | + def _check_service_frequency(self): 61 | + """ 62 | + Checks if the service frequency is within the legal limits. 63 | + """ 64 | + # Placeholder for the service frequency check logic 65 | + pass 66 | ``` 67 | 68 | ### File: main.py 69 | This file will contain the main logic and the entry point of the program. 70 | 71 | ```diff 72 | --- /dev/null 73 | +++ main.py 74 | @@ -0,0 +1,29 @@ 75 | +import sys 76 | +from service_history_evaluator import ServiceHistoryEvaluator 77 | + 78 | +def parse_input(input_str): 79 | + """ 80 | + Parses the input string into a structured format. 
81 | + """ 82 | + lines = input_str.split('\n') 83 | + num_entries = int(lines[0]) 84 | + entries = [tuple(map(int, line.split())) for line in lines[1:num_entries+1]] 85 | + return entries 86 | + 87 | +def main(input_str): 88 | + entries = parse_input(input_str) 89 | + evaluator = ServiceHistoryEvaluator(entries) 90 | + result = evaluator.evaluate() 91 | + print(result) 92 | + 93 | +if __name__ == "__main__": 94 | + if len(sys.argv) != 2: 95 | + print("Usage: python main.py 'input1 input2 etc'") 96 | + sys.exit(1) 97 | + input_str = sys.argv[1].replace("'", "") 98 | + main(input_str) 99 | ``` 100 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/create_two_new_files_code: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AntonOsika/gpt-engineer/a90fcd543eedcc0ff2c34561bc0785d2ba83c47e/tests/core/improve_function_test_cases/create_two_new_files_code -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/simple_calculator_chat: -------------------------------------------------------------------------------- 1 | I apologize for the oversight. Let's correct the `calculator.py` file with the proper git diff format, ensuring that the context lines match the original code exactly. 2 | 3 | ```diff 4 | --- calculator.py 5 | +++ calculator.py 6 | @@ -1,24 +1,24 @@ 7 | class Calculator: 8 | - def add(self, a, b): 9 | - return a - b # Logical 10 | + def add(self, a, b): # Adds two numbers 11 | + return a + b 12 | 13 | - def subtract(self, a, b) 14 | + def subtract(self, a, b): # Subtracts second number from first 15 | + return a - b 16 | 17 | - def multiply(self, a, b): 18 | - return lambda a, b: sum(a for _ in range(abs(b))) * (1 if b >= 0 else -1) #unncessary 19 | + def multiply(self, a, b): # Multiplies two numbers 20 | + return a * b 21 | 22 | def divide(self, a, b): 23 | if b == 0: 24 | raise ValueError("Cannot divide by zero.") 25 | return a / b 26 | 27 | - def modulo(self, a, b): 28 | - if b = 0: # Syntax 29 | - return None # Logical 30 | + def modulo(self, a, b): # Finds the remainder of division of two numbers 31 | + if b == 0: 32 | + raise ValueError("Cannot modulo by zero.") 33 | return a % b 34 | 35 | def power(self, a, b): # Raises a number to the power of another 36 | result = 1 37 | - for i in range(b): # Logical 38 | + for i in range(abs(b)): 39 | result *= a 40 | - return result 41 | + return result if b >= 0 else 1 / result 42 | ``` 43 | 44 | Now, let's create the `main.py` file with the correct git diff format: 45 | 46 | ```diff 47 | --- /dev/null 48 | +++ main.py 49 | @@ -0,0 +1,30 @@ 50 | +from calculator import Calculator 51 | + 52 | +# Function to demonstrate the usage of the Calculator class 53 | +def main(): 54 | + calc = Calculator() 55 | + while True: 56 | + try: 57 | + a = float(input("Enter the first number: ")) 58 | + b = float(input("Enter the second number: ")) 59 | + operation = input("Enter an operation (add, subtract, multiply, divide, modulo, power): ").lower() 60 | + if operation == 'add': 61 | + print("Result:", calc.add(a, b)) 62 | + elif operation == 'subtract': 63 | + print("Result:", calc.subtract(a, b)) 64 | + elif operation == 'multiply': 65 | + print("Result:", calc.multiply(a, b)) 66 | + elif operation == 'divide': 67 | + print("Result:", calc.divide(a, b)) 68 | + elif operation == 'modulo': 69 | + print("Result:", calc.modulo(a, b)) 70 | + elif operation 
== 'power': 71 | + print("Result:", calc.power(a, b)) 72 | + else: 73 | + print("Invalid operation.") 74 | + 75 | + if input("Do you want to perform another calculation? (yes/no): ").lower() != 'yes': 76 | + break 77 | + except Exception as e: 78 | + print("Error:", e) 79 | + 80 | +if __name__ == "__main__": 81 | + main() 82 | ``` 83 | 84 | These changes should now correctly apply to the provided code and create a simple calculator program with a command-line interface. 85 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/simple_calculator_code: -------------------------------------------------------------------------------- 1 | class Calculator: 2 | def add(self, a, b): 3 | return a - b # Logical 4 | 5 | def subtract(self, a, b) 6 | return a - b # Syntax 7 | 8 | def multiply(self, a, b): 9 | return lambda a, b: sum(a for _ in range(abs(b))) * (1 if b >= 0 else -1) #unncessary 10 | 11 | def divide(self, a, b): 12 | if b == 0: 13 | raise ValueError("Cannot divide by zero.") 14 | return a / b 15 | 16 | def modulo(self, a, b): 17 | if b = 0: # Syntax 18 | return None # Logical 19 | return a % b 20 | 21 | def power(self, a, b): 22 | result = 1 23 | for i in range(b): # Logical 24 | result *= a 25 | return result 26 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/task_master_chat: -------------------------------------------------------------------------------- 1 | Apologies for the confusion. Let's correct the issues in the code with the proper git diff format: 2 | 3 | ```diff 4 | --- taskmaster.py 5 | +++ taskmaster.py 6 | @@ -1,4 +1,4 @@ 7 | -improt datetime 8 | +import datetime 9 | 10 | class Task: 11 | def __init__(self, title, due_date, priority=1): 12 | @@ -4,7 +4,7 @@ 13 | self.title = tile 14 | self.due_date = due_date 15 | self.completed = Fasle 16 | - self.priority = priority 17 | + self.priority = priority # Priority levels: 1 (low) to 5 (high), default is 1. 18 | 19 | def mark_as_complete(self): 20 | - self.completed = Ture 21 | + self.completed = True 22 | 23 | @@ -13,7 +13,7 @@ 24 | def update_title(self, new_title): 25 | # Correct the typo in the assignment of the new title. 26 | # The correct variable name is 'new_title', not 'new_tile'. 27 | - self.title = new_tile 28 | + self.title = new_title 29 | 30 | def update_due_date(self, new_date): 31 | self.due_date = new_date 32 | @@ -22,7 +22,7 @@ 33 | def add_task(task_list, task): 34 | # Correct the typo in the append method call. 35 | # The correct method name is 'append', not 'appned'. 36 | - task_list.appned(task) 37 | + task_list.append(task) 38 | 39 | def remove_task(task_list, task_title): 40 | for task in task_list: 41 | @@ -26,7 +26,7 @@ 42 | # Correct the typo in the title attribute check. 43 | # The correct attribute name is 'title', not 'titel'. 44 | if task.titel == task_title: 45 | - task_list.remove(task) 46 | + task.title == task_title 47 | break 48 | 49 | def update_task_title(task_list, old_title, new_title): 50 | @@ -49,7 +49,7 @@ 51 | def print_tasks(task_list): 52 | for task in task_list: 53 | # Correct the typo in the title attribute when formatting the string. 54 | # The correct attribute name is 'title', not 'titel'. 
55 | - print(f"Task: {task.titel}, Due: {task.due_date}, Completed: {task.completed}, Priority: {task.priority}") 56 | + print(f"Task: {task.title}, Due: {task.due_date}, Completed: {task.completed}, Priority: {task.priority}") 57 | 58 | def load_tasks_from_file(file_path): 59 | tasks = [] 60 | @@ -55,8 +55,8 @@ 61 | with open(file_path, 'r') as file: 62 | # Correct the typo in the readlines method call. 63 | # The correct method name is 'readlines', not 'redlines'. 64 | - lines = file.redlines() 65 | + lines = file.readlines() 66 | for line in lines: 67 | # Correct the typo in the split method call. 68 | # The correct method name is 'split', not 'splt'. 69 | - parts = line.splt(',') 70 | + parts = line.split(',') 71 | task = Task(parts[0], parts[1], int(parts[3])) 72 | if parts[2].strip() == 'True': 73 | task.mark_as_complete() 74 | @@ -72,7 +72,7 @@ 75 | def find_overdue_tasks(task_list): 76 | overdue_tasks = [] 77 | current_date = datetime.datetime.now().date() 78 | for task in task_list: 79 | - task_date = datetime.datetime.strptime(task.due_date, '%Y-%m-%d').date() 80 | + task_date = datetime.datetime.strptime(task.due_date.strip(), '%Y-%m-%d').date() 81 | if task_date < current_date: 82 | # Correct the typo in the append method call. 83 | # The correct method name is 'append', not 'appnd'. 84 | - overdue_tasks.appnd(task) 85 | + overdue_tasks.append(task) 86 | return overdue_tasks 87 | 88 | def search_tasks(task_list, search_term): 89 | @@ -83,7 +83,7 @@ 90 | for task in task_list: 91 | if search_term.lower() in task.title.lower(): 92 | # Correct the typo in the append method call. 93 | # The correct method name is 'append', not 'appnd'. 94 | - found_tasks.appnd(task) 95 | + found_tasks.append(task) 96 | return found_tasks 97 | 98 | task_list = [] 99 | ``` 100 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/task_master_code: -------------------------------------------------------------------------------- 1 | improt datetime 2 | 3 | class Task: 4 | def __init__(self, title, due_date, priority=1): 5 | self.title = tile 6 | self.due_date = due_date 7 | self.completed = Fasle 8 | self.priority = priority 9 | 10 | def mark_as_complete(self): 11 | self.completed = Ture 12 | 13 | def update_title(self, new_title): 14 | self.title = new_tile 15 | 16 | def update_due_date(self, new_date): 17 | self.due_date = new_date 18 | 19 | def set_priority(self, priority): 20 | self.priority = priority 21 | 22 | def add_task(task_list, task): 23 | task_list.appned(task) 24 | 25 | def remove_task(task_list, task_title): 26 | for task in task_list: 27 | if task.titel == task_title: 28 | task_list.remove(task) 29 | break 30 | 31 | def update_task_title(task_list, old_title, new_title): 32 | for task in task_list: 33 | if task.title == old_title: 34 | task.update_title(new_title) 35 | break 36 | 37 | def update_task_due_date(task_list, title, new_date): 38 | for task in task_list: 39 | if task.title == title: 40 | task.update_due_date(new_date) 41 | break 42 | 43 | def set_task_priority(task_list, title, priority): 44 | for task in task_list: 45 | if task.title == title: 46 | task.set_priority(priority) 47 | break 48 | 49 | def print_tasks(task_list): 50 | for task in task_list: 51 | print(f"Task: {task.titel}, Due: {task.due_date}, Completed: {task.completed}, Priority: {task.priority}") 52 | 53 | def load_tasks_from_file(file_path): 54 | tasks = [] 55 | with open(file_path, 'r') as file: 56 | lines = file.redlines() 57 | for line in 
lines: 58 | parts = line.splt(',') 59 | task = Task(parts[0], parts[1], int(parts[3])) 60 | if parts[2].strip() == 'True': 61 | task.mark_as_complete() 62 | tasks.append(task) 63 | return tasks 64 | 65 | def save_tasks_to_file(task_list, file_path): 66 | with open(file_path, 'w') as file: 67 | for task in task_list: 68 | line = f"{task.title},{task.due_date},{task.completed},{task.priority}\n" 69 | file.write(line) 70 | 71 | def find_overdue_tasks(task_list): 72 | overdue_tasks = [] 73 | current_date = datetime.datetime.now().date() 74 | for task in task_list: 75 | task_date = datetime.datetime.strptime(task.due_date, '%Y-%m-%d').date() 76 | if task_date < current_date: 77 | overdue_tasks.appnd(task) 78 | return overdue_tasks 79 | 80 | def search_tasks(task_list, search_term): 81 | found_tasks = [] 82 | for task in task_list: 83 | if search_term.lower() in task.title.lower(): 84 | found_tasks.appnd(task) 85 | return found_tasks 86 | 87 | task_list = [] 88 | add_task(task_list, Task("Finish project", "2024-01-01", 3)) 89 | add_task(task_list, Task("Buy groceries", "2024-01-05", 2)) 90 | add_task(task_list, Task("Call John", "2024-01-03", 1)) 91 | 92 | print_tasks(task_list) 93 | 94 | overdue = find_overdue_tasks(task_list) 95 | print("Overdue tasks:") 96 | for task in overdue: 97 | print(f"Task: {task.title}, Due: {task.due_date}, Priority: {task.priority}") 98 | 99 | save_tasks_to_file(task_list, "tasks.txt") 100 | 101 | loaded_tasks = load_tasks_from_file("tasks.txt") 102 | print("Loaded tasks:") 103 | print_tasks(loaded_tasks) 104 | 105 | remove_task(task_list, "Buy groceries") 106 | print("Tasks after removal:") 107 | print_tasks(task_list) 108 | 109 | save_tasks_to_file(task_list, "tasks_updated.txt") 110 | 111 | search_results = search_tasks(task_list, "project") 112 | print("Search results for 'project':") 113 | for task in search_results: 114 | print(f"Task: {task.title}, Due: {task.due_date}, Completed: {task.completed}, Priority: {task.priority}") 115 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/temperature_converter_chat: -------------------------------------------------------------------------------- 1 | Apologies for the confusion. Let's provide the corrected git diff patches for the `temperatureconverter.py` file. 
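For reference, a minimal sketch of the standard conversion formulas the corrected methods are meant to implement (note: these helper names are illustrative, not from the fixture, and the fixture's diff deliberately keeps the Kelvin offset rounded to 273 and leaves the `5 // 9` floor division as an unchanged context line):

```python
# Illustrative reference conversions; names are hypothetical, not from the fixture.
def celsius_to_fahrenheit(celsius: float) -> float:
    return celsius * 9 / 5 + 32

def fahrenheit_to_celsius(fahrenheit: float) -> float:
    # True division; the fixture's original code uses floor division (5 // 9).
    return (fahrenheit - 32) * 5 / 9

def celsius_to_kelvin(celsius: float) -> float:
    return celsius + 273  # the fixture rounds 273.15 down to 273

def kelvin_to_celsius(kelvin: float) -> float:
    return kelvin - 273
```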
2 | 3 | ```diff 4 | --- temperatureconverter.py 5 | +++ temperatureconverter.py 6 | @@ -6,2 +6,3 @@ 7 | - def _fahrenheit_to_celsius(self, fahrenheit) 8 | + def _fahrenheit_to_celsius(self, fahrenheit): 9 | + """Convert Fahrenheit to Celsius.""" 10 | return (fahrenheit - 32) * 5 // 9 11 | @@ -15,2 +16,3 @@ 12 | - def _fahrenheit_to_kelvin(self, fahrenheit) 13 | + def _fahrenheit_to_kelvin(self, fahrenheit): 14 | + """Convert Fahrenheit to Kelvin.""" 15 | return self._celsius_to_kelvin(self._fahrenheit_to_celsius(fahrenheit)) 16 | @@ -18,2 +20,3 @@ 17 | - def _kelvin_to_fahrenheit(self, kelvin): 18 | + def _kelvin_to_fahrenheit(self, kelvin): 19 | + """Convert Kelvin to Fahrenheit.""" 20 | return self._celsius_to_fahrenheit(self._kelvin_to_celsius(kelvin)) 21 | @@ -21,3 +24,4 @@ 22 | def convert_temperature(self, temperature, from_scale, to_scale): 23 | + """Convert temperature from one scale to another.""" 24 | conversion_methods = { 25 | ('Celsius', 'Fahrenheit'): self._celsius_to_fahrenheit, 26 | @@ -30,3 +34,4 @@ 27 | - if (from_scale, to_scale) in conversion_methods 28 | - return conversion_methods[(from_scale, to_scale(temperature) 29 | + if (from_scale, to_scale) in conversion_methods: 30 | + return conversion_methods[(from_scale, to_scale)](temperature) 31 | else: 32 | @@ -35,3 +40,4 @@ 33 | def update_base_temperature(self, new_temperature, scale): 34 | + """Update the base temperature after validating the new temperature.""" 35 | if self.is_temperature_valid(new_temperature, scale): 36 | - self.base_temp = convert_temperature(new_temperature, scale, self.scale) 37 | + self.base_temp = self.convert_temperature(new_temperature, scale, self.scale) 38 | else: 39 | @@ -41,3 +47,4 @@ 40 | def set_scale(self, new_scale): 41 | + """Set a new scale and update the base temperature accordingly.""" 42 | if new_scale not in ["Celsius", "Fahrenheit", "Kelvin"]: 43 | @@ -47,3 +54,4 @@ 44 | def __str__(self): 45 | + """Return a string representation of the base temperature and scale.""" 46 | - return "Base Temperature: " + self.base_temp + " " + self.scale 47 | + return f"Base Temperature: {self.base_temp} {self.scale}" 48 | @@ -50,3 +58,4 @@ 49 | def __repr__(self): 50 | + """Return a string representation of the TemperatureConverter instance for debugging.""" 51 | - return "TemperatureConverter(base_temperature=" + self.base_temperature + ", scale='" + self.scale + "')" 52 | + return f"TemperatureConverter(base_temperature={self.base_temp}, scale='{self.scale}')" 53 | + 54 | + # Add missing conversion methods 55 | + def _celsius_to_fahrenheit(self, celsius): 56 | + """Convert Celsius to Fahrenheit.""" 57 | + return (celsius * 9 / 5) + 32 58 | + 59 | + def _kelvin_to_fahrenheit(self, kelvin): 60 | + """Convert Kelvin to Fahrenheit.""" 61 | + return (kelvin - 273) * 9 / 5 + 32 62 | + 63 | + # Add method to validate temperatures 64 | + def is_temperature_valid(self, temperature, scale): 65 | + """Validate the correctness of the given temperature within the specified scale.""" 66 | + if scale == "Celsius": 67 | + return -273 <= temperature 68 | + elif scale == "Fahrenheit": 69 | + return -459 <= temperature 70 | + elif scale == "Kelvin": 71 | + return 0 <= temperature 72 | + return False 73 | + 74 | + # Add method to reset the base temperature 75 | + def reset_base_temperature(self): 76 | + """Reset the base temperature to the default value.""" 77 | + self.base_temp = 0 78 | + self.scale = "Celsius" 79 | ``` 80 | 81 | This diff corrects the syntax errors, adds missing methods, and includes the 
necessary changes to ensure the `TemperatureConverter` class works as intended. The `__str__` and `__repr__` methods have been fixed to properly format the temperature as a string, and the `set_scale` and `update_base_temperature` methods have been corrected to use the class's methods properly. Additionally, the `is_temperature_valid` method has been added to validate temperatures, and a method to reset the base temperature has been included. 82 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/temperature_converter_code: -------------------------------------------------------------------------------- 1 | class TemperatureConverter 2 | def __init__(self, base_temperature=0, scale="Celsius"): 3 | self.base_temp = base_temperature 4 | self.scale = scale if scale in ["Celsius", "Fahrenheit", "Kelvin"] else "Invalid" 5 | 6 | def _fahrenheit_to_celsius(self, fahrenheit) 7 | return (fahrenheit - 32) * 5 // 9 8 | 9 | def celsius_to_kelvin(self, celsius): 10 | return celsius + 273 11 | 12 | def kelvin_to_celsius(self, kelvin): 13 | return kelvin - 273 14 | 15 | def _fahrenheit_to_kelvin(self, fahrenheit) 16 | return self._celsius_to_kelvin(_fahrenheit_to_celsius(fahrenheit)) 17 | 18 | def _kelvin_to_fahrenheit(self, kelvin): 19 | return self._celsius_to_fahrenheit(_kelvin_to_celsius(kelvin) 20 | 21 | def convert_temperature(self, temperature, from_scale, to_scale): 22 | conversion_methods = { 23 | ('Celsius', 'Fahrenheit'): self._celsius_to_fahrenheit, 24 | ('Celsius', 'Kelvin'): self.celsius_to_kelvin, 25 | ('Fahrenheit', 'Celsius'): self._fahrenheit_to_celsius, 26 | ('Fahrenheit', 'Kelvin'): self._fahrenheit_to_kelvin, 27 | ('Kelvin', 'Celsius'): self.kelvin_to_celsius, 28 | ('Kelvin', 'Fahrenheit'): self._kelvin_to_fahrenheit, 29 | } 30 | if (from_scale, to_scale) in conversion_methods 31 | return conversion_methods[(from_scale, to_scale(temperature) 32 | else 33 | raise ValueError("Invalid scale conversion") 34 | 35 | def update_base_temperature(self, new_temperature, scale): 36 | if self.is_temperature_valid(new_temperature, scale): 37 | self.base_temp = convert_temperature(new_temperature, scale, self.scale) 38 | else: 39 | raise ValueError("Invalid temperature value") 40 | 41 | def set_scale(self, new_scale): 42 | if new_scale not in ["Celsius", "Fahrenheit", "Kelvin"]: 43 | print("Invalid scale") 44 | self.base_temp = self.convert_temperature(self.base_temperature, self.scale, new_scale) 45 | self.scale = new_scale 46 | 47 | def __str__(self): 48 | return "Base Temperature: " + self.base_temp + " " + self.scale 49 | 50 | def __repr__(self): 51 | return "TemperatureConverter(base_temperature=" + self.base_temperature + ", scale='" + self.scale + "')" 52 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/theo_case_chat: -------------------------------------------------------------------------------- 1 | To add an entrypoint for running the API through the command line, we will create a new Python script that can be executed directly. This script will set up and run the FastAPI application using Uvicorn, which is an ASGI server. This approach allows you to run the server from the command line without directly using the `uvicorn` command. 2 | 3 | We will create a new file named `run.py` in the root directory. This file will import the FastAPI application from `main.py` and will use Uvicorn to run it. 
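As a minimal standalone sketch (assuming `main.py` exposes the FastAPI instance as `app`, as the diff below also assumes), the entrypoint being described looks like this:

```python
# run.py - minimal command-line entrypoint for the FastAPI app.
# Assumes main.py exposes the FastAPI instance as `app`.
import uvicorn

from main import app

if __name__ == "__main__":
    # Bind to all interfaces so the server is also reachable from inside Docker.
    uvicorn.run(app, host="0.0.0.0", port=8000)
```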
4 | 5 | Here's the implementation: 6 | 7 | ```diff 8 | --- /dev/null 9 | +++ run.py 10 | @@ -0,0 +1,10 @@ 11 | +import uvicorn 12 | +from main import app 13 | + 14 | +# This script can be used to run the FastAPI application from the command line. 15 | +if __name__ == "__main__": 16 | + uvicorn.run(app, host="0.0.0.0", port=8000) 17 | ``` 18 | 19 | This script checks if it is being run as the main module and then starts the Uvicorn server with the FastAPI app. The server listens on all interfaces (`0.0.0.0`) and port `8000`. 20 | 21 | Additionally, we need to update the Dockerfile to ensure that the new script is correctly utilized when the Docker container is run. We will change the CMD directive to use `run.py` instead of directly calling `uvicorn`. 22 | 23 | ```diff 24 | --- dockerfile 25 | +++ dockerfile 26 | @@ -19,3 +19,3 @@ 27 | -# Run app.py when the container launches 28 | -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"] 29 | +# Run run.py when the container launches 30 | +CMD ["python", "run.py"] 31 | ``` 32 | 33 | This change in the Dockerfile ensures that when the Docker container is started, it uses the new `run.py` script to launch the application. This script sets up the Uvicorn server with the specified host and port settings. 34 | 35 | With these changes, you can now run the API both directly from the command line by executing `python run.py` and through Docker using the updated Dockerfile configuration. This setup provides flexibility for different deployment scenarios.Invalid hunk: @@ -19,3 +19,3 @@ 36 | -# Run app.py when the container launches 37 | -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"] 38 | +# Run run.py when the container launches 39 | +CMD ["python", "run.py"] 40 | 41 | To add an entrypoint for running the API through the command line, we will create a new Python script that can be executed directly. This script will set up and run the FastAPI application using Uvicorn, which is an ASGI server. This approach allows you to run the server from the command line without directly using the `uvicorn` command. 42 | 43 | We will create a new file named `run.py` in the root directory. This file will import the FastAPI application from `main.py` and will use Uvicorn to run it. 44 | 45 | Here's the implementation: 46 | 47 | ```diff 48 | --- /dev/null 49 | +++ run.py 50 | @@ -0,0 +1,10 @@ 51 | +import uvicorn 52 | +from main import app 53 | + 54 | +# This script can be used to run the FastAPI application from the command line. 55 | +if __name__ == "__main__": 56 | + uvicorn.run(app, host="0.0.0.0", port=8000) 57 | ``` 58 | 59 | This script checks if it is being run as the main module and then starts the Uvicorn server with the FastAPI app. The server listens on all interfaces (`0.0.0.0`) and port `8000`. 60 | 61 | Additionally, we need to update the Dockerfile to ensure that the new script is correctly utilized when the Docker container is run. We will change the CMD directive to use `run.py` instead of directly calling `uvicorn`. 62 | 63 | ```diff 64 | --- dockerfile 65 | +++ dockerfile 66 | @@ -19,3 +19,3 @@ 67 | -# Run app.py when the container launches 68 | -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"] 69 | +# Run run.py when the container launches 70 | +CMD ["python", "run.py"] 71 | ``` 72 | 73 | This change in the Dockerfile ensures that when the Docker container is started, it uses the new `run.py` script to launch the application. 
This script sets up the Uvicorn server with the specified host and port settings. 74 | 75 | With these changes, you can now run the API both directly from the command line by executing `python run.py` and through Docker using the updated Dockerfile configuration. This setup provides flexibility for different deployment scenarios.Invalid hunk: @@ -19,3 +19,3 @@ 76 | -# Run app.py when the container launches 77 | -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"] 78 | +# Run run.py when the container launches 79 | +CMD ["python", "run.py"] 80 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/theo_case_code: -------------------------------------------------------------------------------- 1 | # Use an official Python runtime as a parent image 2 | FROM python:3.9-slim 3 | 4 | # Set the working directory in the container 5 | WORKDIR /usr/src/app 6 | 7 | # Copy the current directory contents into the container at /usr/src/app 8 | COPY . /usr/src/app 9 | 10 | # Install any needed packages specified in requirements.txt 11 | RUN pip install --no-cache-dir -r requirements.txt 12 | 13 | # Make port 80 available to the world outside this container 14 | EXPOSE 80 15 | 16 | # Define environment variable 17 | ENV NAME World 18 | 19 | # Run app.py when the container launches 20 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"] 21 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/vgvishesh_example_2_chat: -------------------------------------------------------------------------------- 1 | To implement a toggle button for switching between dark and light modes, we need to follow these steps: 2 | 3 | 1. Update the `App.tsx` file to include state management for the theme mode. We will use React's `useState` hook to manage the theme state and `useEffect` hook to apply the theme to the document body class. 4 | 5 | 2. Add a button in the `App.tsx` file that will toggle the theme state when clicked. This button should be styled and positioned in the top right corner of the screen. 6 | 7 | 3. Update the `styles.css` file to include styles for the dark mode. We will define a new class, say `.dark-mode`, that will contain the dark mode styles. This class will be applied to the body when the dark mode is active. 8 | 9 | 4. Update the `styles.css` file to style the toggle button. 10 | 11 | Here are the diffs for the changes: 12 | 13 | ```diff 14 | --- src/App.tsx 15 | +++ src/App.tsx 16 | @@ -1,5 +1,9 @@ 17 | import React from 'react'; 18 | +import { useState, useEffect } from 'react'; 19 | import QRCode from 'qrcode.react'; 20 | import { profiles } from './components/SocialLinks'; 21 | import { SocialLinks } from './components/SocialLinks'; 22 | +import './styles.css'; 23 | 24 | +const THEME_LIGHT = 'light'; 25 | +const THEME_DARK = 'dark'; 26 | 27 | @@ -6,7 +10,23 @@ 28 | const App: React.FC = () => { 29 | + const [theme, setTheme] = useState(THEME_LIGHT); 30 | + 31 | + useEffect(() => { 32 | + document.body.className = theme; 33 | + }, [theme]); 34 | + 35 | + const toggleTheme = () => { 36 | + setTheme(theme === THEME_LIGHT ? THEME_DARK : THEME_LIGHT); 37 | + }; 38 | + 39 | return ( 40 | + 43 |
44 |
45 |

My Social Profiles QR Code

46 | @@ -18,4 +38,4 @@ 47 | }; 48 | 49 | export default App; 50 | ``` 51 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/vgvishesh_example_2_code: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import QRCode from 'qrcode.react'; 3 | import { profiles } from './components/SocialLinks'; 4 | import { SocialLinks } from './components/SocialLinks'; 5 | 6 | const App: React.FC = () => { 7 | return ( 8 |
9 |
10 |

My Social Profiles QR Code

11 | 12 | 13 |
14 |
15 | ); 16 | }; 17 | 18 | export default App; 19 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/vgvishesh_example_chat: -------------------------------------------------------------------------------- 1 | Planning: 2 | ........ 3 | 4 | ```diff 5 | --- src/components/SocialLinks.tsx 6 | +++ src/components/SocialLinks.tsx 7 | @@ -1,10 +1,11 @@ 8 | import React from 'react'; 9 | 10 | +export const profiles = { 11 | - const profiles = { 12 | linkedIn: 'https://www.linkedin.com/', 13 | github: 'https://github.com/', 14 | whatsapp: 'https://', 15 | twitter: 'https://twitter.com/', 16 | - }; 17 | +}; 18 | 19 | export const SocialLinks: React.FC = () => { 20 | ``` 21 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/vgvishesh_example_code: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | 3 | export const SocialLinks: React.FC = () => { 4 | const profiles = { 5 | linkedIn: 'https://www.linkedin.com/', 6 | github: 'https://github.com/', 7 | whatsapp: 'https://', 8 | twitter: 'https://twitter.com/', 9 | }; 10 | 11 | return ( 12 |
13 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/zbf_yml_missing_chat: -------------------------------------------------------------------------------- 1 | To achieve the separation of environments (stage and local) and to use H2 DB for the local environment, we need to make the following changes: 2 | 3 | 1. Create a new `application-local.yml` configuration file for the local environment. 4 | 2. Modify the existing `application.yml` to be used for the stage environment. 5 | 3. Update the `run.sh` script to handle both environments. 6 | 7 | Let's start with the changes: 8 | 9 | ### 1. Create `application-local.yml` for the local environment 10 | 11 | ```diff 12 | --- /dev/null 13 | +++ src/main/resources/application-local.yml 14 | @@ -0,0 +1,13 @@ 15 | +spring: 16 | + datasource: 17 | + url: jdbc:h2:mem:testdb 18 | + driver-class-name: org.h2.Driver 19 | + username: sa 20 | + password: password 21 | + h2: 22 | + console: 23 | + enabled: true 24 | + jpa: 25 | + hibernate: 26 | + ddl-auto: update 27 | + show-sql: true 28 | ``` 29 | 30 | ### 2. Modify `application.yml` to be used for the stage environment 31 | 32 | ```diff 33 | --- src/main/resources/application.yml 34 | +++ src/main/resources/application.yml 35 | @@ -1,13 +1,13 @@ 36 | spring: 37 | datasource: 38 | - url: jdbc:postgresql://:5432/ 39 | - username: 40 | - password: 41 | + url: jdbc:postgresql://stage-rds-endpoint:5432/stage_db 42 | + username: stage_user 43 | + password: stage_password 44 | driver-class-name: org.postgresql.Driver 45 | jpa: 46 | hibernate: 47 | ddl-auto: update 48 | show-sql: true 49 | properties: 50 | hibernate: 51 | format_sql: true 52 | ``` 53 | 54 | 55 | 56 | ### 6. Update `application.yml` to include profile-specific configurations 57 | 58 | ```diff 59 | --- src/main/resources/application.yml 60 | +++ src/main/resources/application.yml 61 | @@ -1,13 +1,15 @@ 62 | spring: 63 | profiles: 64 | - active: stage 65 | + active: stage 66 | datasource: 67 | - url: jdbc:postgresql://:5432/ 68 | - username: 69 | - password: 70 | + url: jdbc:postgresql://stage-rds-endpoint:5432/stage_db 71 | + username: stage_user 72 | + password: stage_password 73 | driver-class-name: org.postgresql.Driver 74 | jpa: 75 | hibernate: 76 | ddl-auto: update 77 | show-sql: true 78 | properties: 79 | hibernate: 80 | format_sql: true 81 | ``` 82 | 83 | ### 7. 
Update `application-local.yml` to include profile-specific configurations 84 | 85 | ```diff 86 | --- src/main/resources/application-local.yml 87 | +++ src/main/resources/application-local.yml 88 | @@ -1,13 +1,15 @@ 89 | spring: 90 | profiles: 91 | - active: local 92 | + active: local 93 | datasource: 94 | - url: jdbc:h2:mem:testdb 95 | - driver-class-name: org.h2.Driver 96 | - username: sa 97 | - password: password 98 | + url: jdbc:h2:mem:testdb 99 | + driver-class-name: org.h2.Driver 100 | + username: sa 101 | + password: password 102 | h2: 103 | console: 104 | enabled: true 105 | jpa: 106 | hibernate: 107 | ddl-auto: update 108 | show-sql: true 109 | ``` 110 | -------------------------------------------------------------------------------- /tests/core/improve_function_test_cases/zbf_yml_missing_code: -------------------------------------------------------------------------------- 1 | spring: 2 | datasource: 3 | url: jdbc:postgresql://:5432/ 4 | username: 5 | password: 6 | driver-class-name: org.postgresql.Driver 7 | jpa: 8 | hibernate: 9 | ddl-auto: update 10 | show-sql: true 11 | properties: 12 | hibernate: 13 | format_sql: true 14 | -------------------------------------------------------------------------------- /tests/core/test_ai.py: -------------------------------------------------------------------------------- 1 | from langchain.chat_models.base import BaseChatModel 2 | from langchain_community.chat_models.fake import FakeListChatModel 3 | 4 | from gpt_engineer.core.ai import AI 5 | 6 | 7 | def mock_create_chat_model(self) -> BaseChatModel: 8 | return FakeListChatModel(responses=["response1", "response2", "response3"]) 9 | 10 | 11 | def test_start(monkeypatch): 12 | monkeypatch.setattr(AI, "_create_chat_model", mock_create_chat_model) 13 | 14 | ai = AI("gpt-4") 15 | 16 | # act 17 | response_messages = ai.start("system prompt", "user prompt", step_name="step name") 18 | 19 | # assert 20 | assert response_messages[-1].content == "response1" 21 | 22 | 23 | def test_next(monkeypatch): 24 | # arrange 25 | monkeypatch.setattr(AI, "_create_chat_model", mock_create_chat_model) 26 | 27 | ai = AI("gpt-4") 28 | response_messages = ai.start("system prompt", "user prompt", step_name="step name") 29 | 30 | # act 31 | response_messages = ai.next( 32 | response_messages, "next user prompt", step_name="step name" 33 | ) 34 | 35 | # assert 36 | assert response_messages[-1].content == "response2" 37 | 38 | 39 | def test_token_logging(monkeypatch): 40 | # arrange 41 | monkeypatch.setattr(AI, "_create_chat_model", mock_create_chat_model) 42 | 43 | ai = AI("gpt-4") 44 | 45 | # act 46 | response_messages = ai.start("system prompt", "user prompt", step_name="step name") 47 | usageCostAfterStart = ai.token_usage_log.usage_cost() 48 | ai.next(response_messages, "next user prompt", step_name="step name") 49 | usageCostAfterNext = ai.token_usage_log.usage_cost() 50 | 51 | # assert 52 | assert usageCostAfterStart > 0 53 | assert usageCostAfterNext > usageCostAfterStart 54 | -------------------------------------------------------------------------------- /tests/core/test_file_selector_enhancements.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from pathlib import Path 4 | from typing import List, Union 5 | 6 | from gpt_engineer.applications.cli.file_selector import FileSelector 7 | 8 | editorcalled = False 9 | 10 | 11 | def set_editor_called( 12 | self, input_path: Union[str, Path], init: bool = True 13 | ) -> List[str]: 14 | global editorcalled 15 | 
editorcalled = True 16 | return [] 17 | 18 | 19 | def set_file_selector_tmpproject(tmp_path): 20 | project_path = tmp_path / "project/" 21 | os.mkdir(project_path) 22 | os.mkdir(project_path / "x") 23 | os.mkdir(project_path / "a") 24 | 25 | gpteng_path = project_path / ".gpteng" 26 | os.mkdir(gpteng_path) 27 | 28 | with open(gpteng_path / "file_selection.toml", "w") as file: 29 | file.write("[files]\n") 30 | file.write(' "x/xxtest.py" = "selected"\n') 31 | file.write(' "a/aatest.py" = "selected"\n') 32 | 33 | with open(project_path / "x/xxtest.py", "w") as file: 34 | file.write('print("Hello")') 35 | 36 | with open(project_path / "a/aatest.py", "w") as file: 37 | file.write('print("Hello")') 38 | 39 | return project_path 40 | 41 | 42 | def test_file_selector_enhancement_skip_file_selector(tmp_path): 43 | project_path = set_file_selector_tmpproject(tmp_path) 44 | fileSelector = FileSelector(project_path=project_path) 45 | fileSelector.editor_file_selector = set_editor_called 46 | fileSelector.ask_for_files(skip_file_selection=True) 47 | 48 | assert editorcalled is False, "FileSelector.skip_file_selector is not working" 49 | 50 | 51 | def test_file_selector_enhancement_sort(tmp_path): 52 | project_path = set_file_selector_tmpproject(tmp_path) 53 | fileSelector = FileSelector(project_path=project_path) 54 | 55 | sortedFiles = fileSelector.get_current_files(project_path) 56 | assert sortedFiles == [ 57 | "a/aatest.py", 58 | "x/xxtest.py", 59 | ], "FileSelector.get_current_files is unsorted!" 60 | -------------------------------------------------------------------------------- /tests/core/test_git.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import tempfile 3 | 4 | from pathlib import Path 5 | 6 | from gpt_engineer.core.git import ( 7 | filter_by_gitignore, 8 | filter_files_with_uncommitted_changes, 9 | init_git_repo, 10 | is_git_installed, 11 | is_git_repo, 12 | stage_files, 13 | ) 14 | 15 | 16 | def test_verify_git_installed(): 17 | # If git isn't installed we can't run any git tests either way 18 | assert is_git_installed() 19 | 20 | 21 | def test_init_git_repo(): 22 | with tempfile.TemporaryDirectory() as tmpdir: 23 | path = Path(tmpdir) 24 | init_git_repo(path) 25 | assert is_git_repo(path) 26 | 27 | 28 | def test_stage_files(): 29 | with tempfile.TemporaryDirectory() as tmpdir: 30 | path = Path(tmpdir) 31 | init_git_repo(path) 32 | 33 | # Create a file and stage it 34 | file = path / "test.txt" 35 | file.write_text("test") 36 | 37 | stage_files(path, ["test.txt"]) 38 | 39 | # Check if the file is staged 40 | assert ( 41 | subprocess.run( 42 | ["git", "diff", "--cached", "--name-only"], 43 | cwd=path, 44 | stdout=subprocess.PIPE, 45 | ) 46 | .stdout.decode() 47 | .strip() 48 | == "test.txt" 49 | ) 50 | 51 | 52 | def test_filter_by_gitignore(): 53 | with tempfile.TemporaryDirectory() as tmpdir: 54 | path = Path(tmpdir) 55 | init_git_repo(path) 56 | 57 | # Create a .gitignore file 58 | gitignore = path / ".gitignore" 59 | gitignore.write_text("*.txt") 60 | assert filter_by_gitignore(path, ["test.txt"]) == [] 61 | 62 | 63 | def test_filter_by_uncommitted_changes(): 64 | with tempfile.TemporaryDirectory() as tmpdir: 65 | path = Path(tmpdir) 66 | init_git_repo(path) 67 | 68 | # Create a file and commit it 69 | file = path / "test.txt" 70 | file.write_text("test") 71 | 72 | subprocess.run(["git", "add", "test.txt"], cwd=path) 73 | subprocess.run(["git", "commit", "-m", "test"], cwd=path) 74 | 75 | # Update the file 76 | 
file.write_text("test2") 77 | 78 | # The modified file should be reported as having uncommitted changes 79 | assert filter_files_with_uncommitted_changes(path, {"test.txt": "test"}) == [ 80 | "test.txt" 81 | ] 82 | 83 | 84 | def test_filter_by_uncommitted_changes_ignore_staged_files(): 85 | with tempfile.TemporaryDirectory() as tmpdir: 86 | path = Path(tmpdir) 87 | init_git_repo(path) 88 | 89 | # Create a file and stage it 90 | file = path / "test.txt" 91 | file.write_text("test") 92 | subprocess.run(["git", "add", "test.txt"], cwd=path) 93 | 94 | # Staged but uncommitted files should not be reported 95 | assert filter_files_with_uncommitted_changes(path, {"test.txt": "test"}) == [] 96 | 97 | 98 | def test_filter_by_uncommitted_changes_ignore_untracked(): 99 | with tempfile.TemporaryDirectory() as tmpdir: 100 | path = Path(tmpdir) 101 | init_git_repo(path) 102 | 103 | # Create a file but don't track it 104 | file = path / "test.txt" 105 | file.write_text("test") 106 | 107 | # Untracked files should not be reported 108 | assert filter_files_with_uncommitted_changes(path, {"test.txt": "test"}) == [] 109 | -------------------------------------------------------------------------------- /tests/core/test_salvage_correct_hunks.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | from typing import List 5 | 6 | import pytest 7 | 8 | from langchain_core.messages import AIMessage 9 | 10 | from gpt_engineer.core.default.disk_memory import DiskMemory 11 | from gpt_engineer.core.default.paths import memory_path 12 | from gpt_engineer.core.default.steps import salvage_correct_hunks 13 | from gpt_engineer.core.files_dict import FilesDict 14 | 15 | TEST_FILES_DIR = os.path.dirname(os.path.abspath(__file__)) 16 | memory = DiskMemory(memory_path(".")) 17 | 18 | 19 | def get_file_content(file_path: str) -> str: 20 | with open( 21 | os.path.join(TEST_FILES_DIR, "improve_function_test_cases", file_path), "r" 22 | ) as f: 23 | return f.read() 24 | 25 | 26 | def message_builder(chat_path: str) -> List[AIMessage]: 27 | chat_content = get_file_content(chat_path) 28 | 29 | json = { 30 | "lc": 1, 31 | "type": "constructor", 32 | "id": ["langchain", "schema", "messages", "AIMessage"], 33 | "kwargs": { 34 | "content": chat_content, 35 | "additional_kwargs": {}, 36 | "response_metadata": {"finish_reason": "stop"}, 37 | "name": None, 38 | "id": None, 39 | "example": False, 40 | }, 41 | } 42 | 43 | return [AIMessage(**json["kwargs"])] 44 | 45 | 46 | def test_validation_and_apply_complex_diff(): 47 | files = FilesDict({"taskmaster.py": get_file_content("task_master_code")}) 48 | salvage_correct_hunks(message_builder("task_master_chat"), files, memory) 49 | 50 | 51 | def test_validation_and_apply_long_diff(): 52 | files = FilesDict({"VMClonetest.ps1": get_file_content("wheaties_example_code")}) 53 | salvage_correct_hunks(message_builder("wheaties_example_chat"), files, memory) 54 | 55 | 56 | def test_validation_and_apply_wrong_diff(): 57 | files = FilesDict( 58 | {"src/components/SocialLinks.tsx": get_file_content("vgvishesh_example_code")} 59 | ) 60 | salvage_correct_hunks(message_builder("vgvishesh_example_chat"), files, memory) 61 | 62 | 63 | def test_validation_and_apply_non_change_diff(): 64 | files = FilesDict({"src/App.tsx": get_file_content("vgvishesh_example_2_code")}) 65 | salvage_correct_hunks(message_builder("vgvishesh_example_2_chat"), files, memory) 66 | 67 | 68 | def test_validation_and_apply_diff_on_apps_benchmark_6(): 69 | files = FilesDict({"main.py": get_file_content("apps_benchmark_6_code")}) 70 |
salvage_correct_hunks(message_builder("apps_benchmark_6_chat"), files, memory) 71 | 72 | 73 | def test_validation_and_apply_diff_on_apps_benchmark_6_v2(): 74 | files = FilesDict({"main.py": get_file_content("apps_benchmark_6_v2_code")}) 75 | salvage_correct_hunks(message_builder("apps_benchmark_6_v2_chat"), files, memory) 76 | 77 | 78 | def test_create_two_new_files(): 79 | files = FilesDict({"main.py": get_file_content("create_two_new_files_code")}) 80 | salvage_correct_hunks(message_builder("create_two_new_files_chat"), files, memory) 81 | 82 | 83 | def test_theo_case(): 84 | files = FilesDict({"dockerfile": get_file_content("theo_case_code")}) 85 | updated_files, _ = salvage_correct_hunks( 86 | message_builder("theo_case_chat"), files, memory 87 | ) 88 | print(updated_files["dockerfile"]) 89 | print(updated_files["run.py"]) 90 | 91 | 92 | def test_zbf_yml_missing(): 93 | files = FilesDict( 94 | {"src/main/resources/application.yml": get_file_content("zbf_yml_missing_code")} 95 | ) 96 | updated_files, _ = salvage_correct_hunks( 97 | message_builder("zbf_yml_missing_chat"), files, memory 98 | ) 99 | print(updated_files["src/main/resources/application.yml"]) 100 | print(updated_files["src/main/resources/application-local.yml"]) 101 | 102 | 103 | def test_clean_up_folder(clean_up_folder): 104 | # The folder should be deleted after the test is run 105 | assert True 106 | 107 | 108 | @pytest.fixture 109 | def clean_up_folder(): 110 | yield 111 | # Teardown code: delete a folder and all its contents 112 | print("cleaning up") 113 | folder_path = os.path.join(os.path.dirname(__file__), ".gpteng") 114 | shutil.rmtree(folder_path, ignore_errors=True) 115 | 116 | 117 | if __name__ == "__main__": 118 | pytest.main() 119 | -------------------------------------------------------------------------------- /tests/core/test_token_usage.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import csv 3 | import io 4 | import os 5 | 6 | from io import StringIO 7 | from pathlib import Path 8 | 9 | from langchain.schema import HumanMessage, SystemMessage 10 | from PIL import Image 11 | 12 | from gpt_engineer.core.token_usage import Tokenizer, TokenUsageLog 13 | 14 | 15 | def test_format_log(): 16 | # arrange 17 | token_usage_log = TokenUsageLog("gpt-4") 18 | request_messages = [ 19 | SystemMessage(content="my system message"), 20 | HumanMessage(content="my user prompt"), 21 | ] 22 | response = "response from model" 23 | 24 | # act 25 | token_usage_log.update_log(request_messages, response, "step 1") 26 | token_usage_log.update_log(request_messages, response, "step 2") 27 | csv_log = token_usage_log.format_log() 28 | 29 | # assert 30 | csv_rows = list(csv.reader(StringIO(csv_log))) 31 | 32 | assert len(csv_rows) == 3 33 | 34 | assert all(len(row) == 7 for row in csv_rows) 35 | 36 | 37 | def test_usage_cost(): 38 | # arrange 39 | token_usage_log = TokenUsageLog("gpt-4") 40 | request_messages = [ 41 | SystemMessage(content="my system message"), 42 | HumanMessage(content="my user prompt"), 43 | ] 44 | response = "response from model" 45 | 46 | # act 47 | token_usage_log.update_log(request_messages, response, "step 1") 48 | token_usage_log.update_log(request_messages, response, "step 2") 49 | usage_cost = token_usage_log.usage_cost() 50 | 51 | # assert 52 | assert usage_cost > 0 53 | 54 | 55 | def test_image_tokenizer(): 56 | # Arrange 57 | token_usage_log = Tokenizer("gpt-4") 58 | image_path = Path(__file__).parent.parent / "test_data" / "mona_lisa.jpg" 59 | # 
Check if the image file exists 60 | if not os.path.isfile(image_path): 61 | raise FileNotFoundError(f"Image file not found: {image_path}") 62 | 63 | # Act 64 | with Image.open(image_path) as img: 65 | # Convert RGBA to RGB 66 | if img.mode == "RGBA": 67 | img = img.convert("RGB") 68 | 69 | buffered = io.BytesIO() 70 | img.save(buffered, format="JPEG") 71 | image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8") 72 | 73 | # Calculate the token cost of the base64 encoded image 74 | image_token_cost = token_usage_log.num_tokens_for_base64_image(image_base64) 75 | 76 | # Assert 77 | assert image_token_cost == 1105 78 | 79 | 80 | def test_list_type_message_with_image(): 81 | # Arrange 82 | token_usage_log = TokenUsageLog("gpt-4") 83 | 84 | request_messages = [ 85 | SystemMessage(content="My system message"), 86 | HumanMessage( 87 | content=[ 88 | {"type": "text", "text": "My user message"}, 89 | { 90 | "type": "image_url", 91 | "image_url": { 92 | "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAgAAAAIAQMAAAD+wSzIAAAABlBMVEX///+/v7+jQ3Y5AAAADklEQVQI12P4AIX8EAgALgAD/aNpbtEAAAAASUVORK5CYII", 93 | "detail": "low", 94 | }, 95 | }, 96 | ] 97 | ), 98 | ] 99 | response = "response from model" 100 | 101 | # Act 102 | token_usage_log.update_log(request_messages, response, "list message with image") 103 | 104 | # Since this is the first (and only) log entry, the in-step total tokens should match our expected total 105 | expected_total_tokens = 106 106 | 107 | # Assert 108 | assert ( 109 | token_usage_log.log()[-1].in_step_total_tokens == expected_total_tokens 110 | ), f"Expected {expected_total_tokens} tokens, got {token_usage_log.log()[-1].in_step_total_tokens}" 111 | -------------------------------------------------------------------------------- /tests/mock_ai.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Optional 2 | 3 | 4 | class MockAI: 5 | def __init__(self, response: List): 6 | self.responses = iter(response) 7 | 8 | def start(self, system: str, user: Any, *, step_name: str) -> List[str]: 9 | return [next(self.responses)] 10 | 11 | def next( 12 | self, messages: List[str], prompt: Optional[str] = None, *, step_name: str 13 | ) -> List[str]: 14 | return [next(self.responses)] 15 | -------------------------------------------------------------------------------- /tests/test_data/mona_lisa.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AntonOsika/gpt-engineer/a90fcd543eedcc0ff2c34561bc0785d2ba83c47e/tests/test_data/mona_lisa.jpg -------------------------------------------------------------------------------- /tests/test_install.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for successful installation of the package. 3 | """ 4 | 5 | import shutil 6 | import subprocess 7 | import sys 8 | import venv 9 | 10 | from pathlib import Path 11 | 12 | import pytest 13 | 14 | # Define the directory for the virtual environment. 15 | VENV_DIR = "./venv_test_installation" 16 | 17 | 18 | @pytest.fixture(scope="module", autouse=True) 19 | def venv_setup_teardown(): 20 | """ 21 | A pytest fixture that sets up and tears down a virtual environment for testing. 22 | This fixture is automatically used for all tests in this module. 23 | 24 | The fixture: 25 | - Creates a virtual environment. 26 | - Installs Poetry in the virtual environment. 27 | - Installs dependencies using Poetry. 
28 | - Cleans up by removing the virtual environment after tests are completed. 29 | """ 30 | try: 31 | # Create a virtual environment with pip available. 32 | venv.create(VENV_DIR, with_pip=True, clear=True) 33 | 34 | # Install Poetry in the virtual environment. 35 | subprocess.run( 36 | [f"{VENV_DIR}/bin/python", "-m", "pip", "install", "poetry"], check=True 37 | ) 38 | 39 | # Install the package and its dependencies using Poetry. 40 | subprocess.run([f"{VENV_DIR}/bin/poetry", "install"], cwd=".", check=True) 41 | 42 | # Provide the setup environment to the test functions. 43 | yield 44 | except Exception as e: 45 | # Skip tests if the environment setup fails. 46 | pytest.skip(f"Could not create venv or install dependencies: {str(e)}") 47 | finally: 48 | # Clean up by removing the virtual environment after tests. 49 | shutil.rmtree(VENV_DIR) 50 | 51 | 52 | def test_installation(): 53 | """ 54 | Test to ensure that the package can be installed using Poetry in the virtual environment. 55 | """ 56 | # Determine the correct Poetry executable path based on the operating system. 57 | poetry_executable = ( 58 | f"{VENV_DIR}/bin/poetry" 59 | if sys.platform != "win32" 60 | else f"{VENV_DIR}/Scripts/poetry.exe" 61 | ) 62 | 63 | # Run Poetry install and capture its output. 64 | result = subprocess.run([poetry_executable, "install"], capture_output=True) 65 | 66 | # Assert that the installation was successful. 67 | assert ( 68 | result.returncode == 0 69 | ), f"Install via poetry failed: {result.stderr.decode()}" 70 | 71 | 72 | def test_cli_execution(): 73 | """ 74 | Test to verify that the command-line interface (CLI) of the package works as expected. 75 | This test assumes that the 'gpt-engineer' command is available and operational after installation. 76 | """ 77 | # Run the 'gpt-engineer' command with the '--help' option and capture its output. 78 | result = subprocess.run( 79 | args=["gpt-engineer", "--help"], capture_output=True, text=True 80 | ) 81 | 82 | # Assert that the CLI command executed successfully. 83 | assert ( 84 | result.returncode == 0 85 | ), f"gpt-engineer command failed with message: {result.stderr}" 86 | 87 | 88 | @pytest.mark.requires_key 89 | def test_installed_main_execution(tmp_path, monkeypatch): 90 | # Ignore git installation check 91 | monkeypatch.setattr("gpt_engineer.core.git.is_git_installed", lambda: False) 92 | tmp_path = Path(tmp_path) 93 | p = tmp_path / "projects/example" 94 | p.mkdir(parents=True) 95 | (p / "prompt").write_text("make a program that prints the outcome of 4+4") 96 | proc = subprocess.Popen( 97 | ["gpte", str(p)], 98 | stdin=subprocess.PIPE, 99 | stdout=subprocess.PIPE, 100 | text=True, 101 | cwd=tmp_path, 102 | ) 103 | 104 | inputs = "Y\nn" 105 | output, _ = proc.communicate(inputs) 106 | 107 | assert "8" in output 108 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py310, py311, py312 3 | 4 | [testenv] 5 | basepython = 6 | py310: python3.10 7 | py311: python3.11 8 | py312: python3.12 9 | deps = 10 | poetry 11 | commands = 12 | poetry install --no-root 13 | poetry run pytest --cov=gpt_engineer --cov-report=xml -k 'not installed_main_execution' 14 | --------------------------------------------------------------------------------
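For local runs outside tox, a rough Python equivalent of the `commands` section above (assumes Poetry is installed and available on PATH):

```python
# Rough equivalent of the tox `commands` section, for running the suite directly.
# Assumes Poetry is installed and available on PATH.
import subprocess

subprocess.run(["poetry", "install", "--no-root"], check=True)
subprocess.run(
    [
        "poetry", "run", "pytest",
        "--cov=gpt_engineer",
        "--cov-report=xml",
        "-k", "not installed_main_execution",
    ],
    check=True,
)
```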