├── .github ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── ISSUE_TEMPLATE.md │ └── issue-template.md ├── PULL_REQUEST_TEMPLATE │ └── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── ci.yaml │ ├── codeql.yml │ ├── pre-commit.yaml │ └── release.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── DISCLAIMER.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── ROADMAP.md ├── TERMS_OF_USE.md ├── WINDOWS_README.md ├── benchmark ├── RESULTS.md ├── currency_converter │ └── prompt ├── file_explorer │ └── prompt ├── file_organizer │ └── prompt ├── image_resizer │ └── prompt ├── markdown_editor │ └── prompt ├── password_generator │ └── prompt ├── pomodoro_timer │ └── prompt ├── timer_app │ └── prompt ├── todo_list │ └── prompt └── url_shortener │ └── prompt ├── gpt_engineer ├── __init__.py ├── ai.py ├── chat_to_files.py ├── collect.py ├── db.py ├── domain.py ├── learning.py ├── main.py ├── preprompts │ ├── fix_code │ ├── generate │ ├── philosophy │ ├── qa │ ├── respec │ ├── spec │ ├── unit_tests │ ├── use_feedback │ └── use_qa └── steps.py ├── projects └── example │ └── prompt ├── pyproject.toml ├── scripts ├── benchmark.py ├── clean_benchmarks.py ├── print_chat.py └── rerun_edited_message_logs.py └── tests ├── __init__.py ├── steps ├── __init__.py └── test_archive.py ├── test_ai.py ├── test_chat_to_files.py ├── test_collect.py └── test_db.py /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity or expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and 
orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or advances of 31 | any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email address, 35 | without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 
50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting using an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | . 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of reporters of incidents. 67 | 68 | ## Enforcement Guidelines 69 | 70 | Community leaders will follow these Community Impact Guidelines in determining 71 | the consequences for any action they deem in violation of this Code of Conduct: 72 | 73 | ### 1. Correction 74 | 75 | **Community Impact**: Use of inappropriate language or other behavior deemed 76 | unprofessional or unwelcome in the community. 77 | 78 | **Consequence**: A private, written warning from community leaders, providing 79 | clarity around the nature of the violation and an explanation of why the 80 | behavior was inappropriate. A public apology may be requested. 81 | 82 | ### 2. Warning 83 | 84 | **Community Impact**: A violation through a single incident or series of 85 | actions. 86 | 87 | **Consequence**: A warning with consequences for continued behavior. No 88 | interaction with the people involved, including unsolicited interaction with 89 | those enforcing the Code of Conduct, for a specified period of time. This 90 | includes avoiding interactions in community spaces as well as external channels 91 | like social media. Violating these terms may lead to a temporary or permanent 92 | ban. 93 | 94 | ### 3. 
Temporary Ban 95 | 96 | **Community Impact**: A serious violation of community standards, including 97 | sustained inappropriate behavior. 98 | 99 | **Consequence**: A temporary ban from any sort of interaction or public 100 | communication with the community for a specified period of time. No public or 101 | private interaction with the people involved, including unsolicited interaction 102 | with those enforcing the Code of Conduct, is allowed during this period. 103 | Violating these terms may lead to a permanent ban. 104 | 105 | ### 4. Permanent Ban 106 | 107 | **Community Impact**: Demonstrating a pattern of violation of community 108 | standards, including sustained inappropriate behavior, harassment of an 109 | individual, or aggression toward or disparagement of classes of individuals. 110 | 111 | **Consequence**: A permanent ban from any sort of public interaction within the 112 | community. 113 | 114 | ## Attribution 115 | 116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 117 | version 2.1, available at 118 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 119 | 120 | Community Impact Guidelines were inspired by 121 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 122 | 123 | For answers to common questions about this code of conduct, see the FAQ at 124 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 125 | [https://www.contributor-covenant.org/translations][translations]. 
126 | 127 | [homepage]: https://www.contributor-covenant.org 128 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 129 | [Mozilla CoC]: https://github.com/mozilla/diversity 130 | [FAQ]: https://www.contributor-covenant.org/faq 131 | [translations]: https://www.contributor-covenant.org/translations 132 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to GPT Engineer 2 | 3 | By participating in this project, you agree to abide by the [code of conduct](CODE_OF_CONDUCT.md). 4 | 5 | ## Getting Started 6 | 7 | To get started with contributing, please follow these steps: 8 | 9 | 1. Fork the repository and clone it to your local machine. 10 | 2. Install any necessary dependencies. 11 | 3. Create a new branch for your changes: `git checkout -b my-branch-name`. 12 | 4. Make your desired changes or additions. 13 | 5. Run the tests to ensure everything is working as expected. 14 | 6. Commit your changes: `git commit -m "Descriptive commit message"`. 15 | 7. Push to the branch: `git push origin my-branch-name`. 16 | 8. Submit a pull request to the `main` branch of the original repository. 17 | 18 | ## Code Style 19 | 20 | Please make sure to follow the established code style guidelines for this project. Consistent code style helps maintain readability and makes it easier for others to contribute to the project. 21 | 22 | To enforce this we use [`pre-commit`](https://pre-commit.com/) to run [`black`](https://black.readthedocs.io/en/stable/index.html) and [`ruff`](https://beta.ruff.rs/docs/) on every commit. 23 | 24 | `pre-commit` is part of our `requirements.txt` file so you should already have it installed. If you don't, you can install the library via pip with: 25 | 26 | ```bash 27 | $ pip install -e . 
28 | 29 | # And then install the `pre-commit` hooks with: 30 | 31 | $ pre-commit install 32 | 33 | # output: 34 | pre-commit installed at .git/hooks/pre-commit 35 | ``` 36 | 37 | Or you could just run `make dev-install` to install the dependencies and the hooks. 38 | 39 | If you are not familiar with the concept of [git hooks](https://git-scm.com/docs/githooks) and/or [`pre-commit`](https://pre-commit.com/) please read the documentation to understand how they work. 40 | 41 | As an introduction of the actual workflow, here is an example of the process you will encounter when you make a commit: 42 | 43 | Let's add a file we have modified with some errors, see how the pre-commit hooks run `black` and fails. 44 | `black` is set to automatically fix the issues it finds: 45 | 46 | ```bash 47 | $ git add chat_to_files.py 48 | $ git commit -m "commit message" 49 | black....................................................................Failed 50 | - hook id: black 51 | - files were modified by this hook 52 | 53 | reformatted chat_to_files.py 54 | 55 | All done! ✨ 🍰 ✨ 56 | 1 file reformatted. 57 | ``` 58 | 59 | You can see that `chat_to_files.py` is both staged and not staged for commit. This is because `black` has formatted it and now it is different from the version you have in your working directory. To fix this you can simply run `git add chat_to_files.py` again and now you can commit your changes. 60 | 61 | ```bash 62 | $ git status 63 | On branch pre-commit-setup 64 | Changes to be committed: 65 | (use "git restore --staged ..." to unstage) 66 | modified: chat_to_files.py 67 | 68 | Changes not staged for commit: 69 | (use "git add ..." to update what will be committed) 70 | (use "git restore ..." to discard changes in working directory) 71 | modified: chat_to_files.py 72 | ``` 73 | 74 | Now let's add the file again to include the latest commits and see how `ruff` fails. 
75 | 76 | ```bash 77 | $ git add chat_to_files.py 78 | $ git commit -m "commit message" 79 | black....................................................................Passed 80 | ruff.....................................................................Failed 81 | - hook id: ruff 82 | - exit code: 1 83 | - files were modified by this hook 84 | 85 | Found 2 errors (2 fixed, 0 remaining). 86 | ``` 87 | 88 | Same as before, you can see that `chat_to_files.py` is both staged and not staged for commit. This is because `ruff` has formatted it and now it is different from the version you have in your working directory. To fix this you can simply run `git add chat_to_files.py` again and now you can commit your changes. 89 | 90 | ```bash 91 | $ git add chat_to_files.py 92 | $ git commit -m "commit message" 93 | black....................................................................Passed 94 | ruff.....................................................................Passed 95 | fix end of files.........................................................Passed 96 | [pre-commit-setup f00c0ce] testing 97 | 1 file changed, 1 insertion(+), 1 deletion(-) 98 | ``` 99 | 100 | Now your file has been committed and you can push your changes. 101 | 102 | At the beginning this might seem like a tedious process (having to add the file again after `black` and `ruff` have modified it) but it is actually very useful. It allows you to see what changes `black` and `ruff` have made to your files and make sure that they are correct before you commit them. 103 | 104 | ## Issue Tracker 105 | 106 | If you encounter any bugs, issues, or have feature requests, please [create a new issue](https://github.com/AntonOsika/gpt-engineer/issues/new) on the project's GitHub repository. Provide a clear and descriptive title along with relevant details to help us address the problem or understand your request. 
107 | 108 | ## Licensing 109 | 110 | By contributing to GPT Engineer, you agree that your contributions will be licensed under the [LICENSE](../LICENSE) file of the project. 111 | 112 | Thank you for your interest in contributing to GPT Engineer! We appreciate your support and look forward to your contributions. 113 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [antonosika] 4 | patreon: gpt-engineer 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **YOU MAY DELETE THE ENTIRE TEMPLATE BELOW.** 2 | 3 | # Issue Template 4 | 5 | ## Expected Behavior 6 | 7 | Please describe the behavior you are expecting. 8 | 9 | ## Current Behavior 10 | 11 | What is the current behavior? 12 | 13 | ## Failure Information (for bugs) 14 | 15 | Please help provide information about the failure if this is a bug. If it is not a bug, please remove the rest of this template. 16 | 17 | ### Steps to Reproduce 18 | 19 | Please provide detailed steps for reproducing the issue. 20 | 21 | 1. step 1 22 | 2. step 2 23 | 3. you get it... 24 | 25 | ### Failure Logs 26 | 27 | Please include any relevant log snippets or files here. 
28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/issue-template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Issue template 3 | about: All templates should use this format unless there is a reason not to 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | --- 8 | 9 | **YOU MAY DELETE THE ENTIRE TEMPLATE BELOW.** 10 | 11 | ## Issue Template 12 | 13 | ## Expected Behavior 14 | 15 | Please describe the behavior you are expecting. 16 | 17 | ## Current Behavior 18 | 19 | What is the current behavior? 20 | 21 | ## Failure Information (for bugs) 22 | 23 | Please help provide information about the failure if this is a bug. If it is not a bug, please remove the rest of this template. 24 | 25 | ### Steps to Reproduce 26 | 27 | Please provide detailed steps for reproducing the issue. 28 | 29 | 1. step 1 30 | 2. step 2 31 | 3. you get it... 32 | 33 | ### Failure Logs 34 | 35 | Please include any relevant log snippets or files here. 36 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **YOU MAY DELETE THE ENTIRE TEMPLATE BELOW.** 2 | 3 | ## How Has This Been Tested? 
4 | 5 | Please describe if you have either: 6 | 7 | - Generated the "example" project 8 | - Ran the entire benchmark suite 9 | - Something else 10 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: Pip install and pytest 2 | on: 3 | pull_request: 4 | branches: [main] 5 | paths: 6 | - "**.py" 7 | push: 8 | branches: [main] 9 | paths: 10 | - "**.py" 11 | 12 | jobs: 13 | test: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v3 17 | 18 | - uses: actions/setup-python@v4 19 | with: 20 | python-version: "3.10" 21 | cache: pip 22 | 23 | - name: Install package 24 | run: pip install -e . 25 | 26 | - name: Install test runner 27 | run: pip install pytest pytest-cov 28 | 29 | - name: Run unit tests 30 | run: pytest --cov=gpt_engineer 31 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ 'main' ] 6 | pull_request: 7 | # The branches below must be a subset of the branches above 8 | branches: [ 'main' ] 9 | schedule: 10 | - cron: '26 2 * * 6' 11 | 12 | jobs: 13 | analyze: 14 | name: Analyze 15 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 16 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 17 | permissions: 18 | actions: read 19 | contents: read 20 | security-events: write 21 | 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | language: [ 'python' ] 26 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 27 | # Use only 'java' to analyze code written in Java, Kotlin or both 28 | # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both 29 | # Learn more about CodeQL language support at 
https://aka.ms/codeql-docs/language-support 30 | 31 | steps: 32 | - name: Checkout repository 33 | uses: actions/checkout@v3 34 | 35 | # Initializes the CodeQL tools for scanning. 36 | - name: Initialize CodeQL 37 | uses: github/codeql-action/init@v2 38 | with: 39 | languages: ${{ matrix.language }} 40 | # If you wish to specify custom queries, you can do so here or in a config file. 41 | # By default, queries listed here will override any specified in a config file. 42 | # Prefix the list here with "+" to use these queries and those in the config file. 43 | 44 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 45 | # queries: security-extended,security-and-quality 46 | 47 | 48 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). 49 | # If this step fails, then you should remove it and run the build manually (see below) 50 | - name: Autobuild 51 | uses: github/codeql-action/autobuild@v2 52 | 53 | # ℹ️ Command-line programs to run using the OS shell. 54 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 55 | 56 | # If the Autobuild fails above, remove it and uncomment the following three lines. 57 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 
58 | 59 | # - run: | 60 | # echo "Run, Build Application using script" 61 | # ./location_of_script_within_repo/buildscript.sh 62 | 63 | - name: Perform CodeQL Analysis 64 | uses: github/codeql-action/analyze@v2 65 | with: 66 | category: "/language:${{matrix.language}}" 67 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: pre-commit 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main] 7 | 8 | jobs: 9 | pre-commit: 10 | runs-on: ubuntu-latest 11 | 12 | permissions: 13 | contents: write 14 | 15 | steps: 16 | - uses: actions/checkout@v3 17 | 18 | - uses: actions/setup-python@v4 19 | 20 | - uses: pre-commit/action@v3.0.0 21 | with: 22 | extra_args: --all-files 23 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Build and publish Python packages to PyPI 2 | 3 | on: 4 | workflow_dispatch: 5 | release: 6 | types: 7 | - published 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: 15 | - "3.10" 16 | steps: 17 | - uses: actions/checkout@v3 18 | 19 | - uses: actions/setup-python@v4 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | cache: pip 23 | 24 | - name: Install build tool 25 | run: pip install build 26 | 27 | - name: Build package 28 | run: python -m build 29 | 30 | - name: Upload package as build artifact 31 | uses: actions/upload-artifact@v3 32 | with: 33 | name: package 34 | path: dist/ 35 | 36 | publish: 37 | runs-on: ubuntu-latest 38 | needs: build 39 | environment: 40 | name: pypi 41 | url: https://pypi.org/p/gpt-engineer 42 | permissions: 43 | id-token: write 44 | steps: 45 | - name: Collect packages to release 46 | uses: actions/download-artifact@v3 47 | with: 48 | name: 
package 49 | path: dist/ 50 | 51 | - name: Publish packages to PyPI 52 | uses: pypa/gh-action-pypi-publish@release/v1 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/ignore-files/ for more about ignoring files. 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # Distribution / packaging 9 | dist/ 10 | build/ 11 | *.egg-info/ 12 | *.egg 13 | 14 | # Virtual environments 15 | .env 16 | .env.sh 17 | venv/ 18 | ENV/ 19 | 20 | # IDE-specific files 21 | .vscode/ 22 | .idea/ 23 | 24 | # Compiled Python modules 25 | *.pyc 26 | *.pyo 27 | *.pyd 28 | 29 | # Python testing 30 | .pytest_cache/ 31 | .ruff_cache/ 32 | .coverage 33 | .mypy_cache/ 34 | 35 | # macOS specific files 36 | .DS_Store 37 | 38 | # Windows specific files 39 | Thumbs.db 40 | 41 | # this application's specific files 42 | archive 43 | 44 | # any log file 45 | *log.txt 46 | todo 47 | scratchpad 48 | 49 | # Ignore GPT Engineer files 50 | projects 51 | !projects/example 52 | 53 | # Pyenv 54 | .python-version 55 | 56 | # Benchmark files 57 | benchmark 58 | !benchmark/*/prompt 59 | 60 | .gpte_consent 61 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | fail_fast: true 4 | default_stages: [commit] 5 | 6 | repos: 7 | - repo: https://github.com/pre-commit/mirrors-mypy 8 | rev: v1.3.0 9 | hooks: 10 | - id: mypy 11 | additional_dependencies: [types-tabulate==0.9.0.2] 12 | 13 | - repo: https://github.com/psf/black 14 | rev: 23.3.0 15 | hooks: 16 | - id: black 17 | args: [--config, pyproject.toml] 18 | types: [python] 19 | 20 | - repo: 
https://github.com/charliermarsh/ruff-pre-commit 21 | rev: "v0.0.272" 22 | hooks: 23 | - id: ruff 24 | args: [--fix, --exit-non-zero-on-fix] 25 | 26 | - repo: https://github.com/pre-commit/pre-commit-hooks 27 | rev: v4.4.0 28 | hooks: 29 | - id: check-toml 30 | - id: check-yaml 31 | - id: detect-private-key 32 | - id: end-of-file-fixer 33 | - id: trailing-whitespace 34 | -------------------------------------------------------------------------------- /DISCLAIMER.md: -------------------------------------------------------------------------------- 1 | # Disclaimer 2 | 3 | gpt-engineer is an experimental application and is provided "as-is" without any warranty, express or implied. By using this software, you agree to assume all risks associated with its use, including but not limited to data loss, system failure, or any other issues that may arise. 4 | 5 | The developers and contributors of this project do not accept any responsibility or liability for any losses, damages, or other consequences that may occur as a result of using this software. You are solely responsible for any decisions and actions taken based on the information provided by gpt-engineer. 6 | 7 | Please note that the use of the GPT-4 language model can be expensive due to its token usage. By utilizing this project, you acknowledge that you are responsible for monitoring and managing your own token usage and the associated costs. It is highly recommended to check your OpenAI API usage regularly and set up any necessary limits or alerts to prevent unexpected charges. 8 | 9 | As an autonomous experiment, gpt-engineer may generate code or take actions that are not in line with real-world business practices or legal requirements. It is your responsibility to ensure that any actions or decisions made by the generated code comply with all applicable laws, regulations, and ethical standards. 
The developers and contributors of this project shall not be held responsible for any consequences arising from the use of this software. 10 | 11 | By using gpt-engineer, you agree to indemnify, defend, and hold harmless the developers, contributors, and any affiliated parties from and against any and all claims, damages, losses, liabilities, costs, and expenses (including reasonable attorneys' fees) arising from your use of this software or your violation of these terms. 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Anton Osika 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include gpt_engineer/preprompts * 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #Sets the default shell for executing commands as /bin/bash and specifies command should be executed in a Bash shell. 2 | SHELL := /bin/bash 3 | 4 | # Color codes for terminal output 5 | COLOR_RESET=\033[0m 6 | COLOR_CYAN=\033[1;36m 7 | COLOR_GREEN=\033[1;32m 8 | 9 | # Defines the targets help, install, dev-install, and run as phony targets. Phony targets are targets that are not really the name of files that are to be built. Instead, they are treated as commands. 10 | .PHONY: help install run 11 | 12 | #sets the default goal to help when no target is specified on the command line. 13 | .DEFAULT_GOAL := help 14 | 15 | #Disables echoing of commands. The commands executed by Makefile will not be printed on the console during execution. 16 | .SILENT: 17 | 18 | #Sets the variable name to the second word from the MAKECMDGOALS. MAKECMDGOALS is a variable that contains the command-line targets specified when running make. In this case, the variable name will hold the value of the folder name specified when running the run target. 19 | name := $(word 2,$(MAKECMDGOALS)) 20 | 21 | #Defines a target named help. 22 | help: 23 | @echo "Please use 'make ' where is one of the following:" 24 | @echo " help Return this message with usage instructions." 25 | @echo " install Will install the dependencies and create a virtual environment." 26 | @echo " run Runs GPT Engineer on the folder with the given name." 27 | 28 | #Defines a target named install. This target will create a virtual environment, upgrade pip, install the dependencies, and install the pre-commit hooks. 
This means that running make install will first execute the create-venv target, then the upgrade-pip target, then the install-dependencies target, and finally the install-pre-commit target. 29 | install: create-venv upgrade-pip install-dependencies install-pre-commit farewell 30 | 31 | #Defines a target named create-venv. This target will create a virtual environment in the venv folder. 32 | create-venv: 33 | @echo -e "$(COLOR_CYAN)Creating virtual environment...$(COLOR_RESET)" && \ 34 | python -m venv venv 35 | 36 | #Defines a target named upgrade-pip. This target will upgrade pip to the latest version. 37 | upgrade-pip: 38 | @echo -e "$(COLOR_CYAN)Upgrading pip...$(COLOR_RESET)" && \ 39 | source venv/bin/activate && \ 40 | pip install --upgrade pip >> /dev/null 41 | 42 | #Defines a target named install-dependencies. This target will install the dependencies. 43 | install-dependencies: 44 | @echo -e "$(COLOR_CYAN)Installing dependencies...$(COLOR_RESET)" && \ 45 | source venv/bin/activate && \ 46 | pip install -e . >> /dev/null 47 | 48 | #Defines a target named install-pre-commit. This target will install the pre-commit hooks. 49 | install-pre-commit: 50 | @echo -e "$(COLOR_CYAN)Installing pre-commit hooks...$(COLOR_RESET)" && \ 51 | source venv/bin/activate && \ 52 | pre-commit install 53 | 54 | #Defines a target named farewell. This target will print a farewell message. 55 | farewell: 56 | @echo -e "$(COLOR_GREEN)All done!$(COLOR_RESET)" 57 | 58 | #Defines a target named run. This target will run GPT Engineer on the folder with the given name, name was defined earlier in the Makefile. 
59 | run: 60 | @echo -e "$(COLOR_CYAN)Running GPT Engineer on $(COLOR_GREEN)$(name)$(COLOR_CYAN) folder...$(COLOR_RESET)" && \ 61 | source venv/bin/activate && \ 62 | gpt-engineer projects/$(name) 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPT Engineer 2 | 3 | [![Discord Follow](https://dcbadge.vercel.app/api/server/8tcDQ89Ej2?style=flat)](https://discord.gg/8tcDQ89Ej2) 4 | [![GitHub Repo stars](https://img.shields.io/github/stars/AntonOsika/gpt-engineer?style=social)](https://github.com/AntonOsika/gpt-engineer) 5 | [![Twitter Follow](https://img.shields.io/twitter/follow/antonosika?style=social)](https://twitter.com/AntonOsika) 6 | 7 | 8 | **Specify what you want it to build, the AI asks for clarification, and then builds it.** 9 | 10 | GPT Engineer is made to be easy to adapt, extend, and make your agent learn how you want your code to look. It generates an entire codebase based on a prompt. 11 | 12 | [Demo](https://twitter.com/antonosika/status/1667641038104674306) 13 | 14 | ## Project philosophy 15 | 16 | - Simple to get value 17 | - Flexible and easy to add new own "AI steps". See `steps.py`. 18 | - Incrementally build towards a user experience of: 19 | 1. high level prompting 20 | 2. giving feedback to the AI that it will remember over time 21 | - Fast handovers back and forth between AI and human 22 | - Simplicity, all computation is "resumable" and persisted to the filesystem 23 | 24 | ## Usage 25 | 26 | Choose either **stable** or **development**. 
27 | 28 | For **stable** release: 29 | 30 | - `pip install gpt-engineer` 31 | 32 | For **development**: 33 | - `git clone https://github.com/AntonOsika/gpt-engineer.git` 34 | - `cd gpt-engineer` 35 | - `pip install -e .` 36 | - (or: `make install && source venv/bin/activate` for a venv) 37 | 38 | **Setup** 39 | 40 | With an OpenAI API key (preferably with GPT-4 access) run: 41 | 42 | - `export OPENAI_API_KEY=[your api key]` 43 | 44 | Alternative for Windows 45 | 46 | **Run**: 47 | 48 | - Create an empty folder. If inside the repo, you can run: 49 | - `cp -r projects/example/ projects/my-new-project` 50 | - Fill in the `prompt` file in your new folder 51 | - `gpt-engineer projects/my-new-project` 52 | - (Note, `gpt-engineer --help` lets you see all available options. For example `--steps use_feedback` lets you improve/fix code in a project) 53 | 54 | By running gpt-engineer you agree to our [terms](https://github.com/AntonOsika/gpt-engineer/blob/main/TERMS_OF_USE.md). 55 | 56 | **Results** 57 | - Check the generated files in `projects/my-new-project/workspace` 58 | 59 | 60 | To **run in the browser** you can simply: 61 | 62 | [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/AntonOsika/gpt-engineer/codespaces) 63 | 64 | 65 | 66 | ## Features 67 | 68 | You can specify the "identity" of the AI agent by editing the files in the `preprompts` folder. 69 | 70 | Editing the `preprompts`, and evolving how you write the project prompt, is how you make the agent remember things between projects. 71 | 72 | Each step in `steps.py` will have its communication history with GPT4 stored in the logs folder, and can be rerun with `scripts/rerun_edited_message_logs.py`. 73 | 74 | ## Vision 75 | The gpt-engineer community is building the **open platform for devs to tinker with and build their personal code-generation toolbox**. 76 | 77 | If you are interested in contributing to this, we would be interested in having you. 
78 | 79 | If you want to see our broader ambitions, check out the [roadmap](https://github.com/AntonOsika/gpt-engineer/blob/main/ROADMAP.md), and join 80 | [discord](https://discord.gg/8tcDQ89Ej2) 81 | to get input on how you can [contribute](.github/CONTRIBUTING.md) to it. 82 | 83 | We are currently looking for more maintainers and community organisers. Email anton.osika@gmail.com if you are interested in an official role. 84 | 85 | 86 | ## Example 87 | 88 | https://github.com/AntonOsika/gpt-engineer/assets/4467025/6e362e45-4a94-4b0d-973d-393a31d92d9b 89 | -------------------------------------------------------------------------------- /ROADMAP.md: -------------------------------------------------------------------------------- 1 | # Roadmap 2 | 3 | There are three main milestones we believe will greatly increase gpt-engineer's reliability and capability: 4 | - [x] Continuous evaluation of our progress 🎉 5 | - [ ] Test code and fix errors with LLMs 6 | - [ ] Make code generation become small, verifiable steps 7 | 8 | ## Our current focus: 9 | 10 | - [x] **Continuous evaluation of progress 🎉** 11 | - [x] Create a step that asks “did it run/work/perfect” in the end of each run [#240](https://github.com/AntonOsika/gpt-engineer/issues/240) 🎉 12 | - [x] Collect a dataset for gpt engineer to learn from, by storing code generation runs 🎉 13 | - [ ] Run the benchmark multiple times, and document the results for the different "step configs" [#239](https://github.com/AntonOsika/gpt-engineer/issues/239) 14 | - [ ] Improve the default config based on results 15 | - [ ] **Self healing code** 16 | - [ ] Run the generated tests 17 | - [ ] Feed the results of failing tests back into LLM and ask it to fix the code 18 | - [ ] **Let human give feedback** 19 | - [ ] Ask human for what is not working as expected in a loop, and feed it into LLM to fix the code, until the human is happy 20 | - [ ] **Improve existing projects** 21 | - [ ] Decide on the "flow" for the CLI commands and 
where the project files are created 22 | - [ ] Add an "improve code" command 23 | - [ ] Architect how gpt-engineer becomes a platform 24 | - [ ] Integrate Aider 25 | 26 | ## Experimental research 27 | This is not our current focus, but if you are interested in experimenting: Please 28 | create a thread in Discord #general and share your intentions and your findings as you 29 | go along. High impact examples: 30 | - [ ] **Make code generation become small, verifiable steps** 31 | - [ ] Ask GPT4 to decide how to sequence the entire generation, and do one 32 | prompt for each subcomponent 33 | - [ ] For each small part, generate tests for that subpart, and do the loop of running the tests for each part, feeding 34 | results into GPT4, and let it edit the code until they pass 35 | - [ ] **Ad hoc experiments** 36 | - [ ] Try Microsoft guidance, and benchmark if this helps improve performance 37 | - [ ] Dynamic planning: Let gpt-engineer plan which "steps" to carry out itself, depending on the 38 | task, by giving it few shot example of what are usually "the right-sized steps" to carry 39 | out for such projects 40 | 41 | ## Codebase improvements 42 | By improving the codebase and developer ergonomics, we accelerate progress. Some examples: 43 | - [ ] Set up automatic PR review for all PRs with e.g. Codium pr-agent 44 | - [ ] LLM tests in CI: Run super small tests with GPT3.5 in CI, that check that simple code generation still works 45 | 46 | # How you can help out 47 | 48 | You can: 49 | 50 | - Post a "design" as a google doc in our Discord and ask for feedback to address one of the items in the roadmap 51 | - Submit PRs to address one of the items in the roadmap 52 | - Do a review of someone else's PR and propose next steps (further review, merge, close) 53 | 54 | Volunteer work in any of these will get acknowledged. 
55 | -------------------------------------------------------------------------------- /TERMS_OF_USE.md: -------------------------------------------------------------------------------- 1 | # Terms of Use 2 | 3 | By using gpt-engineer you are aware of and agree to the below Terms of Use, as well as the attached [disclaimer of warranty](https://github.com/AntonOsika/gpt-engineer/blob/main/DISCLAIMER.md). 4 | 5 | Both OpenAI, L.L.C. and the creators of gpt-engineer **store data 6 | about how gpt-engineer is used** with the sole intent of improving the capability of the product. Care is taken to not store any information that can be tied to a person. 7 | 8 | Please beware that natural text input, such as the files `prompt` and `feedback`, will be stored and this can, in theory, be used to (although the gpt-engineer creators will never attempt to do so) connect a person's style of writing or content in the files to a real person. 9 | 10 | More information about OpenAI's terms of use [here](https://openai.com/policies/terms-of-use). 11 | 12 | You can disable storing usage data by gpt-engineer, **but not OpenAI**, by setting the environment variable COLLECT_LEARNINGS_OPT_OUT=true. 13 | -------------------------------------------------------------------------------- /WINDOWS_README.md: -------------------------------------------------------------------------------- 1 | # Windows Setup 2 | ## Short version 3 | 4 | On Windows, follow the standard [README.md](https://github.com/AntonOsika/gpt-engineer/blob/main/README.md), but to set API key do one of: 5 | - `set OPENAI_API_KEY=[your api key]` on cmd 6 | - `$env:OPENAI_API_KEY="[your api key]"` on powershell 7 | 8 | ## Full setup guide 9 | 10 | Choose either **stable** or **development**. 11 | 12 | For **stable** release: 13 | 14 | Run `pip install gpt-engineer` in the command line 15 | 16 | Or: 17 | 18 | 1. Open your web browser and navigate to the Python Package Index (PyPI) website: . 19 | 2. 
On the PyPI page for the gpt-engineer package, locate the "Download files" section. Here you'll find a list of available versions and their corresponding download links. 20 | 3. Identify the version of gpt-engineer you want to install and click on the associated download link. This will download the package file (usually a .tar.gz or .whl file) to your computer. 21 | 4. Once the package file is downloaded, open your Python development environment or IDE. 22 | 5. In your Python development environment, look for an option to install packages or manage dependencies. The exact location and terminology may vary depending on your IDE. For example, in PyCharm, you can go to "File" > "Settings" > "Project: \" > "Python Interpreter" to manage packages. 23 | 6. In the package management interface, you should see a list of installed packages. Look for an option to add or install a new package. 24 | 7. Click on the "Add Package" or "Install Package" button. 25 | 8. In the package installation dialog, choose the option to install from a file or from a local source. 26 | 9. Browse and select the downloaded gpt-engineer package file from your computer. 27 | 28 | For **development**: 29 | 30 | - `git clone git@github.com:AntonOsika/gpt-engineer.git` 31 | - `cd gpt-engineer` 32 | - `pip install -e .` 33 | - (or: `make install && source venv/bin/activate` for a venv) 34 | 35 | ### Setup 36 | 37 | With an api key from OpenAI: 38 | 39 | Run `set OPENAI_API_KEY=[your API key]` in the command line 40 | 41 | Or: 42 | 43 | 1. In the Start Menu, type to search for "Environment Variables" and click on "Edit the system environment variables". 44 | 2. In the System Properties window, click on the "Environment Variables" button. 45 | 3. In the Environment Variables window, you'll see two sections: User variables and System variables. 46 | 4. To set a user-specific environment variable, select the "New" button under the User variables section. 47 | 5. 
To set a system-wide environment variable, select the "New" button under the System variables section. 48 | 6. Enter the variable name "OPENAI_API_KEY" in the "Variable name" field. 49 | 7. Enter the variable value (e.g., your API key) in the "Variable value" field. 50 | 8. Click "OK" to save the changes. 51 | 9. Close any open command prompt or application windows and reopen them for the changes to take effect. 52 | 53 | Now you can use `%OPENAI_API_KEY%` when prompted to input your key. 54 | 55 | ### Run 56 | 57 | - Create an empty folder. If inside the repo, you can: 58 | - Run `xcopy /E projects\example projects\my-new-project` in the command line 59 | - Or hold CTRL and drag the folder down to create a copy, then rename to fit your project 60 | - Fill in the `prompt` file in your new folder 61 | - `gpt-engineer projects/my-new-project` 62 | - (Note, `gpt-engineer --help` lets you see all available options. For example `--steps use_feedback` lets you improve/fix code in a project) 63 | 64 | By running gpt-engineer you agree to our [ToS](https://github.com/AntonOsika/gpt-engineer/TERMS_OF_USE.md). 
65 | 66 | ### Results 67 | 68 | - Check the generated files in `projects/my-new-project/workspace` 69 | -------------------------------------------------------------------------------- /benchmark/RESULTS.md: -------------------------------------------------------------------------------- 1 | # Benchmarks 2 | 3 | ```bash 4 | python scripts/benchmark.py 5 | ``` 6 | 7 | ## 2023-06-21 8 | 9 | | Benchmark | Ran | Works | Perfect | 10 | |--------------------|-----|-------|---------| 11 | | currency_converter | ✅ | ❌ | ❌ | 12 | | image_resizer | ✅ | ✅ | ✅ | 13 | | pomodoro_timer | ✅ | ✅ | ✅ | 14 | | url_shortener | ✅ | ✅ | ✅ | 15 | | file_explorer | ✅ | ✅ | ✅ | 16 | | markdown_editor | ✅ | ✅ | ❌ | 17 | | timer_app | ✅ | ❌ | ❌ | 18 | | weather_app | ✅ | ✅ | ✅ | 19 | | file_organizer | ✅ | ✅ | ✅ | 20 | | password_generator | ✅ | ✅ | ✅ | 21 | | todo_list | ✅ | ✅ | ✅ | 22 | 23 | ## Notes on the errors 24 | 25 | Most errors come from that the "generate entrypoint" are incorrect. Ignoring 26 | those, we get 8/11 fully correct. 27 | 28 | All errors are very easy to fix. 29 | 30 | One error was trying to modify a constant. 31 | One error was that the html template was not fully filled in. 
32 | One error is that a dependency was used incorrectly and easy to fix 33 | 34 | ## 2023-06-19 35 | 36 | | Benchmark | Ran | Works | Perfect | 37 | |--------------------|-----|-------|---------| 38 | | currency_converter | ❌ | ❌ | ❌ | 39 | | image_resizer | ✅ | ❌ | ❌ | 40 | | pomodoro_timer | ❌ | ❌ | ❌ | 41 | | url_shortener | ❌ | ❌ | ❌ | 42 | | file_explorer | ✅ | ✅ | ✅ | 43 | | markdown_editor | ❌ | ❌ | ❌ | 44 | | timer_app | ✅ | ❌ | ❌ | 45 | | weather_app | ❌ | ❌ | ❌ | 46 | | file_organizer | ✅ | ✅ | ✅ | 47 | | password_generator | ✅ | ✅ | ✅ | 48 | | todo_list | ✅ | ❌ | ❌ | 49 | 50 | ## Notes on the errors 51 | 52 | **timer_app** almost works with unit tests config 53 | 54 | - failure mode: undefined import/conflicting names 55 | 56 | **file_explorer** works 57 | 58 | **file organiser**: works 59 | 60 | **image_resizer** almost works with unit tests config 61 | 62 | - failure mode: undefined import 63 | 64 | **todo_list** runs. doesn't really work with unit tests config 65 | Uncaught ReferenceError: module is not defined 66 | 67 | - failure mode: placeholder text 68 | 69 | url_shortener starts but gets the error: 70 | SQLite objects created in a thread can only be used in that same thread. The object was created in thread id 8636125824 and this is thread id 13021003776. 71 | 72 | markdown_editor: 73 | failing tests, 'WebDriver' object has no attribute 'find_element_by_id' 74 | 75 | pomodoro: doesn't run it only tests 76 | 77 | currency_converter: backend doesn't return anything 78 | 79 | weather_app only runs test, no code existed 80 | -------------------------------------------------------------------------------- /benchmark/currency_converter/prompt: -------------------------------------------------------------------------------- 1 | Build a currency converter app using an API for exchange rates. Use HTML, CSS, and JavaScript for the frontend and Node.js for the backend. Allow users to convert between different currencies. 
2 | -------------------------------------------------------------------------------- /benchmark/file_explorer/prompt: -------------------------------------------------------------------------------- 1 | Create a basic file explorer CLI tool in Python that allows users to navigate through directories, view file contents, and perform basic file operations (copy, move, delete). 2 | -------------------------------------------------------------------------------- /benchmark/file_organizer/prompt: -------------------------------------------------------------------------------- 1 | Create a file organizer CLI tool in Python that sorts files in a directory based on their file types (e.g., images, documents, audio) and moves them into corresponding folders. 2 | -------------------------------------------------------------------------------- /benchmark/image_resizer/prompt: -------------------------------------------------------------------------------- 1 | Create a CLI tool in Python that allows users to resize images by specifying the desired width and height. Use the Pillow library for image manipulation. 2 | -------------------------------------------------------------------------------- /benchmark/markdown_editor/prompt: -------------------------------------------------------------------------------- 1 | Build a simple markdown editor using HTML, CSS, and JavaScript. Allow users to input markdown text and display the formatted output in real-time. 2 | -------------------------------------------------------------------------------- /benchmark/password_generator/prompt: -------------------------------------------------------------------------------- 1 | Create a password generator CLI tool in Python that generates strong, random passwords based on user-specified criteria, such as length and character types (letters, numbers, symbols). 
2 | -------------------------------------------------------------------------------- /benchmark/pomodoro_timer/prompt: -------------------------------------------------------------------------------- 1 | Develop a Pomodoro timer app using HTML, CSS, and JavaScript. Allow users to set work and break intervals and receive notifications when it's time to switch. 2 | -------------------------------------------------------------------------------- /benchmark/timer_app/prompt: -------------------------------------------------------------------------------- 1 | Create a simple timer app using HTML, CSS, and JavaScript that allows users to set a countdown timer and receive an alert when the time is up. 2 | -------------------------------------------------------------------------------- /benchmark/todo_list/prompt: -------------------------------------------------------------------------------- 1 | Create a simple to-do list app using HTML, CSS, and JavaScript. Store tasks in local storage and allow users to add, edit, and delete tasks. 2 | -------------------------------------------------------------------------------- /benchmark/url_shortener/prompt: -------------------------------------------------------------------------------- 1 | Create a URL shortener app using HTML, CSS, JavaScript, and a backend language like Python or Node.js. Allow users to input a long URL and generate a shortened version that redirects to the original URL. Store the shortened URLs in a database. 
class AI:
    """Thin wrapper around the OpenAI chat-completion API.

    Streams completions to stdout as they arrive, keeps the conversation as a
    list of ``{"role", "content"}`` dicts, and records per-step plus cumulative
    token usage (estimated with tiktoken) in ``token_usage_log``.
    """

    def __init__(self, model="gpt-4", temperature=0.1):
        self.temperature = temperature
        self.model = model

        # initialize token usage log
        self.cumulative_prompt_tokens = 0
        self.cumulative_completion_tokens = 0
        self.cumulative_total_tokens = 0
        self.token_usage_log = []

        try:
            self.tokenizer = tiktoken.encoding_for_model(model)
        except KeyError:
            # Unknown model id: fall back to a generic encoder so token
            # counts are still produced, just as rough estimates.
            logger.debug(
                f"Tiktoken encoder for model {model} not found. Using "
                "cl100k_base encoder instead. The results may therefore be "
                "inaccurate and should only be used as estimate."
            )
            self.tokenizer = tiktoken.get_encoding("cl100k_base")

    def start(self, system, user, step_name):
        """Begin a conversation from a system and a user prompt and run it.

        Returns the full message list including the assistant's streamed reply.
        """
        messages = [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ]

        return self.next(messages, step_name=step_name)

    # The three helpers below wrap plain text into the message dict format
    # the chat API expects, one per role.
    def fsystem(self, msg):
        return {"role": "system", "content": msg}

    def fuser(self, msg):
        return {"role": "user", "content": msg}

    def fassistant(self, msg):
        return {"role": "assistant", "content": msg}

    def next(self, messages: List[Dict[str, str]], prompt=None, *, step_name=None):
        """Send ``messages`` (plus an optional extra user ``prompt``) to the model.

        Streams the reply to stdout chunk by chunk, appends it to ``messages``
        as an assistant message, logs token usage under ``step_name``, and
        returns the extended message list.  NOTE: the caller's list is mutated
        in place via ``+=``.
        """
        if prompt:
            messages += [{"role": "user", "content": prompt}]

        logger.debug(f"Creating a new chat completion: {messages}")
        response = openai.ChatCompletion.create(
            messages=messages,
            stream=True,
            model=self.model,
            temperature=self.temperature,
        )

        # Accumulate the streamed delta chunks while echoing them live.
        chat = []
        for chunk in response:
            delta = chunk["choices"][0]["delta"]  # type: ignore
            msg = delta.get("content", "")
            print(msg, end="")
            chat.append(msg)
        print()
        messages += [{"role": "assistant", "content": "".join(chat)}]
        logger.debug(f"Chat completion finished: {messages}")

        self.update_token_usage_log(
            messages=messages, answer="".join(chat), step_name=step_name
        )

        return messages

    def update_token_usage_log(self, messages, answer, step_name):
        """Append a TokenUsage entry for one completed step.

        ``messages`` is the full conversation (prompt side) and ``answer`` the
        assistant's reply; counts are tiktoken estimates, not API-reported.
        """
        prompt_tokens = self.num_tokens_from_messages(messages)
        completion_tokens = self.num_tokens(answer)
        total_tokens = prompt_tokens + completion_tokens

        self.cumulative_prompt_tokens += prompt_tokens
        self.cumulative_completion_tokens += completion_tokens
        self.cumulative_total_tokens += total_tokens

        self.token_usage_log.append(
            TokenUsage(
                step_name=step_name,
                in_step_prompt_tokens=prompt_tokens,
                in_step_completion_tokens=completion_tokens,
                in_step_total_tokens=total_tokens,
                total_prompt_tokens=self.cumulative_prompt_tokens,
                total_completion_tokens=self.cumulative_completion_tokens,
                total_tokens=self.cumulative_total_tokens,
            )
        )

    def format_token_usage_log(self):
        """Render ``token_usage_log`` as a CSV string (header row + one row per step)."""
        result = "step_name,"
        result += "prompt_tokens_in_step,completion_tokens_in_step,total_tokens_in_step"
        result += ",total_prompt_tokens,total_completion_tokens,total_tokens\n"
        for log in self.token_usage_log:
            result += log.step_name + ","
            result += str(log.in_step_prompt_tokens) + ","
            result += str(log.in_step_completion_tokens) + ","
            result += str(log.in_step_total_tokens) + ","
            result += str(log.total_prompt_tokens) + ","
            result += str(log.total_completion_tokens) + ","
            result += str(log.total_tokens) + "\n"
        return result

    def num_tokens(self, txt):
        """Return the token count of ``txt`` under this instance's tokenizer."""
        return len(self.tokenizer.encode(txt))

    def num_tokens_from_messages(self, messages):
        """Returns the number of tokens used by a list of messages."""
        # The per-message/per-reply constants below follow the inline notes;
        # they approximate the chat format's framing overhead.
        n_tokens = 0
        for message in messages:
            n_tokens += (
                4  # every message follows {role/name}\n{content}\n
            )
            for key, value in message.items():
                n_tokens += self.num_tokens(value)
                if key == "name":  # if there's a name, the role is omitted
                    n_tokens += -1  # role is always required and always 1 token
        n_tokens += 2  # every reply is primed with assistant
        return n_tokens
def parse_chat(chat):  # -> List[Tuple[str, str]]:
    """Extract (path, code) pairs from a chat transcript with ``` fences.

    Each fenced block must be preceded by a token naming the file; any text
    before the first fence is captured as README.md.
    """
    code_block_pattern = r"(\S+)\n\s*```[^\n]*\n(.+?)```"

    files = []
    for match in re.finditer(code_block_pattern, chat, re.DOTALL):
        # Drop characters that are not allowed in file names.
        path = re.sub(r'[<>"|?*]', "", match.group(1))
        # Strip decorations the model sometimes wraps around the name:
        path = re.sub(r"^\[(.*)\]$", r"\1", path)  # surrounding brackets
        path = re.sub(r"^`(.*)`$", r"\1", path)  # surrounding backticks
        path = re.sub(r"\]$", "", path)  # stray trailing bracket
        files.append((path, match.group(2)))

    # Everything before the first fenced block becomes the README.
    leading_text = chat.split("```")[0]
    files.append(("README.md", leading_text))

    return files


def to_files(chat, workspace):
    """Persist the raw chat plus every parsed file into the workspace mapping."""
    workspace["all_output.txt"] = chat
    for name, content in parse_chat(chat):
        workspace[name] = content
def collect_learnings(model: str, temperature: float, steps: List[Step], dbs: DBs):
    """Build a Learning record for this run and ship it via send_learning.

    Callers gate this on user consent (see main.py's collect_consent check).
    """
    learnings = extract_learning(
        model, temperature, steps, dbs, steps_file_hash=steps_file_hash()
    )
    send_learning(learnings)


def steps_file_hash():
    """Return the SHA-256 hex digest of the steps.py source file.

    Ties collected learnings to the exact steps implementation that
    produced them.
    """
    with open(steps.__file__, "r") as f:
        content = f.read()
        return hashlib.sha256(content.encode("utf-8")).hexdigest()
# This class represents a simple database that stores its data as files in a directory.
class DB:
    """A simple key-value store, where keys are filenames and values are file contents."""

    def __init__(self, path, in_memory_dict: Optional[Dict[Any, Any]] = None):
        """Open (creating if needed) the directory backing this store.

        in_memory_dict: when given, reads and writes go to this dict instead
        of the filesystem; dict keys are absolute path strings.
        """
        self.path = Path(path).absolute()

        self.path.mkdir(parents=True, exist_ok=True)
        self.in_memory_dict = in_memory_dict

    def __contains__(self, key):
        # NOTE(review): membership always checks the filesystem, even when an
        # in_memory_dict is configured — confirm this asymmetry is intended.
        return (self.path / key).is_file()

    def __getitem__(self, key):
        """Return the contents of the file named ``key``; raise KeyError if absent."""
        if self.in_memory_dict is not None:
            return self.in_memory_dict.__getitem__(str((self.path / key).absolute()))
        full_path = self.path / key
        if not full_path.is_file():
            raise KeyError(f"File '{key}' could not be found in '{self.path}'")
        with full_path.open("r", encoding="utf-8") as f:
            return f.read()

    def get(self, key, default=None):
        """dict-style get: return ``default`` instead of raising KeyError."""
        try:
            return self[key]
        except KeyError:
            return default

    def __setitem__(self, key, val):
        """Write ``val`` (str or bytes) to the file named ``key``.

        Parent directories are created as needed, so keys may contain '/'.
        """
        if self.in_memory_dict is not None:
            return self.in_memory_dict.__setitem__(str((self.path / key).absolute()), val)
        full_path = self.path / key
        full_path.parent.mkdir(parents=True, exist_ok=True)

        if isinstance(val, str):
            full_path.write_text(val, encoding="utf-8")
        elif isinstance(val, bytes):
            # Fix: the error message below has always advertised bytes
            # support, but bytes previously fell through to the TypeError.
            full_path.write_bytes(val)
        else:
            # If val is neither a string nor bytes, raise an error.
            raise TypeError("val must be either a str or bytes")
def human_input() -> Review:
    """Interactively ask the user to review the run and return a Review.

    Asks up to three y/n/u questions (ran / perfect / useful), an optional
    free-text comment, then triggers the consent check.
    """

    def ask(question: str) -> str:
        # Re-prompt until the user gives one of the accepted answers.
        reply = input(question)
        while reply not in ("y", "n", "u"):
            reply = input("Invalid input. Please enter y, n, or u: ")
        return reply

    print()
    print(
        colored("To help gpt-engineer learn, please answer 3 questions:", "light_green")
    )
    print()

    ran = ask("Did the generated code run at all? " + TERM_CHOICES)

    perfect = ""
    useful = ""
    if ran == "y":
        # Follow-ups only make sense when the code actually ran.
        perfect = ask(
            "Did the generated code do everything you wanted? " + TERM_CHOICES
        )
        if perfect != "y":
            useful = ask("Did the generated code do anything useful? " + TERM_CHOICES)

    comments = ""
    if perfect != "y":
        comments = input(
            "If you have time, please explain what was not working "
            + colored("(ok to leave blank)\n", "light_green")
        )

    check_consent()

    # Map y/n/u (or unanswered "") onto True/False/None.
    to_bool = {"y": True, "n": False, "u": None, "": None}
    return Review(
        raw=", ".join([ran, perfect, useful]),
        ran=to_bool[ran],
        works=to_bool[useful],
        perfect=to_bool[perfect],
        comments=comments,
    )
def collect_consent() -> bool:
    """Return True when learnings may be stored for this run.

    Combines the COLLECT_LEARNINGS_OPT_OUT environment variable with the
    on-disk .gpte_consent flag, prompting the user when neither decides.
    """
    opted_out = os.environ.get("COLLECT_LEARNINGS_OPT_OUT") == "true"
    consent_flag = Path(".gpte_consent")
    consent_on_disk = consent_flag.exists() and consent_flag.read_text() == "true"

    if opted_out:
        # Opt-out wins, except a user with a stored consent is asked again
        # rather than silently ignored.
        return ask_if_can_store() if consent_on_disk else False

    if consent_on_disk:
        return True

    if not ask_if_can_store():
        return False
    # Remember the answer so we do not ask on every run.
    consent_flag.write_text("true")
    print()
    print("(If you change your mind, delete the file .gpte_consent)")
    return True
def format_messages(messages: List[dict]) -> str:
    """Render a chat transcript as plain text, one role/content pair per entry."""
    rendered = []
    for entry in messages:
        rendered.append(f"{entry['role']}:\n\n{entry['content']}")
    return "\n".join(rendered)
@app.command()
def main(
    project_path: str = typer.Argument("projects/example", help="path"),
    model: str = typer.Argument("gpt-4", help="model id string"),
    temperature: float = 0.1,
    steps_config: StepsConfig = typer.Option(
        StepsConfig.DEFAULT, "--steps", "-s", help="decide which steps to run"
    ),
    verbose: bool = typer.Option(False, "--verbose", "-v"),
):
    """Run the gpt-engineer pipeline on *project_path* with the chosen step config."""
    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)

    model = fallback_model(model)
    ai = AI(
        model=model,
        temperature=temperature,
    )

    root = Path(project_path).absolute()
    memory_dir = root / "memory"

    dbs = DBs(
        memory=DB(memory_dir),
        logs=DB(memory_dir / "logs"),
        input=DB(root),
        workspace=DB(root / "workspace"),
        preprompts=DB(Path(__file__).parent / "preprompts"),
        archive=DB(root / "archive"),
    )

    # Configs that only replay or evaluate existing output must not wipe it.
    if steps_config not in [
        StepsConfig.EXECUTE_ONLY,
        StepsConfig.USE_FEEDBACK,
        StepsConfig.EVALUATE,
    ]:
        archive(dbs)

    pipeline = STEPS[steps_config]
    for step in pipeline:
        messages = step(ai, dbs)
        dbs.logs[step.__name__] = json.dumps(messages)

    if collect_consent():
        collect_learnings(model, temperature, pipeline, dbs)

    dbs.logs["token_usage"] = ai.format_token_usage_log()
-------------------------------------------------------------------------------- 1 | You are a super smart developer. You have been tasked with fixing a program and making it work according to the best of your knowledge. There might be placeholders in the code you have to fill in. 2 | You provide fully functioning, well formatted code with few comments, that works and has no bugs. 3 | Please return the full new code in the same format. 4 | -------------------------------------------------------------------------------- /gpt_engineer/preprompts/generate: -------------------------------------------------------------------------------- 1 | You will get instructions for code to write. 2 | You will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code. 3 | 4 | Think step by step and reason yourself to the right decisions to make sure we get it right. 5 | You will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose. 6 | 7 | Then you will output the content of each file including ALL code. 8 | Each file must strictly follow a markdown code block format, where the following tokens must be replaced such that 9 | FILENAME is the lowercase file name including the file extension, 10 | LANG is the markup code block language for the code's language, and CODE is the code: 11 | 12 | FILENAME 13 | ```LANG 14 | CODE 15 | ``` 16 | 17 | Do not comment on what every file does 18 | 19 | You will start with the "entrypoint" file, then go to the ones that are imported by that file, and so on. 20 | Please note that the code should be fully functional. No placeholders. 21 | 22 | Follow a language and framework appropriate best practice file naming convention. 23 | Make sure that files contain all imports, types etc. Make sure that code in different files are compatible with each other. 
24 | Ensure to implement all code, if you are unsure, write a plausible implementation. 25 | Include module dependency or package manager dependency definition file. 26 | Before you finish, double check that all parts of the architecture is present in the files. 27 | 28 | Here is an example: 29 | 30 | hello_world.py 31 | ```python 32 | print("hello world!") 33 | ``` -------------------------------------------------------------------------------- /gpt_engineer/preprompts/philosophy: -------------------------------------------------------------------------------- 1 | You almost always put different classes in different files. 2 | For Python, you always create an appropriate requirements.txt file. 3 | For NodeJS, you always create an appropriate package.json file. 4 | You always add a comment briefly describing the purpose of the function definition. 5 | You try to add comments explaining very complex bits of logic. 6 | You always follow the best practices for the requested languages in terms of describing the code written as a defined 7 | package/project. 8 | 9 | 10 | Python toolbelt preferences: 11 | - pytest 12 | - dataclasses 13 | -------------------------------------------------------------------------------- /gpt_engineer/preprompts/qa: -------------------------------------------------------------------------------- 1 | You will read instructions and not carry them out, only seek to clarify them. 2 | Specifically you will first summarise a list of super short bullets of areas that need clarification. 3 | Then you will pick one clarifying question, and wait for an answer from the user. 4 | -------------------------------------------------------------------------------- /gpt_engineer/preprompts/respec: -------------------------------------------------------------------------------- 1 | You are a pragmatic principal engineer at Google. 
2 | You have been asked to review a specification for a new feature by a previous version of yourself 3 | 4 | You have been asked to give feedback on the following: 5 | - Is there anything that might not work the way intended by the instructions? 6 | - Is there anything in the specification missing for the program to work as expected? 7 | - Is there anything that can be simplified without significant drawback? 8 | 9 | You are asked to make educated assumptions for each unclear item. 10 | For each of these, communicate which assumptions you'll make when implementing the feature. 11 | 12 | Think step by step to make sure we don't miss anything. 13 | -------------------------------------------------------------------------------- /gpt_engineer/preprompts/spec: -------------------------------------------------------------------------------- 1 | You are a super smart developer. You have been asked to make a specification for a program. 2 | 3 | Think step by step to make sure we get a high quality specification and we don't miss anything. 4 | First, be super explicit about what the program should do, which features it should have 5 | and give details about anything that might be unclear. **Don't leave anything unclear or undefined.** 6 | 7 | Second, lay out the names of the core classes, functions, methods that will be necessary, 8 | as well as a quick comment on their purpose. 9 | 10 | This specification will be used later as the basis for the implementation. 11 | -------------------------------------------------------------------------------- /gpt_engineer/preprompts/unit_tests: -------------------------------------------------------------------------------- 1 | You are a super smart developer using Test Driven Development to write tests according to a specification. 2 | 3 | Please generate tests based on the above specification. The tests should be as simple as possible, but still cover all the functionality. 
4 | -------------------------------------------------------------------------------- /gpt_engineer/preprompts/use_feedback: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/morph-labs/gpt-engineer/900202a5498af365e91a46685afe5af787c05b3b/gpt_engineer/preprompts/use_feedback -------------------------------------------------------------------------------- /gpt_engineer/preprompts/use_qa: -------------------------------------------------------------------------------- 1 | Please now remember the steps: 2 | 3 | Think step by step and reason yourself to the right decisions to make sure we get it right. 4 | First lay out the names of the core classes, functions, methods that will be necessary, As well as a quick comment on their purpose. 5 | 6 | Then you will output the content of each file including ALL code. 7 | Each file must strictly follow a markdown code block format, where the following tokens must be replaced such that 8 | FILENAME is the lowercase file name including the file extension, 9 | LANG is the markup code block language for the code's language, and CODE is the code: 10 | 11 | FILENAME 12 | ```LANG 13 | CODE 14 | ``` 15 | 16 | Please note that the code should be fully functional. No placeholders. 17 | 18 | You will start with the "entrypoint" file, then go to the ones that are imported by that file, and so on. 19 | Follow a language and framework appropriate best practice file naming convention. 20 | Make sure that files contain all imports, types etc. The code should be fully functional. Make sure that code in different files are compatible with each other. 21 | Before you finish, double check that all parts of the architecture is present in the files. 
import inspect
import json
import re
import subprocess

from enum import Enum
from typing import List

from termcolor import colored

from gpt_engineer.ai import AI
from gpt_engineer.chat_to_files import to_files
from gpt_engineer.db import DBs
from gpt_engineer.learning import human_input

# fixed: removed a local `def colored(*args): return args[0]` stub that
# shadowed the `from termcolor import colored` import above and silently
# disabled all terminal coloring in this module.


def setup_sys_prompt(dbs: DBs) -> str:
    """Compose the system prompt: generation instructions plus project philosophy."""
    return (
        dbs.preprompts["generate"] + "\nUseful to know:\n" + dbs.preprompts["philosophy"]
    )


def get_prompt(dbs: DBs) -> str:
    """Return the user's prompt, falling back to the legacy `main_prompt` file.

    While we migrate we have this fallback getter.
    """
    assert (
        "prompt" in dbs.input or "main_prompt" in dbs.input
    ), "Please put your prompt in the file `prompt` in the project directory"

    if "prompt" not in dbs.input:
        print(
            # fixed: closing backtick was missing after `main_prompt`
            colored(
                "Please put the prompt in the file `prompt`, not `main_prompt`", "red"
            )
        )
        print()
        return dbs.input["main_prompt"]

    return dbs.input["prompt"]


def curr_fn() -> str:
    """Get the name of the calling function (used to tag per-step log entries)."""
    return inspect.stack()[1].function


# All steps below have the signature Step


def simple_gen(ai: AI, dbs: DBs) -> List[dict]:
    """Run the AI on the main prompt and save the results to the workspace."""
    messages = ai.start(setup_sys_prompt(dbs), get_prompt(dbs), step_name=curr_fn())
    to_files(messages[-1]["content"], dbs.workspace)
    return messages
def gen_spec(ai: AI, dbs: DBs) -> List[dict]:
    """Generate a specification from the main prompt (plus clarifications) and
    store it in memory under "specification"."""
    conversation = [
        ai.fsystem(setup_sys_prompt(dbs)),
        ai.fsystem(f"Instructions: {dbs.input['prompt']}"),
    ]

    conversation = ai.next(conversation, dbs.preprompts["spec"], step_name=curr_fn())

    dbs.memory["specification"] = conversation[-1]["content"]

    return conversation
def gen_code(ai: AI, dbs: DBs) -> List[dict]:
    """Generate the implementation from prompt, specification and unit tests,
    then write the resulting files to the workspace."""
    context = [
        ai.fsystem(setup_sys_prompt(dbs)),
        ai.fuser(f"Instructions: {dbs.input['prompt']}"),
        ai.fuser(f"Specification:\n\n{dbs.memory['specification']}"),
        ai.fuser(f"Unit tests:\n\n{dbs.memory['unit_tests']}"),
    ]
    context = ai.next(context, dbs.preprompts["use_qa"], step_name=curr_fn())
    to_files(context[-1]["content"], dbs.workspace)
    return context
def gen_entrypoint(ai: AI, dbs: DBs) -> List[dict]:
    """Ask the AI for the shell commands that install and run the generated
    codebase, and save them to workspace/run.sh."""
    system_prompt = (
        "You will get information about a codebase that is currently on disk in "
        "the current folder.\n"
        "From this you will answer with code blocks that includes all the necessary "
        "unix terminal commands to "
        "a) install dependencies "
        "b) run all necessary parts of the codebase (in parallel if necessary).\n"
        "Do not install globally. Do not use sudo.\n"
        "Do not explain the code, just give the commands.\n"
        "Do not use placeholders, use example values (like . for a folder argument) "
        "if necessary.\n"
    )
    messages = ai.start(
        system=system_prompt,
        user="Information about the codebase:\n\n" + dbs.workspace["all_output.txt"],
        step_name=curr_fn(),
    )
    print()

    # Collect the body of every fenced code block in the answer into run.sh.
    code_block = re.compile(r"```\S*\n(.+?)```", re.DOTALL)
    dbs.workspace["run.sh"] = "\n".join(
        match.group(1) for match in code_block.finditer(messages[-1]["content"])
    )
    return messages
execute_entrypoint, 299 | human_review, 300 | ], 301 | Config.BENCHMARK: [simple_gen, gen_entrypoint], 302 | Config.SIMPLE: [simple_gen, gen_entrypoint, execute_entrypoint], 303 | Config.TDD: [ 304 | gen_spec, 305 | gen_unit_tests, 306 | gen_code, 307 | gen_entrypoint, 308 | execute_entrypoint, 309 | human_review, 310 | ], 311 | Config.TDD_PLUS: [ 312 | gen_spec, 313 | gen_unit_tests, 314 | gen_code, 315 | fix_code, 316 | gen_entrypoint, 317 | execute_entrypoint, 318 | human_review, 319 | ], 320 | Config.CLARIFY: [ 321 | clarify, 322 | gen_clarified_code, 323 | gen_entrypoint, 324 | execute_entrypoint, 325 | human_review, 326 | ], 327 | Config.RESPEC: [ 328 | gen_spec, 329 | respec, 330 | gen_unit_tests, 331 | gen_code, 332 | fix_code, 333 | gen_entrypoint, 334 | execute_entrypoint, 335 | human_review, 336 | ], 337 | Config.USE_FEEDBACK: [use_feedback, gen_entrypoint, execute_entrypoint, human_review], 338 | Config.EXECUTE_ONLY: [execute_entrypoint], 339 | Config.EVALUATE: [execute_entrypoint, human_review], 340 | } 341 | 342 | # Future steps that can be added: 343 | # run_tests_and_fix_files 344 | # execute_entrypoint_and_fix_files_if_it_results_in_error 345 | -------------------------------------------------------------------------------- /projects/example/prompt: -------------------------------------------------------------------------------- 1 | We are writing snake in python. MVC components split in separate files. Keyboard control. 2 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | 4 | [project] 5 | name = "gpt-engineer" 6 | version = "0.0.7" 7 | description = "Specify what you want it to build, the AI asks for clarification, and then builds it." 
8 | readme = "README.md" 9 | requires-python = ">=3.8" 10 | dependencies = [ 11 | 'black == 23.3.0', 12 | 'click >= 8.0.0', 13 | 'mypy == 1.3.0', 14 | 'openai == 0.27.8', 15 | 'pre-commit == 3.3.3', 16 | 'pytest == 7.3.1', 17 | 'ruff == 0.0.272', 18 | 'termcolor==2.3.0', 19 | 'typer >= 0.3.2', 20 | 'rudder-sdk-python == 2.0.2', 21 | 'dataclasses-json == 0.5.7', 22 | 'tiktoken', 23 | 'tabulate == 0.9.0', 24 | ] 25 | 26 | classifiers = [ 27 | "Development Status :: 4 - Beta", 28 | "Programming Language :: Python :: 3.8", 29 | "Programming Language :: Python :: 3.9", 30 | "Programming Language :: Python :: 3.10", 31 | "Programming Language :: Python :: 3.11", 32 | "License :: OSI Approved :: MIT License", 33 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 34 | ] 35 | 36 | [project.scripts] 37 | gpt-engineer = 'gpt_engineer.main:app' 38 | 39 | [tool.setuptools] 40 | packages = ["gpt_engineer"] 41 | 42 | [tool.ruff] 43 | select = ["F", "E", "W", "I001"] 44 | line-length = 90 45 | show-fixes = false 46 | target-version = "py311" 47 | task-tags = ["TODO", "FIXME"] 48 | exclude = [ 49 | ".bzr", 50 | ".direnv", 51 | ".eggs", 52 | ".git", 53 | ".ruff_cache", 54 | ".svn", 55 | ".tox", 56 | ".venv", 57 | "__pypackages__", 58 | "_build", 59 | "buck-out", 60 | "build", 61 | "dist", 62 | "node_modules", 63 | "venv", 64 | ] 65 | 66 | [project.urls] 67 | "Homepage" = "https://github.com/AntonOsika/gpt-engineer" 68 | "Bug Tracker" = "https://github.com/AntonOsika/gpt-engineer/issues" 69 | 70 | [tool.ruff.isort] 71 | known-first-party = [] 72 | known-third-party = [] 73 | section-order = [ 74 | "future", 75 | "standard-library", 76 | "third-party", 77 | "first-party", 78 | "local-folder", 79 | ] 80 | combine-as-imports = true 81 | split-on-trailing-comma = false 82 | lines-between-types = 1 83 | 84 | [tool.black] 85 | line-length = 90 86 | target-version = ["py311"] 87 | include = '\.pyi?$' 88 | exclude = ''' 89 | ( 90 | /( 91 | \.direnv 92 | | \.eggs 93 | | \.git 94 
| | \.tox 95 | | \.venv 96 | | _build 97 | | build 98 | | dist 99 | | venv 100 | )/ 101 | ) 102 | ''' 103 | -------------------------------------------------------------------------------- /scripts/benchmark.py: -------------------------------------------------------------------------------- 1 | # list all folders in benchmark folder 2 | # for each folder, run the benchmark 3 | import contextlib 4 | import json 5 | import os 6 | import subprocess 7 | 8 | from datetime import datetime 9 | from itertools import islice 10 | from pathlib import Path 11 | from typing import Iterable, Union 12 | 13 | from tabulate import tabulate 14 | from typer import run 15 | 16 | 17 | def main( 18 | n_benchmarks: Union[int, None] = None, 19 | ): 20 | path = Path("benchmark") 21 | 22 | folders: Iterable[Path] = path.iterdir() 23 | 24 | if n_benchmarks: 25 | folders = islice(folders, n_benchmarks) 26 | 27 | benchmarks = [] 28 | for bench_folder in folders: 29 | if os.path.isdir(bench_folder): 30 | print(f"Running benchmark for {bench_folder}") 31 | 32 | log_path = bench_folder / "log.txt" 33 | log_file = open(log_path, "w") 34 | process = subprocess.Popen( 35 | [ 36 | "python", 37 | "-u", # Unbuffered output 38 | "-m", 39 | "gpt_engineer.main", 40 | bench_folder, 41 | "--steps", 42 | "benchmark", 43 | ], 44 | stdout=log_file, 45 | stderr=log_file, 46 | bufsize=0, 47 | ) 48 | benchmarks.append((bench_folder, process, log_file)) 49 | 50 | print("You can stream the log file by running:") 51 | print(f"tail -f {log_path}") 52 | print() 53 | 54 | for bench_folder, process, file in benchmarks: 55 | process.wait() 56 | file.close() 57 | 58 | print("process", bench_folder.name, "finished with code", process.returncode) 59 | print("Running it. 
def to_emoji(value: Optional[bool]) -> str:
    """Render a tri-state review answer as a check or cross emoji.

    Callers pass ``review.get(..., None)``, so the value is True / False / None,
    not a plain bool (the original annotation was too narrow). None — meaning
    the reviewer answered "uncertain" or the question was never asked — renders
    the same as False (❌).
    """
    return "\U00002705" if value else "\U0000274C"
def ask_yes_no(question: str) -> bool:
    """Prompt until the user answers y or n; return True for y, False for n."""
    valid = {"y": True, "n": False}
    while True:
        reply = input(question + " (y/n): ").lower().strip()
        if reply in valid:
            return valid[reply]
        print("Please enter either 'y' or 'n'.")
def pretty_print_conversation(messages):
    """Print a chat transcript with one color per role.

    Fixed: the original looked up each line's role via
    ``formatted_messages.index(formatted_message)``, which returns the FIRST
    matching index — so when two messages formatted to identical text, the
    later one was printed with the earlier one's role color (and the lookup
    was O(n^2)). Pairing with ``zip`` uses each message's own role.
    """
    role_to_color = {
        "system": "red",
        "user": "green",
        "assistant": "blue",
        "function": "magenta",
    }
    formatted_messages = []
    for message in messages:
        if message["role"] == "function":
            formatted_messages.append(
                f"function ({message['name']}): {message['content']}\n"
            )
        else:
            # Assistant messages may carry a function_call instead of content.
            assistant_content = (
                message["function_call"]
                if message.get("function_call")
                else message["content"]
            )
            role_to_message = {
                "system": f"system: {message['content']}\n",
                "user": f"user: {message['content']}\n",
                "assistant": f"assistant: {assistant_content}\n",
            }
            formatted_messages.append(role_to_message[message["role"]])

    for message, formatted_message in zip(messages, formatted_messages):
        print(colored(formatted_message, role_to_color[message["role"]]))
def freeze_at(monkeypatch, time):
    """Pin ``datetime.datetime.now()`` to *time* for the duration of a test.

    Wraps the real class in a MagicMock so every other datetime attribute
    keeps working; only ``now`` is overridden.
    """
    frozen = MagicMock(wraps=datetime.datetime)
    frozen.now.return_value = time
    monkeypatch.setattr(datetime, "datetime", frozen)


def setup_dbs(tmp_path, dir_names):
    """Build a DBs bundle with one DB per directory name under *tmp_path*.

    Directory order must match the positional order DBs expects.
    """
    return DBs(*(DB(tmp_path / name) for name in dir_names))
def test_to_files():
    """Plain ``name`` headers before code fences are extracted into files."""
    chat = textwrap.dedent(
        """
        This is a sample program.

        file1.py
        ```python
        print("Hello, World!")
        ```

        file2.py
        ```python
        def add(a, b):
            return a + b
        ```
        """
    )

    workspace = {}
    to_files(chat, workspace)

    # The raw chat transcript is always preserved verbatim.
    assert workspace["all_output.txt"] == chat

    # Each fenced block lands in its named file; leading prose becomes README.
    assert workspace["file1.py"] == 'print("Hello, World!")\n'
    assert workspace["file2.py"] == "def add(a, b):\n    return a + b\n"
    assert workspace["README.md"] == "\nThis is a sample program.\n\nfile1.py\n"
def test_files_with_brackets_in_name():
    """Square brackets that belong to the filename itself must be kept."""
    chat = textwrap.dedent(
        """
        This is a sample program.

        [id].jsx
        ```javascript
        console.log("Hello, World!")
        ```
        """
    )

    workspace = {}
    to_files(chat, workspace)

    assert workspace["all_output.txt"] == chat
    assert workspace["[id].jsx"] == 'console.log("Hello, World!")\n'
    assert workspace["README.md"] == "\nThis is a sample program.\n\n[id].jsx\n"


def test_files_with_file_colon():
    """A ``[FILE: name]`` header is stripped down to the bare filename."""
    chat = textwrap.dedent(
        """
        This is a sample program.

        [FILE: file1.py]
        ```python
        print("Hello, World!")
        ```
        """
    )

    workspace = {}
    to_files(chat, workspace)

    assert workspace["all_output.txt"] == chat
    assert workspace["file1.py"] == 'print("Hello, World!")\n'
    assert workspace["README.md"] == "\nThis is a sample program.\n\n[FILE: file1.py]\n"
def test_files_with_newline_between():
    """A blank line between the filename and its code fence is tolerated."""
    chat = textwrap.dedent(
        """
        This is a sample program.

        file1.py

        ```python
        print("Hello, World!")
        ```
        """
    )

    workspace = {}
    to_files(chat, workspace)

    assert workspace["all_output.txt"] == chat
    assert workspace["file1.py"] == 'print("Hello, World!")\n'
    # The extra blank line survives in the README remainder.
    assert workspace["README.md"] == "\nThis is a sample program.\n\nfile1.py\n\n"
180 | 181 | ## file1.py 182 | 183 | ```python 184 | print("Hello, World!") 185 | ``` 186 | """ 187 | ) 188 | 189 | workspace = {} 190 | to_files(chat, workspace) 191 | 192 | assert workspace["all_output.txt"] == chat 193 | 194 | expected_files = { 195 | "file1.py": 'print("Hello, World!")\n', 196 | "README.md": "\nThis is a sample program.\n\n## file1.py\n\n", 197 | } 198 | 199 | for file_name, file_content in expected_files.items(): 200 | assert workspace[file_name] == file_content 201 | -------------------------------------------------------------------------------- /tests/test_collect.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from unittest.mock import MagicMock 5 | 6 | import pytest 7 | import rudderstack.analytics as rudder_analytics 8 | 9 | from gpt_engineer.collect import collect_learnings, steps_file_hash 10 | from gpt_engineer.db import DB, DBs 11 | from gpt_engineer.learning import extract_learning 12 | from gpt_engineer.steps import gen_code 13 | 14 | 15 | def test_collect_learnings(monkeypatch): 16 | monkeypatch.setattr(os, "environ", {"COLLECT_LEARNINGS_OPT_IN": "true"}) 17 | monkeypatch.setattr(rudder_analytics, "track", MagicMock()) 18 | 19 | model = "test_model" 20 | temperature = 0.5 21 | steps = [gen_code] 22 | dbs = DBs(DB("/tmp"), DB("/tmp"), DB("/tmp"), DB("/tmp"), DB("/tmp"), DB("/tmp")) 23 | dbs.input = { 24 | "prompt": "test prompt\n with newlines", 25 | "feedback": "test feedback", 26 | } 27 | code = "this is output\n\nit contains code" 28 | dbs.logs = {gen_code.__name__: json.dumps([{"role": "system", "content": code}])} 29 | dbs.workspace = {"all_output.txt": "test workspace\n" + code} 30 | 31 | collect_learnings(model, temperature, steps, dbs) 32 | 33 | learnings = extract_learning( 34 | model, temperature, steps, dbs, steps_file_hash=steps_file_hash() 35 | ) 36 | assert rudder_analytics.track.call_count == 1 37 | assert 
def test_DB_operations(tmp_path):
    """Exercise the basic mapping protocol of DB: set, get, and error cases."""
    db = DB(tmp_path)

    # __setitem__ persists the value as a file on disk.
    db["test_key"] = "test_value"
    assert (tmp_path / "test_key").is_file()

    # __getitem__ reads the same value back.
    assert db["test_key"] == "test_value"

    # Missing keys surface as KeyError, mirroring dict semantics.
    with pytest.raises(KeyError):
        db["non_existent"]

    # Only str/bytes payloads are accepted.
    with pytest.raises(TypeError):
        db["key"] = ["Invalid", "value"]


def test_DBs_initialization(tmp_path):
    """DBs wires one DB per positional slot, in declaration order."""
    names = ["memory", "logs", "preprompts", "input", "workspace", "archive"]
    bundle = DBs(*(DB(tmp_path / name) for name in names))

    for attr in names:
        assert isinstance(getattr(bundle, attr), DB)
def test_large_files(tmp_path):
    """A ~1 MB value round-trips through the DB unchanged."""
    db = DB(tmp_path)
    payload = "a" * (10**6)  # 1MB of data

    db["large_file"] = payload
    assert db["large_file"] == payload


def test_concurrent_access(tmp_path):
    """Writes from many threads to distinct keys must all land on disk."""
    import threading

    db = DB(tmp_path)
    num_threads = 10
    num_writes = 1000

    def writer(thread_id):
        # Each thread uses its own key space, so no write races on a key.
        for i in range(num_writes):
            db[f"thread{thread_id}_write{i}"] = str(i)

    workers = [
        threading.Thread(target=writer, args=(tid,)) for tid in range(num_threads)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # Every expected key exists and holds the value its thread wrote.
    for tid in range(num_threads):
        for i in range(num_writes):
            key = f"thread{tid}_write{i}"
            assert key in db  # using __contains__ now
            assert db[key] == str(i)


def test_error_messages(tmp_path):
    """Rejecting a non-str/bytes value produces the documented message."""
    db = DB(tmp_path)

    with pytest.raises(TypeError) as excinfo:
        db["key"] = ["Invalid", "value"]

    assert str(excinfo.value) == "val must be either a str or bytes"


def test_DBs_instantiation_with_wrong_number_of_arguments(tmp_path):
    """DBs requires exactly six databases — no fewer, no more."""
    db = DB(tmp_path)

    for wrong_args in ((db,) * 3, (db,) * 7):
        with pytest.raises(TypeError):
            DBs(*wrong_args)
dbs_instance.preprompts == dbs[2] 126 | assert dbs_instance.input == dbs[3] 127 | assert dbs_instance.workspace == dbs[4] 128 | --------------------------------------------------------------------------------