├── .github
    ├── CODE_OF_CONDUCT.md
    ├── CONTRIBUTING.md
    ├── FUNDING.yml
    ├── ISSUE_TEMPLATE
    │   ├── ISSUE_TEMPLATE.md
    │   └── issue-template.md
    ├── PULL_REQUEST_TEMPLATE
    │   └── PULL_REQUEST_TEMPLATE.md
    └── workflows
    │   ├── ci.yaml
    │   ├── codeql.yml
    │   ├── pre-commit.yaml
    │   └── release.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── DISCLAIMER.md
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── ROADMAP.md
├── TERMS_OF_USE.md
├── WINDOWS_README.md
├── benchmark
    ├── RESULTS.md
    ├── currency_converter
    │   └── prompt
    ├── file_explorer
    │   └── prompt
    ├── file_organizer
    │   └── prompt
    ├── image_resizer
    │   └── prompt
    ├── markdown_editor
    │   └── prompt
    ├── password_generator
    │   └── prompt
    ├── pomodoro_timer
    │   └── prompt
    ├── timer_app
    │   └── prompt
    ├── todo_list
    │   └── prompt
    └── url_shortener
    │   └── prompt
├── gpt_engineer
    ├── __init__.py
    ├── ai.py
    ├── chat_to_files.py
    ├── collect.py
    ├── db.py
    ├── domain.py
    ├── learning.py
    ├── main.py
    ├── preprompts
    │   ├── fix_code
    │   ├── generate
    │   ├── philosophy
    │   ├── qa
    │   ├── respec
    │   ├── spec
    │   ├── unit_tests
    │   ├── use_feedback
    │   └── use_qa
    └── steps.py
├── projects
    └── example
    │   └── prompt
├── pyproject.toml
├── scripts
    ├── benchmark.py
    ├── clean_benchmarks.py
    ├── print_chat.py
    └── rerun_edited_message_logs.py
└── tests
    ├── __init__.py
    ├── steps
        ├── __init__.py
        └── test_archive.py
    ├── test_ai.py
    ├── test_chat_to_files.py
    ├── test_collect.py
    └── test_db.py


/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
  1 | # Contributor Covenant Code of Conduct
  2 | 
  3 | ## Our Pledge
  4 | 
  5 | We as members, contributors, and leaders pledge to make participation in our
  6 | community a harassment-free experience for everyone, regardless of age, body
  7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
  8 | identity or expression, level of experience, education, socio-economic status,
  9 | nationality, personal appearance, race, caste, color, religion, or sexual
 10 | identity and orientation.
 11 | 
 12 | We pledge to act and interact in ways that contribute to an open, welcoming,
 13 | diverse, inclusive, and healthy community.
 14 | 
 15 | ## Our Standards
 16 | 
 17 | Examples of behavior that contributes to a positive environment for our
 18 | community include:
 19 | 
 20 | * Demonstrating empathy and kindness toward other people
 21 | * Being respectful of differing opinions, viewpoints, and experiences
 22 | * Giving and gracefully accepting constructive feedback
 23 | * Accepting responsibility and apologizing to those affected by our mistakes,
 24 |   and learning from the experience
 25 | * Focusing on what is best not just for us as individuals, but for the overall
 26 |   community
 27 | 
 28 | Examples of unacceptable behavior include:
 29 | 
 30 | * The use of sexualized language or imagery, and sexual attention or advances of
 31 |   any kind
 32 | * Trolling, insulting or derogatory comments, and personal or political attacks
 33 | * Public or private harassment
 34 | * Publishing others' private information, such as a physical or email address,
 35 |   without their explicit permission
 36 | * Other conduct which could reasonably be considered inappropriate in a
 37 |   professional setting
 38 | 
 39 | ## Enforcement Responsibilities
 40 | 
 41 | Community leaders are responsible for clarifying and enforcing our standards of
 42 | acceptable behavior and will take appropriate and fair corrective action in
 43 | response to any behavior that they deem inappropriate, threatening, offensive,
 44 | or harmful.
 45 | 
 46 | Community leaders have the right and responsibility to remove, edit, or reject
 47 | comments, commits, code, wiki edits, issues, and other contributions that are
 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
 49 | decisions when appropriate.
 50 | 
 51 | ## Scope
 52 | 
 53 | This Code of Conduct applies within all community spaces, and also applies when
 54 | an individual is officially representing the community in public spaces.
 55 | Examples of representing our community include using an official e-mail address,
 56 | posting using an official social media account, or acting as an appointed
 57 | representative at an online or offline event.
 58 | 
 59 | ## Enforcement
 60 | 
 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
 62 | reported to the community leaders responsible for enforcement at
 63 | <anton.osika@gmail.com>.
 64 | All complaints will be reviewed and investigated promptly and fairly.
 65 | 
 66 | All community leaders are obligated to respect the privacy and security of reporters of incidents.
 67 | 
 68 | ## Enforcement Guidelines
 69 | 
 70 | Community leaders will follow these Community Impact Guidelines in determining
 71 | the consequences for any action they deem in violation of this Code of Conduct:
 72 | 
 73 | ### 1. Correction
 74 | 
 75 | **Community Impact**: Use of inappropriate language or other behavior deemed
 76 | unprofessional or unwelcome in the community.
 77 | 
 78 | **Consequence**: A private, written warning from community leaders, providing
 79 | clarity around the nature of the violation and an explanation of why the
 80 | behavior was inappropriate. A public apology may be requested.
 81 | 
 82 | ### 2. Warning
 83 | 
 84 | **Community Impact**: A violation through a single incident or series of
 85 | actions.
 86 | 
 87 | **Consequence**: A warning with consequences for continued behavior. No
 88 | interaction with the people involved, including unsolicited interaction with
 89 | those enforcing the Code of Conduct, for a specified period of time. This
 90 | includes avoiding interactions in community spaces as well as external channels
 91 | like social media. Violating these terms may lead to a temporary or permanent
 92 | ban.
 93 | 
 94 | ### 3. Temporary Ban
 95 | 
 96 | **Community Impact**: A serious violation of community standards, including
 97 | sustained inappropriate behavior.
 98 | 
 99 | **Consequence**: A temporary ban from any sort of interaction or public
100 | communication with the community for a specified period of time. No public or
101 | private interaction with the people involved, including unsolicited interaction
102 | with those enforcing the Code of Conduct, is allowed during this period.
103 | Violating these terms may lead to a permanent ban.
104 | 
105 | ### 4. Permanent Ban
106 | 
107 | **Community Impact**: Demonstrating a pattern of violation of community
108 | standards, including sustained inappropriate behavior, harassment of an
109 | individual, or aggression toward or disparagement of classes of individuals.
110 | 
111 | **Consequence**: A permanent ban from any sort of public interaction within the
112 | community.
113 | 
114 | ## Attribution
115 | 
116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
117 | version 2.1, available at
118 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
119 | 
120 | Community Impact Guidelines were inspired by
121 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
122 | 
123 | For answers to common questions about this code of conduct, see the FAQ at
124 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
125 | [https://www.contributor-covenant.org/translations][translations].
126 | 
127 | [homepage]: https://www.contributor-covenant.org
128 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
129 | [Mozilla CoC]: https://github.com/mozilla/diversity
130 | [FAQ]: https://www.contributor-covenant.org/faq
131 | [translations]: https://www.contributor-covenant.org/translations
132 | 


--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
  1 | # Contributing to GPT Engineer
  2 | 
  3 | By participating in this project, you agree to abide by the [code of conduct](CODE_OF_CONDUCT.md).
  4 | 
  5 | ## Getting Started
  6 | 
  7 | To get started with contributing, please follow these steps:
  8 | 
  9 | 1. Fork the repository and clone it to your local machine.
 10 | 2. Install any necessary dependencies.
 11 | 3. Create a new branch for your changes: `git checkout -b my-branch-name`.
 12 | 4. Make your desired changes or additions.
 13 | 5. Run the tests to ensure everything is working as expected.
 14 | 6. Commit your changes: `git commit -m "Descriptive commit message"`.
 15 | 7. Push to the branch: `git push origin my-branch-name`.
 16 | 8. Submit a pull request to the `main` branch of the original repository.
 17 | 
 18 | ## Code Style
 19 | 
 20 | Please make sure to follow the established code style guidelines for this project. Consistent code style helps maintain readability and makes it easier for others to contribute to the project.
 21 | 
 22 | To enforce this we use [`pre-commit`](https://pre-commit.com/) to run [`black`](https://black.readthedocs.io/en/stable/index.html) and [`ruff`](https://beta.ruff.rs/docs/) on every commit.
 23 | 
 24 | `pre-commit` is part of our `requirements.txt` file so you should already have it installed. If you don't, you can install the library via pip with:
 25 | 
 26 | ```bash
 27 | $ pip install -e .
 28 | 
 29 | # And then install the `pre-commit` hooks with:
 30 | 
 31 | $ pre-commit install
 32 | 
 33 | # output:
 34 | pre-commit installed at .git/hooks/pre-commit
 35 | ```
 36 | 
 37 | Or you could just run `make dev-install` to install the dependencies and the hooks.
 38 | 
 39 | If you are not familiar with the concept of [git hooks](https://git-scm.com/docs/githooks) and/or [`pre-commit`](https://pre-commit.com/) please read the documentation to understand how they work.
 40 | 
 41 | As an introduction of the actual workflow, here is an example of the process you will encounter when you make a commit:
 42 | 
 43 | Let's add a file we have modified with some errors, see how the pre-commit hooks run `black` and fails.
 44 | `black` is set to automatically fix the issues it finds:
 45 | 
 46 | ```bash
 47 | $ git add chat_to_files.py
 48 | $ git commit -m "commit message"
 49 | black....................................................................Failed
 50 | - hook id: black
 51 | - files were modified by this hook
 52 | 
 53 | reformatted chat_to_files.py
 54 | 
 55 | All done! ✨ 🍰 ✨
 56 | 1 file reformatted.
 57 | ```
 58 | 
 59 | You can see that `chat_to_files.py` is both staged and not staged for commit. This is because `black` has formatted it and now it is different from the version you have in your working directory. To fix this you can simply run `git add chat_to_files.py` again and now you can commit your changes.
 60 | 
 61 | ```bash
 62 | $ git status
 63 | On branch pre-commit-setup
 64 | Changes to be committed:
 65 |   (use "git restore --staged <file>..." to unstage)
 66 |     modified:   chat_to_files.py
 67 | 
 68 | Changes not staged for commit:
 69 |   (use "git add <file>..." to update what will be committed)
 70 |   (use "git restore <file>..." to discard changes in working directory)
 71 |     modified:   chat_to_files.py
 72 | ```
 73 | 
 74 | Now let's add the file again to include the latest commits and see how `ruff` fails.
 75 | 
 76 | ```bash
 77 | $ git add chat_to_files.py
 78 | $ git commit -m "commit message"
 79 | black....................................................................Passed
 80 | ruff.....................................................................Failed
 81 | - hook id: ruff
 82 | - exit code: 1
 83 | - files were modified by this hook
 84 | 
 85 | Found 2 errors (2 fixed, 0 remaining).
 86 | ```
 87 | 
 88 | Same as before, you can see that `chat_to_files.py` is both staged and not staged for commit. This is because `ruff` has formatted it and now it is different from the version you have in your working directory. To fix this you can simply run `git add chat_to_files.py` again and now you can commit your changes.
 89 | 
 90 | ```bash
 91 | $ git add chat_to_files.py
 92 | $ git commit -m "commit message"
 93 | black....................................................................Passed
 94 | ruff.....................................................................Passed
 95 | fix end of files.........................................................Passed
 96 | [pre-commit-setup f00c0ce] testing
 97 |  1 file changed, 1 insertion(+), 1 deletion(-)
 98 | ```
 99 | 
100 | Now your file has been committed and you can push your changes.
101 | 
102 | At the beginning this might seem like a tedious process (having to add the file again after `black` and `ruff` have modified it) but it is actually very useful. It allows you to see what changes `black` and `ruff` have made to your files and make sure that they are correct before you commit them.
103 | 
104 | ## Issue Tracker
105 | 
106 | If you encounter any bugs, issues, or have feature requests, please [create a new issue](https://github.com/AntonOsika/gpt-engineer/issues/new) on the project's GitHub repository. Provide a clear and descriptive title along with relevant details to help us address the problem or understand your request.
107 | 
108 | ## Licensing
109 | 
110 | By contributing to GPT Engineer, you agree that your contributions will be licensed under the [LICENSE](../LICENSE) file of the project.
111 | 
112 | Thank you for your interest in contributing to GPT Engineer! We appreciate your support and look forward to your contributions.
113 | 


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 | 
3 | github: [antonosika]
4 | patreon: gpt-engineer
5 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | **YOU MAY DELETE THE ENTIRE TEMPLATE BELOW.**
 2 | 
 3 | # Issue Template
 4 | 
 5 | ## Expected Behavior
 6 | 
 7 | Please describe the behavior you are expecting.
 8 | 
 9 | ## Current Behavior
10 | 
11 | What is the current behavior?
12 | 
13 | ## Failure Information (for bugs)
14 | 
15 | Please help provide information about the failure if this is a bug. If it is not a bug, please remove the rest of this template.
16 | 
17 | ### Steps to Reproduce
18 | 
19 | Please provide detailed steps for reproducing the issue.
20 | 
21 | 1. step 1
22 | 2. step 2
23 | 3. you get it...
24 | 
25 | ### Failure Logs
26 | 
27 | Please include any relevant log snippets or files here.
28 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/issue-template.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Issue template
 3 | about: All templates should use this format unless there is a reason not to
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | ---
 8 | 
 9 | **YOU MAY DELETE THE ENTIRE TEMPLATE BELOW.**
10 | 
11 | ## Issue Template
12 | 
13 | ## Expected Behavior
14 | 
15 | Please describe the behavior you are expecting.
16 | 
17 | ## Current Behavior
18 | 
19 | What is the current behavior?
20 | 
21 | ## Failure Information (for bugs)
22 | 
23 | Please help provide information about the failure if this is a bug. If it is not a bug, please remove the rest of this template.
24 | 
25 | ### Steps to Reproduce
26 | 
27 | Please provide detailed steps for reproducing the issue.
28 | 
29 | 1. step 1
30 | 2. step 2
31 | 3. you get it...
32 | 
33 | ### Failure Logs
34 | 
35 | Please include any relevant log snippets or files here.
36 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | **YOU MAY DELETE THE ENTIRE TEMPLATE BELOW.**
 2 | 
 3 | ## How Has This Been Tested?
 4 | 
 5 | Please describe if you have either:
 6 | 
 7 | - Generated the "example" project
 8 | - Ran the entire benchmark suite
 9 | - Something else
10 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
 1 | name: Pip install and pytest
 2 | on:
 3 |   pull_request:
 4 |     branches: [main]
 5 |     paths:
 6 |       - "**.py"
 7 |   push:
 8 |     branches: [main]
 9 |     paths:
10 |       - "**.py"
11 | 
12 | jobs:
13 |   test:
14 |     runs-on: ubuntu-latest
15 |     steps:
16 |       - uses: actions/checkout@v3
17 | 
18 |       - uses: actions/setup-python@v4
19 |         with:
20 |           python-version: "3.10"
21 |           cache: pip
22 | 
23 |       - name: Install package
24 |         run: pip install -e .
25 | 
26 |       - name: Install test runner
27 |         run: pip install pytest pytest-cov
28 | 
29 |       - name: Run unit tests
30 |         run: pytest --cov=gpt_engineer
31 | 


--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
 1 | name: "CodeQL"
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ 'main' ]
 6 |   pull_request:
 7 |     # The branches below must be a subset of the branches above
 8 |     branches: [ 'main' ]
 9 |   schedule:
10 |     - cron: '26 2 * * 6'
11 | 
12 | jobs:
13 |   analyze:
14 |     name: Analyze
15 |     runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
16 |     timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
17 |     permissions:
18 |       actions: read
19 |       contents: read
20 |       security-events: write
21 | 
22 |     strategy:
23 |       fail-fast: false
24 |       matrix:
25 |         language: [ 'python' ]
26 |         # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
27 |         # Use only 'java' to analyze code written in Java, Kotlin or both
28 |         # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
29 |         # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
30 | 
31 |     steps:
32 |     - name: Checkout repository
33 |       uses: actions/checkout@v3
34 | 
35 |     # Initializes the CodeQL tools for scanning.
36 |     - name: Initialize CodeQL
37 |       uses: github/codeql-action/init@v2
38 |       with:
39 |         languages: ${{ matrix.language }}
40 |         # If you wish to specify custom queries, you can do so here or in a config file.
41 |         # By default, queries listed here will override any specified in a config file.
42 |         # Prefix the list here with "+" to use these queries and those in the config file.
43 | 
44 |         # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
45 |         # queries: security-extended,security-and-quality
46 | 
47 | 
48 |     # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
49 |     # If this step fails, then you should remove it and run the build manually (see below)
50 |     - name: Autobuild
51 |       uses: github/codeql-action/autobuild@v2
52 | 
53 |     # ℹ️ Command-line programs to run using the OS shell.
54 |     # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
55 | 
56 |     #   If the Autobuild fails above, remove it and uncomment the following three lines.
57 |     #   modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
58 | 
59 |     # - run: |
60 |     #     echo "Run, Build Application using script"
61 |     #     ./location_of_script_within_repo/buildscript.sh
62 | 
63 |     - name: Perform CodeQL Analysis
64 |       uses: github/codeql-action/analyze@v2
65 |       with:
66 |         category: "/language:${{matrix.language}}"
67 | 


--------------------------------------------------------------------------------
/.github/workflows/pre-commit.yaml:
--------------------------------------------------------------------------------
 1 | name: pre-commit
 2 | 
 3 | on:
 4 |   pull_request:
 5 |   push:
 6 |     branches: [main]
 7 | 
 8 | jobs:
 9 |   pre-commit:
10 |     runs-on: ubuntu-latest
11 | 
12 |     permissions:
13 |       contents: write
14 | 
15 |     steps:
16 |       - uses: actions/checkout@v3
17 | 
18 |       - uses: actions/setup-python@v4
19 | 
20 |       - uses: pre-commit/action@v3.0.0
21 |         with:
22 |           extra_args: --all-files
23 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
 1 | name: Build and publish Python packages to PyPI
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   release:
 6 |     types:
 7 |       - published
 8 | 
 9 | jobs:
10 |   build:
11 |     runs-on: ubuntu-latest
12 |     strategy:
13 |       matrix:
14 |         python-version:
15 |           - "3.10"
16 |     steps:
17 |       - uses: actions/checkout@v3
18 | 
19 |       - uses: actions/setup-python@v4
20 |         with:
21 |           python-version: ${{ matrix.python-version }}
22 |           cache: pip
23 | 
24 |       - name: Install build tool
25 |         run: pip install build
26 | 
27 |       - name: Build package
28 |         run: python -m build
29 | 
30 |       - name: Upload package as build artifact
31 |         uses: actions/upload-artifact@v3
32 |         with:
33 |           name: package
34 |           path: dist/
35 | 
36 |   publish:
37 |     runs-on: ubuntu-latest
38 |     needs: build
39 |     environment:
40 |       name: pypi
41 |       url: https://pypi.org/p/gpt-engineer
42 |     permissions:
43 |       id-token: write
44 |     steps:
45 |       - name: Collect packages to release
46 |         uses: actions/download-artifact@v3
47 |         with:
48 |           name: package
49 |           path: dist/
50 | 
51 |       - name: Publish packages to PyPI
52 |         uses: pypa/gh-action-pypi-publish@release/v1
53 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # See https://help.github.com/ignore-files/ for more about ignoring files.
 2 | 
 3 | # Byte-compiled / optimized / DLL files
 4 | __pycache__/
 5 | *.py[cod]
 6 | *$py.class
 7 | 
 8 | # Distribution / packaging
 9 | dist/
10 | build/
11 | *.egg-info/
12 | *.egg
13 | 
14 | # Virtual environments
15 | .env
16 | .env.sh
17 | venv/
18 | ENV/
19 | 
20 | # IDE-specific files
21 | .vscode/
22 | .idea/
23 | 
24 | # Compiled Python modules
25 | *.pyc
26 | *.pyo
27 | *.pyd
28 | 
29 | # Python testing
30 | .pytest_cache/
31 | .ruff_cache/
32 | .coverage
33 | .mypy_cache/
34 | 
35 | # macOS specific files
36 | .DS_Store
37 | 
38 | # Windows specific files
39 | Thumbs.db
40 | 
41 | # this application's specific files
42 | archive
43 | 
44 | # any log file
45 | *log.txt
46 | todo
47 | scratchpad
48 | 
49 | # Ignore GPT Engineer files
50 | projects
51 | !projects/example
52 | 
53 | # Pyenv
54 | .python-version
55 | 
56 | # Benchmark files
57 | benchmark
58 | !benchmark/*/prompt
59 | 
60 | .gpte_consent
61 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # See https://pre-commit.com for more information
 2 | # See https://pre-commit.com/hooks.html for more hooks
 3 | fail_fast: true
 4 | default_stages: [commit]
 5 | 
 6 | repos:
 7 |   - repo: https://github.com/pre-commit/mirrors-mypy
 8 |     rev: v1.3.0
 9 |     hooks:
10 |       - id: mypy
11 |         additional_dependencies: [types-tabulate==0.9.0.2]
12 | 
13 |   - repo: https://github.com/psf/black
14 |     rev: 23.3.0
15 |     hooks:
16 |       - id: black
17 |         args: [--config, pyproject.toml]
18 |         types: [python]
19 | 
20 |   - repo: https://github.com/charliermarsh/ruff-pre-commit
21 |     rev: "v0.0.272"
22 |     hooks:
23 |       - id: ruff
24 |         args: [--fix, --exit-non-zero-on-fix]
25 | 
26 |   - repo: https://github.com/pre-commit/pre-commit-hooks
27 |     rev: v4.4.0
28 |     hooks:
29 |       - id: check-toml
30 |       - id: check-yaml
31 |       - id: detect-private-key
32 |       - id: end-of-file-fixer
33 |       - id: trailing-whitespace
34 | 


--------------------------------------------------------------------------------
/DISCLAIMER.md:
--------------------------------------------------------------------------------
 1 | # Disclaimer
 2 | 
 3 | gpt-engineer is an experimental application and is provided "as-is" without any warranty, express or implied. By using this software, you agree to assume all risks associated with its use, including but not limited to data loss, system failure, or any other issues that may arise.
 4 | 
 5 | The developers and contributors of this project do not accept any responsibility or liability for any losses, damages, or other consequences that may occur as a result of using this software. You are solely responsible for any decisions and actions taken based on the information provided by gpt-engineer.
 6 | 
 7 | Please note that the use of the GPT-4 language model can be expensive due to its token usage. By utilizing this project, you acknowledge that you are responsible for monitoring and managing your own token usage and the associated costs. It is highly recommended to check your OpenAI API usage regularly and set up any necessary limits or alerts to prevent unexpected charges.
 8 | 
 9 | As an autonomous experiment, gpt-engineer may generate code or take actions that are not in line with real-world business practices or legal requirements. It is your responsibility to ensure that any actions or decisions made by the generated code comply with all applicable laws, regulations, and ethical standards. The developers and contributors of this project shall not be held responsible for any consequences arising from the use of this software.
10 | 
11 | By using gpt-engineer, you agree to indemnify, defend, and hold harmless the developers, contributors, and any affiliated parties from and against any and all claims, damages, losses, liabilities, costs, and expenses (including reasonable attorneys' fees) arising from your use of this software or your violation of these terms.
12 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Anton Osika
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include gpt_engineer/preprompts *
2 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | #Sets the default shell for executing commands as /bin/bash and specifies command should be executed in a Bash shell.
 2 | SHELL := /bin/bash
 3 | 
 4 | # Color codes for terminal output
 5 | COLOR_RESET=\033[0m
 6 | COLOR_CYAN=\033[1;36m
 7 | COLOR_GREEN=\033[1;32m
 8 | 
 9 | # Defines the targets help, install, dev-install, and run as phony targets. Phony targets are targets that are not really the name of files that are to be built. Instead, they are treated as commands.
10 | .PHONY: help install run
11 | 
12 | #sets the default goal to help when no target is specified on the command line.
13 | .DEFAULT_GOAL := help
14 | 
15 | #Disables echoing of commands. The commands executed by Makefile will not be printed on the console during execution.
16 | .SILENT:
17 | 
18 | #Sets the variable name to the second word from the MAKECMDGOALS. MAKECMDGOALS is a variable that contains the command-line targets specified when running make. In this case, the variable name will hold the value of the folder name specified when running the run target.
19 | name := $(word 2,$(MAKECMDGOALS))
20 | 
21 | #Defines a target named help.
22 | help:
23 | 	@echo "Please use 'make <target>' where <target> is one of the following:"
24 | 	@echo "  help           	Return this message with usage instructions."
25 | 	@echo "  install        	Will install the dependencies and create a virtual environment."
26 | 	@echo "  run <folder_name>  Runs GPT Engineer on the folder with the given name."
27 | 
28 | #Defines a target named install. This target will create a virtual environment, upgrade pip, install the dependencies, and install the pre-commit hooks. This means that running make install will first execute the create-venv target, then the upgrade-pip target, then the install-dependencies target, and finally the install-pre-commit target.
29 | install: create-venv upgrade-pip install-dependencies install-pre-commit farewell
30 | 
31 | #Defines a target named create-venv. This target will create a virtual environment in the venv folder.
32 | create-venv:
33 | 	@echo -e "$(COLOR_CYAN)Creating virtual environment...$(COLOR_RESET)" && \
34 | 	python -m venv venv
35 | 
36 | #Defines a target named upgrade-pip. This target will upgrade pip to the latest version.
37 | upgrade-pip:
38 | 	@echo -e "$(COLOR_CYAN)Upgrading pip...$(COLOR_RESET)" && \
39 | 	source venv/bin/activate && \
40 | 	pip install --upgrade pip >> /dev/null
41 | 
42 | #Defines a target named install-dependencies. This target will install the dependencies.
43 | install-dependencies:
44 | 	@echo -e "$(COLOR_CYAN)Installing dependencies...$(COLOR_RESET)" && \
45 | 	source venv/bin/activate && \
46 | 	pip install -e . >> /dev/null
47 | 
48 | #Defines a target named install-pre-commit. This target will install the pre-commit hooks.
49 | install-pre-commit:
50 | 	@echo -e "$(COLOR_CYAN)Installing pre-commit hooks...$(COLOR_RESET)" && \
51 | 	source venv/bin/activate && \
52 | 	pre-commit install
53 | 
54 | #Defines a target named farewell. This target will print a farewell message.
55 | farewell:
56 | 	@echo -e "$(COLOR_GREEN)All done!$(COLOR_RESET)"
57 | 
58 | #Defines a target named run. This target will run GPT Engineer on the folder with the given name, name was defined earlier in the Makefile.
59 | run:
60 | 	@echo -e "$(COLOR_CYAN)Running GPT Engineer on $(COLOR_GREEN)$(name)$(COLOR_CYAN) folder...$(COLOR_RESET)" && \
61 | 	source venv/bin/activate && \
62 | 	gpt-engineer projects/$(name)
63 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # GPT Engineer
 2 | 
 3 | [![Discord Follow](https://dcbadge.vercel.app/api/server/8tcDQ89Ej2?style=flat)](https://discord.gg/8tcDQ89Ej2)
 4 | [![GitHub Repo stars](https://img.shields.io/github/stars/AntonOsika/gpt-engineer?style=social)](https://github.com/AntonOsika/gpt-engineer)
 5 | [![Twitter Follow](https://img.shields.io/twitter/follow/antonosika?style=social)](https://twitter.com/AntonOsika)
 6 | 
 7 | 
 8 | **Specify what you want it to build, the AI asks for clarification, and then builds it.**
 9 | 
10 | GPT Engineer is made to be easy to adapt, extend, and make your agent learn how you want your code to look. It generates an entire codebase based on a prompt.
11 | 
12 | [Demo](https://twitter.com/antonosika/status/1667641038104674306)
13 | 
14 | ## Project philosophy
15 | 
16 | - Simple to get value
17 | - Flexible and easy to add new own "AI steps". See `steps.py`.
18 | - Incrementally build towards a user experience of:
19 |   1. high level prompting
20 |   2. giving feedback to the AI that it will remember over time
21 | - Fast handovers back and forth between AI and human
22 | - Simplicity, all computation is "resumable" and persisted to the filesystem
23 | 
24 | ## Usage
25 | 
26 | Choose either **stable** or **development**.
27 | 
28 | For **stable** release:
29 | 
30 | - `pip install gpt-engineer`
31 | 
32 | For **development**:
33 | - `git clone https://github.com/AntonOsika/gpt-engineer.git`
34 | - `cd gpt-engineer`
35 | - `pip install -e .`
36 |   - (or: `make install && source venv/bin/activate` for a venv)
37 | 
38 | **Setup**
39 | 
40 | With an OpenAI API key (preferably with GPT-4 access) run:
41 | 
42 | - `export OPENAI_API_KEY=[your api key]`
43 | 
44 | Alternative for Windows
45 | 
46 | **Run**:
47 | 
48 | - Create an empty folder. If inside the repo, you can run:
49 |   - `cp -r projects/example/ projects/my-new-project`
50 | - Fill in the `prompt` file in your new folder
51 | - `gpt-engineer projects/my-new-project`
52 |   - (Note, `gpt-engineer --help` lets you see all available options. For example `--steps use_feedback` lets you improve/fix code in a project)
53 | 
54 | By running gpt-engineer you agree to our [terms](https://github.com/AntonOsika/gpt-engineer/blob/main/TERMS_OF_USE.md).
55 | 
56 | **Results**
57 | - Check the generated files in `projects/my-new-project/workspace`
58 | 
59 | 
60 | To **run in the browser** you can simply:
61 | 
62 | [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/AntonOsika/gpt-engineer/codespaces)
63 | 
64 | 
65 | 
66 | ## Features
67 | 
68 | You can specify the "identity" of the AI agent by editing the files in the `preprompts` folder.
69 | 
70 | Editing the `preprompts`, and evolving how you write the project prompt, is how you make the agent remember things between projects.
71 | 
72 | Each step in `steps.py` will have its communication history with GPT4 stored in the logs folder, and can be rerun with `scripts/rerun_edited_message_logs.py`.
73 | 
74 | ## Vision
75 | The gpt-engineer community is building the **open platform for devs to tinker with and build their personal code-generation toolbox**.
76 | 
77 | If you are interested in contributing to this, we would be interested in having you.
78 | 
79 | If you want to see our broader ambitions, check out the [roadmap](https://github.com/AntonOsika/gpt-engineer/blob/main/ROADMAP.md), and join
80 | [discord](https://discord.gg/8tcDQ89Ej2)
81 | to get input on how you can [contribute](.github/CONTRIBUTING.md) to it.
82 | 
83 | We are currently looking for more maintainers and community organisers. Email anton.osika@gmail.com if you are interested in an official role.
84 | 
85 | 
86 | ## Example
87 | 
88 | https://github.com/AntonOsika/gpt-engineer/assets/4467025/6e362e45-4a94-4b0d-973d-393a31d92d9b
89 | 


--------------------------------------------------------------------------------
/ROADMAP.md:
--------------------------------------------------------------------------------
 1 | # Roadmap
 2 | 
 3 | There are three main milestones we believe will greatly increase gpt-engineer's reliability and capability:
 4 | - [x] Continuous evaluation of our progress 🎉
 5 | - [ ] Test code and fix errors with LLMs
 6 | - [ ] Make code generation become small, verifiable steps
 7 | 
 8 | ## Our current focus:
 9 | 
10 | - [x] **Continuous evaluation of progress 🎉**
11 |   - [x] Create a step that asks “did it run/work/perfect” in the end of each run [#240](https://github.com/AntonOsika/gpt-engineer/issues/240) 🎉
12 |   - [x] Collect a dataset for gpt engineer to learn from, by storing code generation runs 🎉
13 |   - [ ] Run the benchmark multiple times, and document the results for the different "step configs" [#239](https://github.com/AntonOsika/gpt-engineer/issues/239)
14 |   - [ ] Improve the default config based on results
15 | - [ ] **Self healing code**
16 |   - [ ] Run the generated tests
17 |   - [ ] Feed the results of failing tests back into LLM and ask it to fix the code
18 | - [ ] **Let human give feedback**
19 |   - [ ] Ask human for what is not working as expected in a loop, and feed it into LLM to fix the code, until the human is happy
20 | - [ ] **Improve existing projects**
21 |   - [ ] Decide on the "flow" for the CLI commands and where the project files are created
22 |   - [ ] Add an "improve code" command
23 |   - [ ] Architect how gpt-engineer becomes a platform
24 |   - [ ] Integrate Aider
25 | 
26 | ## Experimental research
27 | This is not our current focus, but if you are interested in experimenting: Please
28 | create a thread in Discord #general and share your intentions and your findings as you
29 | go along. High impact examples:
30 | - [ ] **Make code generation become small, verifiable steps**
31 |   - [ ] Ask GPT4 to decide how to sequence the entire generation, and do one
32 |   prompt for each subcomponent
33 |   - [ ] For each small part, generate tests for that subpart, and do the loop of running the tests for each part, feeding
34 | results into GPT4, and let it edit the code until they pass
35 | - [ ] **Ad hoc experiments**
36 |   - [ ] Try Microsoft guidance, and benchmark if this helps improve performance
37 |   - [ ] Dynamic planning: Let gpt-engineer plan which "steps" to carry out itself, depending on the
38 | task, by giving it few shot example of what are usually "the right-sized steps" to carry
39 | out for such projects
40 | 
41 | ## Codebase improvements
42 | By improving the codebase and developer ergonomics, we accelerate progress. Some examples:
43 | - [ ] Set up automatic PR review for all PRs with e.g. Codium pr-agent
44 | - [ ] LLM tests in CI: Run super small tests with GPT3.5 in CI, that check that simple code generation still works
45 | 
46 | # How you can help out
47 | 
48 | You can:
49 | 
50 | - Post a "design" as a google doc in our Discord and ask for feedback to address one of the items in the roadmap
51 | - Submit PRs to address one of the items in the roadmap
52 | - Do a review of someone else's PR and propose next steps (further review, merge, close)
53 | 
54 | Volunteer work in any of these will get acknowledged.
55 | 


--------------------------------------------------------------------------------
/TERMS_OF_USE.md:
--------------------------------------------------------------------------------
 1 | # Terms of Use
 2 | 
 3 | By using gpt-engineer you are aware of and agree to the below Terms of Use, as well as the attached [disclaimer of warranty](https://github.com/AntonOsika/gpt-engineer/blob/main/DISCLAIMER.md).
 4 | 
 5 | Both OpenAI, L.L.C. and the creators of gpt-engineer **store data
 6 | about how gpt-engineer is used** with the sole intent of improving the capability of the product. Care is taken to not store any information that can be tied to a person.
 7 | 
 8 | Please beware that natural text input, such as the files `prompt` and `feedback`, will be stored and this can, in theory, be used to (although the gpt-engineer creators will never attempt to do so) connect a person's style of writing or content in the files to a real person.
 9 | 
10 | More information about OpenAI's terms of use [here](https://openai.com/policies/terms-of-use).
11 | 
12 | You can disable storing usage data by gpt-engineer, **but not OpenAI**, by setting the environment variable COLLECT_LEARNINGS_OPT_OUT=true.
13 | 


--------------------------------------------------------------------------------
/WINDOWS_README.md:
--------------------------------------------------------------------------------
 1 | # Windows Setup
 2 | ## Short version
 3 | 
 4 | On Windows, follow the standard [README.md](https://github.com/AntonOsika/gpt-engineer/blob/main/README.md), but to set API key do one of:
 5 | - `set OPENAI_API_KEY=[your api key]` on cmd
 6 | - `$env:OPENAI_API_KEY="[your api key]"` on powershell
 7 | 
 8 | ## Full setup guide
 9 | 
10 | Choose either **stable** or **development**.
11 | 
12 | For **stable** release:
13 | 
14 | Run `pip install gpt-engineer` in the command line
15 | 
16 | Or:
17 | 
18 |   1. Open your web browser and navigate to the Python Package Index (PyPI) website: <https://pypi.org/project/gpt-engineer/>.
19 |   2. On the PyPI page for the gpt-engineer package, locate the "Download files" section. Here you'll find a list of available versions and their corresponding download links.
20 |   3. Identify the version of gpt-engineer you want to install and click on the associated download link. This will download the package file (usually a .tar.gz or .whl file) to your computer.
21 |   4. Once the package file is downloaded, open your Python development environment or IDE.
22 |   5. In your Python development environment, look for an option to install packages or manage dependencies. The exact location and terminology may vary depending on your IDE. For example, in PyCharm, you can go to "File" > "Settings" > "Project: \<project-name>" > "Python Interpreter" to manage packages.
23 |   6. In the package management interface, you should see a list of installed packages. Look for an option to add or install a new package.
24 |   7. Click on the "Add Package" or "Install Package" button.
25 |   8. In the package installation dialog, choose the option to install from a file or from a local source.
26 |   9. Browse and select the downloaded gpt-engineer package file from your computer.
27 | 
28 | For **development**:
29 | 
30 | - `git clone git@github.com:AntonOsika/gpt-engineer.git`
31 | - `cd gpt-engineer`
32 | - `pip install -e .`
33 |   - (or: `make install && source venv/bin/activate` for a venv)
34 | 
35 | ### Setup
36 | 
37 | With an api key from OpenAI:
38 | 
39 | Run `set OPENAI_API_KEY=[your API key]` in the command line
40 | 
41 | Or:
42 | 
43 |   1. In the Start Menu, type to search for "Environment Variables" and click on "Edit the system environment variables".
44 |   2. In the System Properties window, click on the "Environment Variables" button.
45 |   3. In the Environment Variables window, you'll see two sections: User variables and System variables.
46 |   4. To set a user-specific environment variable, select the "New" button under the User variables section.
47 |   5. To set a system-wide environment variable, select the "New" button under the System variables section.
48 |   6. Enter the variable name "OPENAI_API_KEY" in the "Variable name" field.
49 |   7. Enter the variable value (e.g., your API key) in the "Variable value" field.
50 |   8. Click "OK" to save the changes.
51 |   9. Close any open command prompt or application windows and reopen them for the changes to take effect.
52 | 
53 | Now you can use `%OPENAI_API_KEY%` when prompted to input your key.
54 | 
55 | ### Run
56 | 
57 | - Create an empty folder. If inside the repo, you can:
58 |   - Run `xcopy /E projects\example projects\my-new-project` in the command line
59 |   - Or hold CTRL and drag the folder down to create a copy, then rename to fit your project
60 | - Fill in the `prompt` file in your new folder
61 | - `gpt-engineer projects/my-new-project`
62 |   - (Note, `gpt-engineer --help` lets you see all available options. For example `--steps use_feedback` lets you improve/fix code in a project)
63 | 
64 | By running gpt-engineer you agree to our [ToS](https://github.com/AntonOsika/gpt-engineer/TERMS_OF_USE.md).
65 | 
66 | ### Results
67 | 
68 | - Check the generated files in `projects/my-new-project/workspace`
69 | 


--------------------------------------------------------------------------------
/benchmark/RESULTS.md:
--------------------------------------------------------------------------------
 1 | # Benchmarks
 2 | 
 3 | ```bash
 4 | python scripts/benchmark.py
 5 | ```
 6 | 
 7 | ## 2023-06-21
 8 | 
 9 | | Benchmark          | Ran | Works | Perfect |
10 | |--------------------|-----|-------|---------|
11 | | currency_converter | ✅  | ❌    | ❌      |
12 | | image_resizer      | ✅  | ✅    | ✅      |
13 | | pomodoro_timer     | ✅  | ✅    | ✅      |
14 | | url_shortener      | ✅  | ✅    | ✅      |
15 | | file_explorer      | ✅  | ✅    | ✅      |
16 | | markdown_editor    | ✅  | ✅    | ❌      |
17 | | timer_app          | ✅  | ❌    | ❌      |
18 | | weather_app        | ✅  | ✅    | ✅      |
19 | | file_organizer     | ✅  | ✅    | ✅      |
20 | | password_generator | ✅  | ✅    | ✅      |
21 | | todo_list          | ✅  | ✅    | ✅      |
22 | 
23 | ## Notes on the errors
24 | 
25 | Most errors come from that the "generate entrypoint" are incorrect. Ignoring
26 | those, we get 8/11 fully correct.
27 | 
28 | All errors are very easy to fix.
29 | 
30 | One error was trying to modify a constant.
31 | One error was that the html template was not fully filled in.
32 | One error is that a dependency was used incorrectly and easy to fix
33 | 
34 | ## 2023-06-19
35 | 
36 | | Benchmark          | Ran | Works | Perfect |
37 | |--------------------|-----|-------|---------|
38 | | currency_converter | ❌  | ❌    | ❌      |
39 | | image_resizer      | ✅  | ❌    | ❌      |
40 | | pomodoro_timer     | ❌  | ❌    | ❌      |
41 | | url_shortener      | ❌  | ❌    | ❌      |
42 | | file_explorer      | ✅  | ✅    | ✅      |
43 | | markdown_editor    | ❌  | ❌    | ❌      |
44 | | timer_app          | ✅  | ❌    | ❌      |
45 | | weather_app        | ❌  | ❌    | ❌      |
46 | | file_organizer     | ✅  | ✅    | ✅      |
47 | | password_generator | ✅  | ✅    | ✅      |
48 | | todo_list          | ✅  | ❌    | ❌      |
49 | 
50 | ## Notes on the errors
51 | 
52 | **timer_app** almost works with unit tests config
53 | 
54 | - failure mode: undefined import/conflicting names
55 | 
56 | **file_explorer** works
57 | 
58 | **file organiser**: works
59 | 
60 | **image_resizer** almost works with unit tests config
61 | 
62 | - failure mode: undefined import
63 | 
64 | **todo_list** runs. doesn't really work with unit tests config
65 | Uncaught ReferenceError: module is not defined
66 | 
67 | - failure mode: placeholder text
68 | 
69 | url_shortener starts but gets the error:
70 |   SQLite objects created in a thread can only be used in that same thread. The object was created in thread id 8636125824 and this is thread id 13021003776.
71 | 
72 | markdown_editor:
73 | failing tests, 'WebDriver' object has no attribute 'find_element_by_id'
74 | 
75 | pomodoro: doesn't run it only tests
76 | 
77 | currency_converter: backend doesn't return anything
78 | 
79 | weather_app only runs test, no code existed
80 | 


--------------------------------------------------------------------------------
/benchmark/currency_converter/prompt:
--------------------------------------------------------------------------------
1 | Build a currency converter app using an API for exchange rates. Use HTML, CSS, and JavaScript for the frontend and Node.js for the backend. Allow users to convert between different currencies.
2 | 


--------------------------------------------------------------------------------
/benchmark/file_explorer/prompt:
--------------------------------------------------------------------------------
1 | Create a basic file explorer CLI tool in Python that allows users to navigate through directories, view file contents, and perform basic file operations (copy, move, delete).
2 | 


--------------------------------------------------------------------------------
/benchmark/file_organizer/prompt:
--------------------------------------------------------------------------------
1 | Create a file organizer CLI tool in Python that sorts files in a directory based on their file types (e.g., images, documents, audio) and moves them into corresponding folders.
2 | 


--------------------------------------------------------------------------------
/benchmark/image_resizer/prompt:
--------------------------------------------------------------------------------
1 | Create a CLI tool in Python that allows users to resize images by specifying the desired width and height. Use the Pillow library for image manipulation.
2 | 


--------------------------------------------------------------------------------
/benchmark/markdown_editor/prompt:
--------------------------------------------------------------------------------
1 | Build a simple markdown editor using HTML, CSS, and JavaScript. Allow users to input markdown text and display the formatted output in real-time.
2 | 


--------------------------------------------------------------------------------
/benchmark/password_generator/prompt:
--------------------------------------------------------------------------------
1 | Create a password generator CLI tool in Python that generates strong, random passwords based on user-specified criteria, such as length and character types (letters, numbers, symbols).
2 | 


--------------------------------------------------------------------------------
/benchmark/pomodoro_timer/prompt:
--------------------------------------------------------------------------------
1 | Develop a Pomodoro timer app using HTML, CSS, and JavaScript. Allow users to set work and break intervals and receive notifications when it's time to switch.
2 | 


--------------------------------------------------------------------------------
/benchmark/timer_app/prompt:
--------------------------------------------------------------------------------
1 | Create a simple timer app using HTML, CSS, and JavaScript that allows users to set a countdown timer and receive an alert when the time is up.
2 | 


--------------------------------------------------------------------------------
/benchmark/todo_list/prompt:
--------------------------------------------------------------------------------
1 | Create a simple to-do list app using HTML, CSS, and JavaScript. Store tasks in local storage and allow users to add, edit, and delete tasks.
2 | 


--------------------------------------------------------------------------------
/benchmark/url_shortener/prompt:
--------------------------------------------------------------------------------
1 | Create a URL shortener app using HTML, CSS, JavaScript, and a backend language like Python or Node.js. Allow users to input a long URL and generate a shortened version that redirects to the original URL. Store the shortened URLs in a database.
2 | 


--------------------------------------------------------------------------------
/gpt_engineer/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = "morph"
2 | 


--------------------------------------------------------------------------------
/gpt_engineer/ai.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import logging
  4 | 
  5 | from dataclasses import dataclass
  6 | from typing import Dict, List
  7 | 
  8 | import openai
  9 | import tiktoken
 10 | 
 11 | logger = logging.getLogger(__name__)
 12 | 
 13 | 
 14 | @dataclass
 15 | class TokenUsage:
 16 |     step_name: str
 17 |     in_step_prompt_tokens: int
 18 |     in_step_completion_tokens: int
 19 |     in_step_total_tokens: int
 20 |     total_prompt_tokens: int
 21 |     total_completion_tokens: int
 22 |     total_tokens: int
 23 | 
 24 | 
 25 | class AI:
 26 |     def __init__(self, model="gpt-4", temperature=0.1):
 27 |         self.temperature = temperature
 28 |         self.model = model
 29 | 
 30 |         # initialize token usage log
 31 |         self.cumulative_prompt_tokens = 0
 32 |         self.cumulative_completion_tokens = 0
 33 |         self.cumulative_total_tokens = 0
 34 |         self.token_usage_log = []
 35 | 
 36 |         try:
 37 |             self.tokenizer = tiktoken.encoding_for_model(model)
 38 |         except KeyError:
 39 |             logger.debug(
 40 |                 f"Tiktoken encoder for model {model} not found. Using "
 41 |                 "cl100k_base encoder instead. The results may therefore be "
 42 |                 "inaccurate and should only be used as estimate."
 43 |             )
 44 |             self.tokenizer = tiktoken.get_encoding("cl100k_base")
 45 | 
 46 |     def start(self, system, user, step_name):
 47 |         messages = [
 48 |             {"role": "system", "content": system},
 49 |             {"role": "user", "content": user},
 50 |         ]
 51 | 
 52 |         return self.next(messages, step_name=step_name)
 53 | 
 54 |     def fsystem(self, msg):
 55 |         return {"role": "system", "content": msg}
 56 | 
 57 |     def fuser(self, msg):
 58 |         return {"role": "user", "content": msg}
 59 | 
 60 |     def fassistant(self, msg):
 61 |         return {"role": "assistant", "content": msg}
 62 | 
 63 |     def next(self, messages: List[Dict[str, str]], prompt=None, *, step_name=None):
 64 |         if prompt:
 65 |             messages += [{"role": "user", "content": prompt}]
 66 | 
 67 |         logger.debug(f"Creating a new chat completion: {messages}")
 68 |         response = openai.ChatCompletion.create(
 69 |             messages=messages,
 70 |             stream=True,
 71 |             model=self.model,
 72 |             temperature=self.temperature,
 73 |         )
 74 | 
 75 |         chat = []
 76 |         for chunk in response:
 77 |             delta = chunk["choices"][0]["delta"]  # type: ignore
 78 |             msg = delta.get("content", "")
 79 |             print(msg, end="")
 80 |             chat.append(msg)
 81 |         print()
 82 |         messages += [{"role": "assistant", "content": "".join(chat)}]
 83 |         logger.debug(f"Chat completion finished: {messages}")
 84 | 
 85 |         self.update_token_usage_log(
 86 |             messages=messages, answer="".join(chat), step_name=step_name
 87 |         )
 88 | 
 89 |         return messages
 90 | 
 91 |     def update_token_usage_log(self, messages, answer, step_name):
 92 |         prompt_tokens = self.num_tokens_from_messages(messages)
 93 |         completion_tokens = self.num_tokens(answer)
 94 |         total_tokens = prompt_tokens + completion_tokens
 95 | 
 96 |         self.cumulative_prompt_tokens += prompt_tokens
 97 |         self.cumulative_completion_tokens += completion_tokens
 98 |         self.cumulative_total_tokens += total_tokens
 99 | 
100 |         self.token_usage_log.append(
101 |             TokenUsage(
102 |                 step_name=step_name,
103 |                 in_step_prompt_tokens=prompt_tokens,
104 |                 in_step_completion_tokens=completion_tokens,
105 |                 in_step_total_tokens=total_tokens,
106 |                 total_prompt_tokens=self.cumulative_prompt_tokens,
107 |                 total_completion_tokens=self.cumulative_completion_tokens,
108 |                 total_tokens=self.cumulative_total_tokens,
109 |             )
110 |         )
111 | 
112 |     def format_token_usage_log(self):
113 |         result = "step_name,"
114 |         result += "prompt_tokens_in_step,completion_tokens_in_step,total_tokens_in_step"
115 |         result += ",total_prompt_tokens,total_completion_tokens,total_tokens\n"
116 |         for log in self.token_usage_log:
117 |             result += log.step_name + ","
118 |             result += str(log.in_step_prompt_tokens) + ","
119 |             result += str(log.in_step_completion_tokens) + ","
120 |             result += str(log.in_step_total_tokens) + ","
121 |             result += str(log.total_prompt_tokens) + ","
122 |             result += str(log.total_completion_tokens) + ","
123 |             result += str(log.total_tokens) + "\n"
124 |         return result
125 | 
126 |     def num_tokens(self, txt):
127 |         return len(self.tokenizer.encode(txt))
128 | 
129 |     def num_tokens_from_messages(self, messages):
130 |         """Returns the number of tokens used by a list of messages."""
131 |         n_tokens = 0
132 |         for message in messages:
133 |             n_tokens += (
134 |                 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
135 |             )
136 |             for key, value in message.items():
137 |                 n_tokens += self.num_tokens(value)
138 |                 if key == "name":  # if there's a name, the role is omitted
139 |                     n_tokens += -1  # role is always required and always 1 token
140 |         n_tokens += 2  # every reply is primed with <im_start>assistant
141 |         return n_tokens
142 | 
143 | 
144 | def fallback_model(model: str) -> str:
145 |     try:
146 |         openai.Model.retrieve(model)
147 |         return model
148 |     except openai.InvalidRequestError:
149 |         print(
150 |             f"Model {model} not available for provided API key. Reverting "
151 |             "to gpt-3.5-turbo. Sign up for the GPT-4 wait list here: "
152 |             "https://openai.com/waitlist/gpt-4-api\n"
153 |         )
154 |         return "gpt-3.5-turbo-16k"
155 | 


--------------------------------------------------------------------------------
/gpt_engineer/chat_to_files.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | 
 4 | def parse_chat(chat):  # -> List[Tuple[str, str]]:
 5 |     # Get all ``` blocks and preceding filenames
 6 |     regex = r"(\S+)\n\s*```[^\n]*\n(.+?)```"
 7 |     matches = re.finditer(regex, chat, re.DOTALL)
 8 | 
 9 |     files = []
10 |     for match in matches:
11 |         # Strip the filename of any non-allowed characters and convert / to \
12 |         path = re.sub(r'[<>"|?*]', "", match.group(1))
13 | 
14 |         # Remove leading and trailing brackets
15 |         path = re.sub(r"^\[(.*)\]$", r"\1", path)
16 | 
17 |         # Remove leading and trailing backticks
18 |         path = re.sub(r"^`(.*)`$", r"\1", path)
19 | 
20 |         # Remove trailing ]
21 |         path = re.sub(r"\]$", "", path)
22 | 
23 |         # Get the code
24 |         code = match.group(2)
25 | 
26 |         # Add the file to the list
27 |         files.append((path, code))
28 | 
29 |     # Get all the text before the first ``` block
30 |     readme = chat.split("```")[0]
31 |     files.append(("README.md", readme))
32 | 
33 |     # Return the files
34 |     return files
35 | 
36 | 
37 | def to_files(chat, workspace):
38 |     workspace["all_output.txt"] = chat
39 | 
40 |     files = parse_chat(chat)
41 |     for file_name, file_content in files:
42 |         workspace[file_name] = file_content
43 | 


--------------------------------------------------------------------------------
/gpt_engineer/collect.py:
--------------------------------------------------------------------------------
 1 | import hashlib
 2 | 
 3 | from typing import List
 4 | 
 5 | from gpt_engineer import steps
 6 | from gpt_engineer.db import DBs
 7 | from gpt_engineer.domain import Step
 8 | from gpt_engineer.learning import Learning, extract_learning
 9 | 
10 | 
11 | def send_learning(learning: Learning):
12 |     import rudderstack.analytics as rudder_analytics
13 | 
14 |     rudder_analytics.write_key = "2Re4kqwL61GDp7S8ewe6K5dbogG"
15 |     rudder_analytics.dataPlaneUrl = "https://gptengineerezm.dataplane.rudderstack.com"
16 | 
17 |     rudder_analytics.track(
18 |         user_id=learning.session,
19 |         event="learning",
20 |         properties=learning.to_dict(),  # type: ignore
21 |     )
22 | 
23 | 
24 | def collect_learnings(model: str, temperature: float, steps: List[Step], dbs: DBs):
25 |     learnings = extract_learning(
26 |         model, temperature, steps, dbs, steps_file_hash=steps_file_hash()
27 |     )
28 |     send_learning(learnings)
29 | 
30 | 
31 | def steps_file_hash():
32 |     with open(steps.__file__, "r") as f:
33 |         content = f.read()
34 |         return hashlib.sha256(content.encode("utf-8")).hexdigest()
35 | 


--------------------------------------------------------------------------------
/gpt_engineer/db.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import shutil
 3 | 
 4 | from dataclasses import dataclass
 5 | from pathlib import Path
 6 | from typing import Any, Dict, Optional
 7 | 
 8 | 
 9 | # This class represents a simple database that stores its data as files in a directory.
10 | class DB:
11 |     """A simple key-value store, where keys are filenames and values are file contents."""
12 | 
13 |     def __init__(self, path, in_memory_dict: Optional[Dict[Any, Any]] = None):
14 |         self.path = Path(path).absolute()
15 | 
16 |         self.path.mkdir(parents=True, exist_ok=True)
17 |         self.in_memory_dict = in_memory_dict
18 | 
19 |     def __contains__(self, key):
20 |         return (self.path / key).is_file()
21 | 
22 |     def __getitem__(self, key):
23 |         if self.in_memory_dict is not None:
24 |             return self.in_memory_dict.__getitem__(str((self.path / key).absolute()))
25 |         full_path = self.path / key
26 |         if not full_path.is_file():
27 |             raise KeyError(f"File '{key}' could not be found in '{self.path}'")
28 |         with full_path.open("r", encoding="utf-8") as f:
29 |             return f.read()
30 | 
31 |     def get(self, key, default=None):
32 |         try:
33 |             return self[key]
34 |         except KeyError:
35 |             return default
36 | 
37 |     def __setitem__(self, key, val):
38 |         if self.in_memory_dict is not None:
39 |             return self.in_memory_dict.__setitem__(str((self.path / key).absolute()), val)
40 |         full_path = self.path / key
41 |         full_path.parent.mkdir(parents=True, exist_ok=True)
42 | 
43 |         if isinstance(val, str):
44 |             full_path.write_text(val, encoding="utf-8")
45 |         else:
46 |             # If val is neither a string nor bytes, raise an error.
47 |             raise TypeError("val must be either a str or bytes")
48 | 
49 | 
50 | # dataclass for all dbs:
51 | @dataclass
52 | class DBs:
53 |     memory: DB
54 |     logs: DB
55 |     preprompts: DB
56 |     input: DB
57 |     workspace: DB
58 |     archive: DB
59 | 
60 | 
61 | def archive(dbs: DBs):
62 |     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
63 |     shutil.move(
64 |         str(dbs.memory.path), str(dbs.archive.path / timestamp / dbs.memory.path.name)
65 |     )
66 |     shutil.move(
67 |         str(dbs.workspace.path),
68 |         str(dbs.archive.path / timestamp / dbs.workspace.path.name),
69 |     )
70 |     return []
71 | 


--------------------------------------------------------------------------------
/gpt_engineer/domain.py:
--------------------------------------------------------------------------------
1 | from typing import Callable, List, TypeVar
2 | 
3 | from gpt_engineer.ai import AI
4 | from gpt_engineer.db import DBs
5 | 
6 | Step = TypeVar("Step", bound=Callable[[AI, DBs], List[dict]])
7 | 


--------------------------------------------------------------------------------
/gpt_engineer/learning.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | import random
  4 | import tempfile
  5 | 
  6 | from dataclasses import dataclass, field
  7 | from datetime import datetime
  8 | from pathlib import Path
  9 | from typing import List, Optional
 10 | 
 11 | from dataclasses_json import dataclass_json
 12 | from termcolor import colored
 13 | 
 14 | from gpt_engineer.db import DB, DBs
 15 | from gpt_engineer.domain import Step
 16 | 
 17 | 
 18 | @dataclass_json
 19 | @dataclass
 20 | class Review:
 21 |     ran: Optional[bool]
 22 |     perfect: Optional[bool]
 23 |     works: Optional[bool]
 24 |     comments: str
 25 |     raw: str
 26 | 
 27 | 
 28 | @dataclass_json
 29 | @dataclass
 30 | class Learning:
 31 |     model: str
 32 |     temperature: float
 33 |     steps: str
 34 |     steps_file_hash: str
 35 |     prompt: str
 36 |     logs: str
 37 |     workspace: str
 38 |     feedback: Optional[str]
 39 |     session: str
 40 |     review: Optional[Review]
 41 |     timestamp: str = field(default_factory=lambda: datetime.utcnow().isoformat())
 42 |     version: str = "0.3"
 43 | 
 44 | 
 45 | def colored(*args):
 46 |     return args[0]
 47 | 
 48 | TERM_CHOICES = (
 49 |     colored("y", "green")
 50 |     + "/"
 51 |     + colored("n", "red")
 52 |     + "/"
 53 |     + colored("u", "yellow")
 54 |     + "(ncertain): "
 55 | )
 56 | 
 57 | 
 58 | def human_input() -> Review:
 59 |     print()
 60 |     print(
 61 |         colored("To help gpt-engineer learn, please answer 3 questions:", "light_green")
 62 |     )
 63 |     print()
 64 | 
 65 |     ran = input("Did the generated code run at all? " + TERM_CHOICES)
 66 |     while ran not in ("y", "n", "u"):
 67 |         ran = input("Invalid input. Please enter y, n, or u: ")
 68 | 
 69 |     perfect = ""
 70 |     useful = ""
 71 | 
 72 |     if ran == "y":
 73 |         perfect = input(
 74 |             "Did the generated code do everything you wanted? " + TERM_CHOICES
 75 |         )
 76 |         while perfect not in ("y", "n", "u"):
 77 |             perfect = input("Invalid input. Please enter y, n, or u: ")
 78 | 
 79 |         if perfect != "y":
 80 |             useful = input("Did the generated code do anything useful? " + TERM_CHOICES)
 81 |             while useful not in ("y", "n", "u"):
 82 |                 useful = input("Invalid input. Please enter y, n, or u: ")
 83 | 
 84 |     comments = ""
 85 |     if perfect != "y":
 86 |         comments = input(
 87 |             "If you have time, please explain what was not working "
 88 |             + colored("(ok to leave blank)\n", "light_green")
 89 |         )
 90 | 
 91 |     check_consent()
 92 | 
 93 |     return Review(
 94 |         raw=", ".join([ran, perfect, useful]),
 95 |         ran={"y": True, "n": False, "u": None, "": None}[ran],
 96 |         works={"y": True, "n": False, "u": None, "": None}[useful],
 97 |         perfect={"y": True, "n": False, "u": None, "": None}[perfect],
 98 |         comments=comments,
 99 |     )
100 | 
101 | 
102 | def check_consent():
103 |     path = Path(".gpte_consent")
104 |     if path.exists() and path.read_text() == "true":
105 |         return
106 |     ans = input("Is it ok if we store your prompts to learn? (y/n)")
107 |     while ans.lower() not in ("y", "n"):
108 |         ans = input("Invalid input. Please enter y or n: ")
109 | 
110 |     if ans.lower() == "y":
111 |         path.write_text("true")
112 |         print(colored("Thank you️", "light_green"))
113 |         print()
114 |         print("(If you change your mind, delete the file .gpte_consent)")
115 |     else:
116 |         print(colored("We understand ❤️", "light_green"))
117 | 
118 | 
119 | def collect_consent() -> bool:
120 |     opt_out = os.environ.get("COLLECT_LEARNINGS_OPT_OUT") == "true"
121 |     consent_flag = Path(".gpte_consent")
122 |     has_given_consent = consent_flag.exists() and consent_flag.read_text() == "true"
123 | 
124 |     if opt_out:
125 |         if has_given_consent:
126 |             return ask_if_can_store()
127 |         return False
128 | 
129 |     if has_given_consent:
130 |         return True
131 | 
132 |     if ask_if_can_store():
133 |         consent_flag.write_text("true")
134 |         print()
135 |         print("(If you change your mind, delete the file .gpte_consent)")
136 |         return True
137 |     return False
138 | 
139 | 
140 | def ask_if_can_store() -> bool:
141 |     print()
142 |     can_store = input(
143 |         "Have you understood and agree to that "
144 |         + colored("OpenAI ", "light_green")
145 |         + "and "
146 |         + colored("gpt-engineer ", "light_green")
147 |         + "store anonymous learnings about how gpt-engineer is used "
148 |         + "(with the sole purpose of improving it)?\n(y/n)"
149 |     ).lower()
150 |     while can_store not in ("y", "n"):
151 |         can_store = input("Invalid input. Please enter y or n: ").lower()
152 | 
153 |     if can_store == "n":
154 |         print(colored("Ok we understand", "light_green"))
155 | 
156 |     return can_store == "y"
157 | 
158 | 
159 | def logs_to_string(steps: List[Step], logs: DB):
160 |     chunks = []
161 |     for step in steps:
162 |         chunks.append(f"--- {step.__name__} ---\n")
163 |         messages = json.loads(logs[step.__name__])
164 |         chunks.append(format_messages(messages))
165 |     return "\n".join(chunks)
166 | 
167 | 
168 | def format_messages(messages: List[dict]) -> str:
169 |     return "\n".join(
170 |         [f"{message['role']}:\n\n{message['content']}" for message in messages]
171 |     )
172 | 
173 | 
174 | def extract_learning(
175 |     model: str, temperature: float, steps: List[Step], dbs: DBs, steps_file_hash
176 | ) -> Learning:
177 |     review = None
178 |     if "review" in dbs.memory:
179 |         review = Review.from_json(dbs.memory["review"])  # type: ignore
180 |     learning = Learning(
181 |         prompt=dbs.input["prompt"],
182 |         model=model,
183 |         temperature=temperature,
184 |         steps=json.dumps([step.__name__ for step in steps]),
185 |         steps_file_hash=steps_file_hash,
186 |         feedback=dbs.input.get("feedback"),
187 |         session=get_session(),
188 |         logs=logs_to_string(steps, dbs.logs),
189 |         workspace=dbs.workspace["all_output.txt"],
190 |         review=review,
191 |     )
192 |     return learning
193 | 
194 | 
195 | def get_session():
196 |     path = Path(tempfile.gettempdir()) / "gpt_engineer_user_id.txt"
197 | 
198 |     try:
199 |         if path.exists():
200 |             user_id = path.read_text()
201 |         else:
202 |             # random uuid:
203 |             user_id = str(random.randint(0, 2**32))
204 |             path.write_text(user_id)
205 |         return user_id
206 |     except IOError:
207 |         return "ephemeral_" + str(random.randint(0, 2**32))
208 | 


--------------------------------------------------------------------------------
/gpt_engineer/main.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import logging
 3 | 
 4 | from pathlib import Path
 5 | 
 6 | import typer
 7 | 
 8 | from gpt_engineer.ai import AI, fallback_model
 9 | from gpt_engineer.collect import collect_learnings
10 | from gpt_engineer.db import DB, DBs, archive
11 | from gpt_engineer.learning import collect_consent
12 | from gpt_engineer.steps import STEPS, Config as StepsConfig
13 | 
14 | app = typer.Typer()
15 | 
16 | 
17 | @app.command()
18 | def main(
19 |     project_path: str = typer.Argument("projects/example", help="path"),
20 |     model: str = typer.Argument("gpt-4", help="model id string"),
21 |     temperature: float = 0.1,
22 |     steps_config: StepsConfig = typer.Option(
23 |         StepsConfig.DEFAULT, "--steps", "-s", help="decide which steps to run"
24 |     ),
25 |     verbose: bool = typer.Option(False, "--verbose", "-v"),
26 | ):
27 |     logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
28 | 
29 |     model = fallback_model(model)
30 |     ai = AI(
31 |         model=model,
32 |         temperature=temperature,
33 |     )
34 | 
35 |     input_path = Path(project_path).absolute()
36 |     memory_path = input_path / "memory"
37 |     workspace_path = input_path / "workspace"
38 |     archive_path = input_path / "archive"
39 | 
40 |     dbs = DBs(
41 |         memory=DB(memory_path),
42 |         logs=DB(memory_path / "logs"),
43 |         input=DB(input_path),
44 |         workspace=DB(workspace_path),
45 |         preprompts=DB(Path(__file__).parent / "preprompts"),
46 |         archive=DB(archive_path),
47 |     )
48 | 
49 |     if steps_config not in [
50 |         StepsConfig.EXECUTE_ONLY,
51 |         StepsConfig.USE_FEEDBACK,
52 |         StepsConfig.EVALUATE,
53 |     ]:
54 |         archive(dbs)
55 | 
56 |     steps = STEPS[steps_config]
57 |     for step in steps:
58 |         messages = step(ai, dbs)
59 |         dbs.logs[step.__name__] = json.dumps(messages)
60 | 
61 |     if collect_consent():
62 |         collect_learnings(model, temperature, steps, dbs)
63 | 
64 |     dbs.logs["token_usage"] = ai.format_token_usage_log()
65 | 
66 | 
67 | if __name__ == "__main__":
68 |     app()
69 | 


--------------------------------------------------------------------------------
/gpt_engineer/preprompts/fix_code:
--------------------------------------------------------------------------------
1 | You are a super smart developer. You have been tasked with fixing a program and making it work according to the best of your knowledge. There might be placeholders in the code you have to fill in.
2 | You provide fully functioning, well formatted code with few comments, that works and has no bugs.
3 | Please return the full new code in the same format.
4 | 


--------------------------------------------------------------------------------
/gpt_engineer/preprompts/generate:
--------------------------------------------------------------------------------
 1 | You will get instructions for code to write.
 2 | You will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code.
 3 | 
 4 | Think step by step and reason yourself to the right decisions to make sure we get it right.
 5 | You will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.
 6 | 
 7 | Then you will output the content of each file including ALL code.
 8 | Each file must strictly follow a markdown code block format, where the following tokens must be replaced such that
 9 | FILENAME is the lowercase file name including the file extension,
10 | LANG is the markup code block language for the code's language, and CODE is the code:
11 | 
12 | FILENAME
13 | ```LANG
14 | CODE
15 | ```
16 | 
17 | Do not comment on what every file does
18 | 
19 | You will start with the "entrypoint" file, then go to the ones that are imported by that file, and so on.
20 | Please note that the code should be fully functional. No placeholders.
21 | 
22 | Follow a language and framework appropriate best practice file naming convention.
23 | Make sure that files contain all imports, types etc. Make sure that code in different files are compatible with each other.
24 | Ensure to implement all code, if you are unsure, write a plausible implementation.
25 | Include module dependency or package manager dependency definition file.
26 | Before you finish, double check that all parts of the architecture is present in the files.
27 | 
28 | Here is an example:
29 | 
30 | hello_world.py
31 | ```python
32 | print("hello world!")
33 | ```


--------------------------------------------------------------------------------
/gpt_engineer/preprompts/philosophy:
--------------------------------------------------------------------------------
 1 | You almost always put different classes in different files.
 2 | For Python, you always create an appropriate requirements.txt file.
 3 | For NodeJS, you always create an appropriate package.json file.
 4 | You always add a comment briefly describing the purpose of the function definition.
 5 | You try to add comments explaining very complex bits of logic.
 6 | You always follow the best practices for the requested languages in terms of describing the code written as a defined
 7 | package/project.
 8 | 
 9 | 
10 | Python toolbelt preferences:
11 | - pytest
12 | - dataclasses
13 | 


--------------------------------------------------------------------------------
/gpt_engineer/preprompts/qa:
--------------------------------------------------------------------------------
1 | You will read instructions and not carry them out, only seek to clarify them.
2 | Specifically you will first summarise a list of super short bullets of areas that need clarification.
3 | Then you will pick one clarifying question, and wait for an answer from the user.
4 | 


--------------------------------------------------------------------------------
/gpt_engineer/preprompts/respec:
--------------------------------------------------------------------------------
 1 | You are a pragmatic principal engineer at Google.
 2 | You have been asked to review a specification for a new feature by a previous version of yourself
 3 | 
 4 | You have been asked to give feedback on the following:
 5 | - Is there anything that might not work the way intended by the instructions?
 6 | - Is there anything in the specification missing for the program to work as expected?
 7 | - Is there anything that can be simplified without significant drawback?
 8 | 
 9 | You are asked to make educated assumptions for each unclear item.
10 | For each of these, communicate which assumptions you'll make when implementing the feature.
11 | 
12 | Think step by step to make sure we don't miss anything.
13 | 


--------------------------------------------------------------------------------
/gpt_engineer/preprompts/spec:
--------------------------------------------------------------------------------
 1 | You are a super smart developer. You have been asked to make a specification for a program.
 2 | 
 3 | Think step by step to make sure we get a high quality specification and we don't miss anything.
 4 | First, be super explicit about what the program should do, which features it should have
 5 | and give details about anything that might be unclear. **Don't leave anything unclear or undefined.**
 6 | 
 7 | Second, lay out the names of the core classes, functions, methods that will be necessary,
 8 | as well as a quick comment on their purpose.
 9 | 
10 | This specification will be used later as the basis for the implementation.
11 | 


--------------------------------------------------------------------------------
/gpt_engineer/preprompts/unit_tests:
--------------------------------------------------------------------------------
1 | You are a super smart developer using Test Driven Development to write tests according to a specification.
2 | 
3 | Please generate tests based on the above specification. The tests should be as simple as possible, but still cover all the functionality.
4 | 


--------------------------------------------------------------------------------
/gpt_engineer/preprompts/use_feedback:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morph-labs/gpt-engineer/900202a5498af365e91a46685afe5af787c05b3b/gpt_engineer/preprompts/use_feedback


--------------------------------------------------------------------------------
/gpt_engineer/preprompts/use_qa:
--------------------------------------------------------------------------------
 1 | Please now remember the steps:
 2 | 
 3 | Think step by step and reason yourself to the right decisions to make sure we get it right.
 4 | First lay out the names of the core classes, functions, methods that will be necessary, As well as a quick comment on their purpose.
 5 | 
 6 | Then you will output the content of each file including ALL code.
 7 | Each file must strictly follow a markdown code block format, where the following tokens must be replaced such that
 8 | FILENAME is the lowercase file name including the file extension,
 9 | LANG is the markup code block language for the code's language, and CODE is the code:
10 | 
11 | FILENAME
12 | ```LANG
13 | CODE
14 | ```
15 | 
16 | Please note that the code should be fully functional. No placeholders.
17 | 
18 | You will start with the "entrypoint" file, then go to the ones that are imported by that file, and so on.
19 | Follow a language and framework appropriate best practice file naming convention.
20 | Make sure that files contain all imports, types etc. The code should be fully functional. Make sure that code in different files are compatible with each other.
21 | Before you finish, double check that all parts of the architecture is present in the files.
22 | 


--------------------------------------------------------------------------------
/gpt_engineer/steps.py:
--------------------------------------------------------------------------------
  1 | import inspect
  2 | import json
  3 | import re
  4 | import subprocess
  5 | 
  6 | from enum import Enum
  7 | from typing import List
  8 | 
  9 | from termcolor import colored
 10 | 
 11 | from gpt_engineer.ai import AI
 12 | from gpt_engineer.chat_to_files import to_files
 13 | from gpt_engineer.db import DBs
 14 | from gpt_engineer.learning import human_input
 15 | 
 16 | 
 17 | def colored(*args):
 18 |     return args[0]
 19 | 
 20 | def setup_sys_prompt(dbs: DBs) -> str:
 21 |     return (
 22 |         dbs.preprompts["generate"] + "\nUseful to know:\n" + dbs.preprompts["philosophy"]
 23 |     )
 24 | 
 25 | 
 26 | def get_prompt(dbs: DBs) -> str:
 27 |     """While we migrate we have this fallback getter"""
 28 |     assert (
 29 |         "prompt" in dbs.input or "main_prompt" in dbs.input
 30 |     ), "Please put your prompt in the file `prompt` in the project directory"
 31 | 
 32 |     if "prompt" not in dbs.input:
 33 |         print(
 34 |             colored("Please put the prompt in the file `prompt`, not `main_prompt", "red")
 35 |         )
 36 |         print()
 37 |         return dbs.input["main_prompt"]
 38 | 
 39 |     return dbs.input["prompt"]
 40 | 
 41 | 
 42 | def curr_fn() -> str:
 43 |     """Get the name of the current function"""
 44 |     return inspect.stack()[1].function
 45 | 
 46 | 
 47 | # All steps below have the signature Step
 48 | 
 49 | 
 50 | def simple_gen(ai: AI, dbs: DBs) -> List[dict]:
 51 |     """Run the AI on the main prompt and save the results"""
 52 |     messages = ai.start(setup_sys_prompt(dbs), get_prompt(dbs), step_name=curr_fn())
 53 |     to_files(messages[-1]["content"], dbs.workspace)
 54 |     return messages
 55 | 
 56 | 
 57 | def clarify(ai: AI, dbs: DBs) -> List[dict]:
 58 |     """
 59 |     Ask the user if they want to clarify anything and save the results to the workspace
 60 |     """
 61 |     messages = [ai.fsystem(dbs.preprompts["qa"])]
 62 |     user_input = get_prompt(dbs)
 63 |     while True:
 64 |         messages = ai.next(messages, user_input, step_name=curr_fn())
 65 | 
 66 |         if messages[-1]["content"].strip() == "Nothing more to clarify.":
 67 |             break
 68 | 
 69 |         if messages[-1]["content"].strip().lower().startswith("no"):
 70 |             print("Nothing more to clarify.")
 71 |             break
 72 | 
 73 |         print()
 74 |         user_input = input('(answer in text, or "c" to move on)\n')
 75 |         print()
 76 | 
 77 |         if not user_input or user_input == "c":
 78 |             print("(letting gpt-engineer make its own assumptions)")
 79 |             print()
 80 |             messages = ai.next(
 81 |                 messages,
 82 |                 "Make your own assumptions and state them explicitly before starting",
 83 |                 step_name=curr_fn(),
 84 |             )
 85 |             print()
 86 |             return messages
 87 | 
 88 |         user_input += (
 89 |             "\n\n"
 90 |             "Is anything else unclear? If yes, only answer in the form:\n"
 91 |             "{remaining unclear areas} remaining questions.\n"
 92 |             "{Next question}\n"
 93 |             'If everything is sufficiently clear, only answer "Nothing more to clarify.".'
 94 |         )
 95 | 
 96 |     print()
 97 |     return messages
 98 | 
 99 | 
100 | def gen_spec(ai: AI, dbs: DBs) -> List[dict]:
101 |     """
102 |     Generate a spec from the main prompt + clarifications and save the results to
103 |     the workspace
104 |     """
105 |     messages = [
106 |         ai.fsystem(setup_sys_prompt(dbs)),
107 |         ai.fsystem(f"Instructions: {dbs.input['prompt']}"),
108 |     ]
109 | 
110 |     messages = ai.next(messages, dbs.preprompts["spec"], step_name=curr_fn())
111 | 
112 |     dbs.memory["specification"] = messages[-1]["content"]
113 | 
114 |     return messages
115 | 
116 | 
117 | def respec(ai: AI, dbs: DBs) -> List[dict]:
118 |     messages = json.loads(dbs.logs[gen_spec.__name__])
119 |     messages += [ai.fsystem(dbs.preprompts["respec"])]
120 | 
121 |     messages = ai.next(messages, step_name=curr_fn())
122 |     messages = ai.next(
123 |         messages,
124 |         (
125 |             "Based on the conversation so far, please reiterate the specification for "
126 |             "the program. "
127 |             "If there are things that can be improved, please incorporate the "
128 |             "improvements. "
129 |             "If you are satisfied with the specification, just write out the "
130 |             "specification word by word again."
131 |         ),
132 |         step_name=curr_fn(),
133 |     )
134 | 
135 |     dbs.memory["specification"] = messages[-1]["content"]
136 |     return messages
137 | 
138 | 
139 | def gen_unit_tests(ai: AI, dbs: DBs) -> List[dict]:
140 |     """
141 |     Generate unit tests based on the specification, that should work.
142 |     """
143 |     messages = [
144 |         ai.fsystem(setup_sys_prompt(dbs)),
145 |         ai.fuser(f"Instructions: {dbs.input['prompt']}"),
146 |         ai.fuser(f"Specification:\n\n{dbs.memory['specification']}"),
147 |     ]
148 | 
149 |     messages = ai.next(messages, dbs.preprompts["unit_tests"], step_name=curr_fn())
150 | 
151 |     dbs.memory["unit_tests"] = messages[-1]["content"]
152 |     to_files(dbs.memory["unit_tests"], dbs.workspace)
153 | 
154 |     return messages
155 | 
156 | 
157 | def gen_clarified_code(ai: AI, dbs: DBs) -> List[dict]:
158 |     """Takes clarification and generates code"""
159 |     messages = json.loads(dbs.logs[clarify.__name__])
160 | 
161 |     messages = [
162 |         ai.fsystem(setup_sys_prompt(dbs)),
163 |     ] + messages[1:]
164 |     messages = ai.next(messages, dbs.preprompts["use_qa"], step_name=curr_fn())
165 | 
166 |     to_files(messages[-1]["content"], dbs.workspace)
167 |     return messages
168 | 
169 | 
170 | def gen_code(ai: AI, dbs: DBs) -> List[dict]:
171 |     # get the messages from previous step
172 |     messages = [
173 |         ai.fsystem(setup_sys_prompt(dbs)),
174 |         ai.fuser(f"Instructions: {dbs.input['prompt']}"),
175 |         ai.fuser(f"Specification:\n\n{dbs.memory['specification']}"),
176 |         ai.fuser(f"Unit tests:\n\n{dbs.memory['unit_tests']}"),
177 |     ]
178 |     messages = ai.next(messages, dbs.preprompts["use_qa"], step_name=curr_fn())
179 |     to_files(messages[-1]["content"], dbs.workspace)
180 |     return messages
181 | 
182 | 
183 | def execute_entrypoint(ai: AI, dbs: DBs) -> List[dict]:
184 |     command = dbs.workspace["run.sh"]
185 | 
186 |     print("Do you want to execute this code?")
187 |     print()
188 |     print(command)
189 |     print()
190 |     print('If yes, press enter. Otherwise, type "no"')
191 |     print()
192 |     if input() not in ["", "y", "yes"]:
193 |         print("Ok, not executing the code.")
194 |         return []
195 |     print("Executing the code...")
196 |     print()
197 |     print(
198 |         colored(
199 |             "Note: If it does not work as expected, consider running the code"
200 |             + " in another way than above.",
201 |             "green",
202 |         )
203 |     )
204 |     print()
205 |     print("You can press ctrl+c *once* to stop the execution.")
206 |     print()
207 | 
208 |     p = subprocess.Popen("bash run.sh", shell=True, cwd=dbs.workspace.path)
209 |     try:
210 |         p.wait()
211 |     except KeyboardInterrupt:
212 |         print()
213 |         print("Stopping execution.")
214 |         print("Execution stopped.")
215 |         p.kill()
216 |         print()
217 | 
218 |     return []
219 | 
220 | 
221 | def gen_entrypoint(ai: AI, dbs: DBs) -> List[dict]:
222 |     messages = ai.start(
223 |         system=(
224 |             "You will get information about a codebase that is currently on disk in "
225 |             "the current folder.\n"
226 |             "From this you will answer with code blocks that includes all the necessary "
227 |             "unix terminal commands to "
228 |             "a) install dependencies "
229 |             "b) run all necessary parts of the codebase (in parallel if necessary).\n"
230 |             "Do not install globally. Do not use sudo.\n"
231 |             "Do not explain the code, just give the commands.\n"
232 |             "Do not use placeholders, use example values (like . for a folder argument) "
233 |             "if necessary.\n"
234 |         ),
235 |         user="Information about the codebase:\n\n" + dbs.workspace["all_output.txt"],
236 |         step_name=curr_fn(),
237 |     )
238 |     print()
239 | 
240 |     regex = r"```\S*\n(.+?)```"
241 |     matches = re.finditer(regex, messages[-1]["content"], re.DOTALL)
242 |     dbs.workspace["run.sh"] = "\n".join(match.group(1) for match in matches)
243 |     return messages
244 | 
245 | 
246 | def use_feedback(ai: AI, dbs: DBs):
247 |     messages = [
248 |         ai.fsystem(setup_sys_prompt(dbs)),
249 |         ai.fuser(f"Instructions: {dbs.input['prompt']}"),
250 |         ai.fassistant(dbs.workspace["all_output.txt"]),
251 |         ai.fsystem(dbs.preprompts["use_feedback"]),
252 |     ]
253 |     messages = ai.next(messages, dbs.input["feedback"], step_name=curr_fn())
254 |     to_files(messages[-1]["content"], dbs.workspace)
255 |     return messages
256 | 
257 | 
258 | def fix_code(ai: AI, dbs: DBs):
259 |     code_output = json.loads(dbs.logs[gen_code.__name__])[-1]["content"]
260 |     messages = [
261 |         ai.fsystem(setup_sys_prompt(dbs)),
262 |         ai.fuser(f"Instructions: {dbs.input['prompt']}"),
263 |         ai.fuser(code_output),
264 |         ai.fsystem(dbs.preprompts["fix_code"]),
265 |     ]
266 |     messages = ai.next(
267 |         messages, "Please fix any errors in the code above.", step_name=curr_fn()
268 |     )
269 |     to_files(messages[-1]["content"], dbs.workspace)
270 |     return messages
271 | 
272 | 
273 | def human_review(ai: AI, dbs: DBs):
274 |     review = human_input()
275 |     dbs.memory["review"] = review.to_json()  # type: ignore
276 |     return []
277 | 
278 | 
279 | class Config(str, Enum):
280 |     DEFAULT = "default"
281 |     BENCHMARK = "benchmark"
282 |     SIMPLE = "simple"
283 |     TDD = "tdd"
284 |     TDD_PLUS = "tdd+"
285 |     CLARIFY = "clarify"
286 |     RESPEC = "respec"
287 |     EXECUTE_ONLY = "execute_only"
288 |     EVALUATE = "evaluate"
289 |     USE_FEEDBACK = "use_feedback"
290 | 
291 | 
292 | # Different configs of what steps to run
293 | STEPS = {
294 |     Config.DEFAULT: [
295 |         clarify,
296 |         gen_clarified_code,
297 |         gen_entrypoint,
298 |         execute_entrypoint,
299 |         human_review,
300 |     ],
301 |     Config.BENCHMARK: [simple_gen, gen_entrypoint],
302 |     Config.SIMPLE: [simple_gen, gen_entrypoint, execute_entrypoint],
303 |     Config.TDD: [
304 |         gen_spec,
305 |         gen_unit_tests,
306 |         gen_code,
307 |         gen_entrypoint,
308 |         execute_entrypoint,
309 |         human_review,
310 |     ],
311 |     Config.TDD_PLUS: [
312 |         gen_spec,
313 |         gen_unit_tests,
314 |         gen_code,
315 |         fix_code,
316 |         gen_entrypoint,
317 |         execute_entrypoint,
318 |         human_review,
319 |     ],
320 |     Config.CLARIFY: [
321 |         clarify,
322 |         gen_clarified_code,
323 |         gen_entrypoint,
324 |         execute_entrypoint,
325 |         human_review,
326 |     ],
327 |     Config.RESPEC: [
328 |         gen_spec,
329 |         respec,
330 |         gen_unit_tests,
331 |         gen_code,
332 |         fix_code,
333 |         gen_entrypoint,
334 |         execute_entrypoint,
335 |         human_review,
336 |     ],
337 |     Config.USE_FEEDBACK: [use_feedback, gen_entrypoint, execute_entrypoint, human_review],
338 |     Config.EXECUTE_ONLY: [execute_entrypoint],
339 |     Config.EVALUATE: [execute_entrypoint, human_review],
340 | }
341 | 
342 | # Future steps that can be added:
343 | # run_tests_and_fix_files
344 | # execute_entrypoint_and_fix_files_if_it_results_in_error
345 | 


--------------------------------------------------------------------------------
/projects/example/prompt:
--------------------------------------------------------------------------------
1 | We are writing snake in python. MVC components split in separate files. Keyboard control.
2 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [build-system]
  2 | requires = ["setuptools", "wheel"]
  3 | 
  4 | [project]
  5 | name = "gpt-engineer"
  6 | version = "0.0.7"
  7 | description = "Specify what you want it to build, the AI asks for clarification, and then builds it."
  8 | readme = "README.md"
  9 | requires-python = ">=3.8"
 10 | dependencies = [
 11 |   'black == 23.3.0',
 12 |   'click >= 8.0.0',
 13 |   'mypy == 1.3.0',
 14 |   'openai == 0.27.8',
 15 |   'pre-commit == 3.3.3',
 16 |   'pytest == 7.3.1',
 17 |   'ruff == 0.0.272',
 18 |   'termcolor==2.3.0',
 19 |   'typer >= 0.3.2',
 20 |   'rudder-sdk-python == 2.0.2',
 21 |   'dataclasses-json == 0.5.7',
 22 |   'tiktoken',
 23 |   'tabulate == 0.9.0',
 24 | ]
 25 | 
 26 | classifiers = [
 27 |   "Development Status :: 4 - Beta",
 28 |   "Programming Language :: Python :: 3.8",
 29 |   "Programming Language :: Python :: 3.9",
 30 |   "Programming Language :: Python :: 3.10",
 31 |   "Programming Language :: Python :: 3.11",
 32 |   "License :: OSI Approved :: MIT License",
 33 |   "Topic :: Scientific/Engineering :: Artificial Intelligence",
 34 | ]
 35 | 
 36 | [project.scripts]
 37 | gpt-engineer = 'gpt_engineer.main:app'
 38 | 
 39 | [tool.setuptools]
 40 | packages = ["gpt_engineer"]
 41 | 
 42 | [tool.ruff]
 43 | select = ["F", "E", "W", "I001"]
 44 | line-length = 90
 45 | show-fixes = false
 46 | target-version = "py311"
 47 | task-tags = ["TODO", "FIXME"]
 48 | exclude = [
 49 |     ".bzr",
 50 |     ".direnv",
 51 |     ".eggs",
 52 |     ".git",
 53 |     ".ruff_cache",
 54 |     ".svn",
 55 |     ".tox",
 56 |     ".venv",
 57 |     "__pypackages__",
 58 |     "_build",
 59 |     "buck-out",
 60 |     "build",
 61 |     "dist",
 62 |     "node_modules",
 63 |     "venv",
 64 | ]
 65 | 
 66 | [project.urls]
 67 | "Homepage" = "https://github.com/AntonOsika/gpt-engineer"
 68 | "Bug Tracker" = "https://github.com/AntonOsika/gpt-engineer/issues"
 69 | 
 70 | [tool.ruff.isort]
 71 | known-first-party = []
 72 | known-third-party = []
 73 | section-order = [
 74 |     "future",
 75 |     "standard-library",
 76 |     "third-party",
 77 |     "first-party",
 78 |     "local-folder",
 79 | ]
 80 | combine-as-imports = true
 81 | split-on-trailing-comma = false
 82 | lines-between-types = 1
 83 | 
 84 | [tool.black]
 85 | line-length = 90
 86 | target-version = ["py311"]
 87 | include = '\.pyi?$'
 88 | exclude = '''
 89 | (
 90 |   /(
 91 |       \.direnv
 92 |     | \.eggs
 93 |     | \.git
 94 |     | \.tox
 95 |     | \.venv
 96 |     | _build
 97 |     | build
 98 |     | dist
 99 |     | venv
100 |   )/
101 | )
102 | '''
103 | 


--------------------------------------------------------------------------------
/scripts/benchmark.py:
--------------------------------------------------------------------------------
  1 | # list all folders in benchmark folder
  2 | # for each folder, run the benchmark
  3 | import contextlib
  4 | import json
  5 | import os
  6 | import subprocess
  7 | 
  8 | from datetime import datetime
  9 | from itertools import islice
 10 | from pathlib import Path
 11 | from typing import Iterable, Union
 12 | 
 13 | from tabulate import tabulate
 14 | from typer import run
 15 | 
 16 | 
 17 | def main(
 18 |     n_benchmarks: Union[int, None] = None,
 19 | ):
 20 |     path = Path("benchmark")
 21 | 
 22 |     folders: Iterable[Path] = path.iterdir()
 23 | 
 24 |     if n_benchmarks:
 25 |         folders = islice(folders, n_benchmarks)
 26 | 
 27 |     benchmarks = []
 28 |     for bench_folder in folders:
 29 |         if os.path.isdir(bench_folder):
 30 |             print(f"Running benchmark for {bench_folder}")
 31 | 
 32 |             log_path = bench_folder / "log.txt"
 33 |             log_file = open(log_path, "w")
 34 |             process = subprocess.Popen(
 35 |                 [
 36 |                     "python",
 37 |                     "-u",  # Unbuffered output
 38 |                     "-m",
 39 |                     "gpt_engineer.main",
 40 |                     bench_folder,
 41 |                     "--steps",
 42 |                     "benchmark",
 43 |                 ],
 44 |                 stdout=log_file,
 45 |                 stderr=log_file,
 46 |                 bufsize=0,
 47 |             )
 48 |             benchmarks.append((bench_folder, process, log_file))
 49 | 
 50 |             print("You can stream the log file by running:")
 51 |             print(f"tail -f {log_path}")
 52 |             print()
 53 | 
 54 |     for bench_folder, process, file in benchmarks:
 55 |         process.wait()
 56 |         file.close()
 57 | 
 58 |         print("process", bench_folder.name, "finished with code", process.returncode)
 59 |         print("Running it. Original benchmark prompt:")
 60 |         print()
 61 |         with open(bench_folder / "prompt") as f:
 62 |             print(f.read())
 63 |         print()
 64 | 
 65 |         with contextlib.suppress(KeyboardInterrupt):
 66 |             subprocess.run(
 67 |                 [
 68 |                     "python",
 69 |                     "-m",
 70 |                     "gpt_engineer.main",
 71 |                     bench_folder,
 72 |                     "--steps",
 73 |                     "evaluate",
 74 |                 ],
 75 |             )
 76 | 
 77 |     generate_report(benchmarks, path)
 78 | 
 79 | 
 80 | def generate_report(benchmarks, benchmark_path):
 81 |     headers = ["Benchmark", "Ran", "Works", "Perfect", "Notes"]
 82 |     rows = []
 83 |     for bench_folder, _, _ in benchmarks:
 84 |         memory = bench_folder / "memory"
 85 |         with open(memory / "review") as f:
 86 |             review = json.loads(f.read())
 87 |             rows.append(
 88 |                 [
 89 |                     bench_folder.name,
 90 |                     to_emoji(review.get("ran", None)),
 91 |                     to_emoji(review.get("works", None)),
 92 |                     to_emoji(review.get("perfect", None)),
 93 |                     review.get("comments", None),
 94 |                 ]
 95 |             )
 96 |     table: str = tabulate(rows, headers, tablefmt="pipe")
 97 |     print("\nBenchmark report:\n")
 98 |     print(table)
 99 |     print()
100 |     append_to_results = ask_yes_no("Append report to the results file?")
101 |     if append_to_results:
102 |         results_path = benchmark_path / "RESULTS.md"
103 |         current_date = datetime.now().strftime("%Y-%m-%d")
104 |         insert_markdown_section(results_path, current_date, table, 2)
105 | 
106 | 
107 | def to_emoji(value: bool) -> str:
108 |     return "\U00002705" if value else "\U0000274C"
109 | 
110 | 
111 | def insert_markdown_section(file_path, section_title, section_text, level):
112 |     with open(file_path, "r") as file:
113 |         lines = file.readlines()
114 | 
115 |     header_prefix = "#" * level
116 |     new_section = f"{header_prefix} {section_title}\n\n{section_text}\n\n"
117 | 
118 |     # Find the first section with the specified level
119 |     line_number = -1
120 |     for i, line in enumerate(lines):
121 |         if line.startswith(header_prefix):
122 |             line_number = i
123 |             break
124 | 
125 |     if line_number != -1:
126 |         lines.insert(line_number, new_section)
127 |     else:
128 |         print(
129 |             f"Markdown file was of unexpected format. No section of level {level} found. "
130 |             "Did not write results."
131 |         )
132 |         return
133 | 
134 |     # Write the file
135 |     with open(file_path, "w") as file:
136 |         file.writelines(lines)
137 | 
138 | 
139 | def ask_yes_no(question: str) -> bool:
140 |     while True:
141 |         response = input(question + " (y/n): ").lower().strip()
142 |         if response == "y":
143 |             return True
144 |         elif response == "n":
145 |             return False
146 |         else:
147 |             print("Please enter either 'y' or 'n'.")
148 | 
149 | 
150 | if __name__ == "__main__":
151 |     run(main)
152 | 


--------------------------------------------------------------------------------
/scripts/clean_benchmarks.py:
--------------------------------------------------------------------------------
 1 | # list all folders in benchmark folder
 2 | # for each folder, run the benchmark
 3 | 
 4 | import os
 5 | import shutil
 6 | 
 7 | from pathlib import Path
 8 | 
 9 | from typer import run
10 | 
11 | 
12 | def main():
13 |     benchmarks = Path("benchmark")
14 | 
15 |     for benchmark in benchmarks.iterdir():
16 |         if benchmark.is_dir():
17 |             print(f"Cleaning {benchmark}")
18 |             for path in benchmark.iterdir():
19 |                 if path.name in ["prompt", "main_prompt"]:
20 |                     continue
21 | 
22 |                 # Get filename of Path object
23 |                 if path.is_dir():
24 |                     # delete the entire directory
25 |                     shutil.rmtree(path)
26 |                 else:
27 |                     # delete the file
28 |                     os.remove(path)
29 | 
30 | 
31 | if __name__ == "__main__":
32 |     run(main)
33 | 


--------------------------------------------------------------------------------
/scripts/print_chat.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | import typer
 4 | 
 5 | from termcolor import colored
 6 | 
 7 | app = typer.Typer()
 8 | 
 9 | 
10 | def pretty_print_conversation(messages):
11 |     role_to_color = {
12 |         "system": "red",
13 |         "user": "green",
14 |         "assistant": "blue",
15 |         "function": "magenta",
16 |     }
17 |     formatted_messages = []
18 |     for message in messages:
19 |         if message["role"] == "function":
20 |             formatted_messages.append(
21 |                 f"function ({message['name']}): {message['content']}\n"
22 |             )
23 |         else:
24 |             assistant_content = (
25 |                 message["function_call"]
26 |                 if message.get("function_call")
27 |                 else message["content"]
28 |             )
29 |             role_to_message = {
30 |                 "system": f"system: {message['content']}\n",
31 |                 "user": f"user: {message['content']}\n",
32 |                 "assistant": f"assistant: {assistant_content}\n",
33 |             }
34 |             formatted_messages.append(role_to_message[message["role"]])
35 | 
36 |     for formatted_message in formatted_messages:
37 |         role = messages[formatted_messages.index(formatted_message)]["role"]
38 |         color = role_to_color[role]
39 |         print(colored(formatted_message, color))
40 | 
41 | 
42 | @app.command()
43 | def main(
44 |     messages_path: str,
45 | ):
46 |     with open(messages_path) as f:
47 |         messages = json.load(f)
48 | 
49 |     pretty_print_conversation(messages)
50 | 
51 | 
52 | if __name__ == "__main__":
53 |     app()
54 | 


--------------------------------------------------------------------------------
/scripts/rerun_edited_message_logs.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import pathlib
 3 | 
 4 | from typing import Union
 5 | 
 6 | import typer
 7 | 
 8 | from gpt_engineer.ai import AI
 9 | from gpt_engineer.chat_to_files import to_files
10 | 
11 | app = typer.Typer()
12 | 
13 | 
14 | @app.command()
15 | def main(
16 |     messages_path: str,
17 |     out_path: Union[str, None] = None,
18 |     model: str = "gpt-4",
19 |     temperature: float = 0.1,
20 | ):
21 |     ai = AI(
22 |         model=model,
23 |         temperature=temperature,
24 |     )
25 | 
26 |     with open(messages_path) as f:
27 |         messages = json.load(f)
28 | 
29 |     messages = ai.next(messages)
30 | 
31 |     if out_path:
32 |         to_files(messages[-1]["content"], out_path)
33 |         with open(pathlib.Path(out_path) / "all_output.txt", "w") as f:
34 |             json.dump(messages[-1]["content"], f)
35 | 
36 | 
37 | if __name__ == "__main__":
38 |     app()
39 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morph-labs/gpt-engineer/900202a5498af365e91a46685afe5af787c05b3b/tests/__init__.py


--------------------------------------------------------------------------------
/tests/steps/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/morph-labs/gpt-engineer/900202a5498af365e91a46685afe5af787c05b3b/tests/steps/__init__.py


--------------------------------------------------------------------------------
/tests/steps/test_archive.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import os
 3 | 
 4 | from unittest.mock import MagicMock
 5 | 
 6 | from gpt_engineer.db import DB, DBs, archive
 7 | 
 8 | 
 9 | def freeze_at(monkeypatch, time):
10 |     datetime_mock = MagicMock(wraps=datetime.datetime)
11 |     datetime_mock.now.return_value = time
12 |     monkeypatch.setattr(datetime, "datetime", datetime_mock)
13 | 
14 | 
15 | def setup_dbs(tmp_path, dir_names):
16 |     directories = [tmp_path / name for name in dir_names]
17 | 
18 |     # Create DB objects
19 |     dbs = [DB(dir) for dir in directories]
20 | 
21 |     # Create DBs instance
22 |     return DBs(*dbs)
23 | 
24 | 
25 | def test_archive(tmp_path, monkeypatch):
26 |     dbs = setup_dbs(
27 |         tmp_path, ["memory", "logs", "preprompts", "input", "workspace", "archive"]
28 |     )
29 |     freeze_at(monkeypatch, datetime.datetime(2020, 12, 25, 17, 5, 55))
30 |     archive(dbs)
31 |     assert not os.path.exists(tmp_path / "memory")
32 |     assert not os.path.exists(tmp_path / "workspace")
33 |     assert os.path.isdir(tmp_path / "archive" / "20201225_170555")
34 | 
35 |     dbs = setup_dbs(
36 |         tmp_path, ["memory", "logs", "preprompts", "input", "workspace", "archive"]
37 |     )
38 |     freeze_at(monkeypatch, datetime.datetime(2022, 8, 14, 8, 5, 12))
39 |     archive(dbs)
40 |     assert not os.path.exists(tmp_path / "memory")
41 |     assert not os.path.exists(tmp_path / "workspace")
42 |     assert os.path.isdir(tmp_path / "archive" / "20201225_170555")
43 |     assert os.path.isdir(tmp_path / "archive" / "20220814_080512")
44 | 


--------------------------------------------------------------------------------
/tests/test_ai.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from gpt_engineer.ai import AI
 4 | 
 5 | 
 6 | @pytest.mark.xfail(reason="Constructor assumes API access")
 7 | def test_ai():
 8 |     AI()
 9 |     # TODO Assert that methods behave and not only constructor.
10 | 


--------------------------------------------------------------------------------
/tests/test_chat_to_files.py:
--------------------------------------------------------------------------------
  1 | import textwrap
  2 | 
  3 | from gpt_engineer.chat_to_files import to_files
  4 | 
  5 | 
  6 | def test_to_files():
  7 |     chat = textwrap.dedent(
  8 |         """
  9 |     This is a sample program.
 10 | 
 11 |     file1.py
 12 |     ```python
 13 |     print("Hello, World!")
 14 |     ```
 15 | 
 16 |     file2.py
 17 |     ```python
 18 |     def add(a, b):
 19 |         return a + b
 20 |     ```
 21 |     """
 22 |     )
 23 | 
 24 |     workspace = {}
 25 |     to_files(chat, workspace)
 26 | 
 27 |     assert workspace["all_output.txt"] == chat
 28 | 
 29 |     expected_files = {
 30 |         "file1.py": 'print("Hello, World!")\n',
 31 |         "file2.py": "def add(a, b):\n    return a + b\n",
 32 |         "README.md": "\nThis is a sample program.\n\nfile1.py\n",
 33 |     }
 34 | 
 35 |     for file_name, file_content in expected_files.items():
 36 |         assert workspace[file_name] == file_content
 37 | 
 38 | 
 39 | def test_to_files_with_square_brackets():
 40 |     chat = textwrap.dedent(
 41 |         """
 42 |     This is a sample program.
 43 | 
 44 |     [file1.py]
 45 |     ```python
 46 |     print("Hello, World!")
 47 |     ```
 48 | 
 49 |     [file2.py]
 50 |     ```python
 51 |     def add(a, b):
 52 |         return a + b
 53 |     ```
 54 |     """
 55 |     )
 56 |     workspace = {}
 57 |     to_files(chat, workspace)
 58 | 
 59 |     assert workspace["all_output.txt"] == chat
 60 | 
 61 |     expected_files = {
 62 |         "file1.py": 'print("Hello, World!")\n',
 63 |         "file2.py": "def add(a, b):\n    return a + b\n",
 64 |         "README.md": "\nThis is a sample program.\n\n[file1.py]\n",
 65 |     }
 66 | 
 67 |     for file_name, file_content in expected_files.items():
 68 |         assert workspace[file_name] == file_content
 69 | 
 70 | 
 71 | def test_files_with_brackets_in_name():
 72 |     chat = textwrap.dedent(
 73 |         """
 74 |     This is a sample program.
 75 | 
 76 |     [id].jsx
 77 |     ```javascript
 78 |     console.log("Hello, World!")
 79 |     ```
 80 |     """
 81 |     )
 82 | 
 83 |     workspace = {}
 84 |     to_files(chat, workspace)
 85 | 
 86 |     assert workspace["all_output.txt"] == chat
 87 | 
 88 |     expected_files = {
 89 |         "[id].jsx": 'console.log("Hello, World!")\n',
 90 |         "README.md": "\nThis is a sample program.\n\n[id].jsx\n",
 91 |     }
 92 | 
 93 |     for file_name, file_content in expected_files.items():
 94 |         assert workspace[file_name] == file_content
 95 | 
 96 | 
 97 | def test_files_with_file_colon():
 98 |     chat = textwrap.dedent(
 99 |         """
100 |     This is a sample program.
101 | 
102 |     [FILE: file1.py]
103 |     ```python
104 |     print("Hello, World!")
105 |     ```
106 |     """
107 |     )
108 | 
109 |     workspace = {}
110 |     to_files(chat, workspace)
111 | 
112 |     assert workspace["all_output.txt"] == chat
113 | 
114 |     expected_files = {
115 |         "file1.py": 'print("Hello, World!")\n',
116 |         "README.md": "\nThis is a sample program.\n\n[FILE: file1.py]\n",
117 |     }
118 | 
119 |     for file_name, file_content in expected_files.items():
120 |         assert workspace[file_name] == file_content
121 | 
122 | 
123 | def test_files_with_back_tick():
124 |     chat = textwrap.dedent(
125 |         """
126 |     This is a sample program.
127 | 
128 |     `file1.py`
129 |     ```python
130 |     print("Hello, World!")
131 |     ```
132 |     """
133 |     )
134 | 
135 |     workspace = {}
136 |     to_files(chat, workspace)
137 | 
138 |     assert workspace["all_output.txt"] == chat
139 | 
140 |     expected_files = {
141 |         "file1.py": 'print("Hello, World!")\n',
142 |         "README.md": "\nThis is a sample program.\n\n`file1.py`\n",
143 |     }
144 | 
145 |     for file_name, file_content in expected_files.items():
146 |         assert workspace[file_name] == file_content
147 | 
148 | 
149 | def test_files_with_newline_between():
150 |     chat = textwrap.dedent(
151 |         """
152 |     This is a sample program.
153 | 
154 |     file1.py
155 | 
156 |     ```python
157 |     print("Hello, World!")
158 |     ```
159 |     """
160 |     )
161 | 
162 |     workspace = {}
163 |     to_files(chat, workspace)
164 | 
165 |     assert workspace["all_output.txt"] == chat
166 | 
167 |     expected_files = {
168 |         "file1.py": 'print("Hello, World!")\n',
169 |         "README.md": "\nThis is a sample program.\n\nfile1.py\n\n",
170 |     }
171 | 
172 |     for file_name, file_content in expected_files.items():
173 |         assert workspace[file_name] == file_content
174 | 
175 | 
176 | def test_files_with_newline_between_header():
177 |     chat = textwrap.dedent(
178 |         """
179 |     This is a sample program.
180 | 
181 |     ## file1.py
182 | 
183 |     ```python
184 |     print("Hello, World!")
185 |     ```
186 |     """
187 |     )
188 | 
189 |     workspace = {}
190 |     to_files(chat, workspace)
191 | 
192 |     assert workspace["all_output.txt"] == chat
193 | 
194 |     expected_files = {
195 |         "file1.py": 'print("Hello, World!")\n',
196 |         "README.md": "\nThis is a sample program.\n\n## file1.py\n\n",
197 |     }
198 | 
199 |     for file_name, file_content in expected_files.items():
200 |         assert workspace[file_name] == file_content
201 | 


--------------------------------------------------------------------------------
/tests/test_collect.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | from unittest.mock import MagicMock
 5 | 
 6 | import pytest
 7 | import rudderstack.analytics as rudder_analytics
 8 | 
 9 | from gpt_engineer.collect import collect_learnings, steps_file_hash
10 | from gpt_engineer.db import DB, DBs
11 | from gpt_engineer.learning import extract_learning
12 | from gpt_engineer.steps import gen_code
13 | 
14 | 
15 | def test_collect_learnings(monkeypatch):
16 |     monkeypatch.setattr(os, "environ", {"COLLECT_LEARNINGS_OPT_IN": "true"})
17 |     monkeypatch.setattr(rudder_analytics, "track", MagicMock())
18 | 
19 |     model = "test_model"
20 |     temperature = 0.5
21 |     steps = [gen_code]
22 |     dbs = DBs(DB("/tmp"), DB("/tmp"), DB("/tmp"), DB("/tmp"), DB("/tmp"), DB("/tmp"))
23 |     dbs.input = {
24 |         "prompt": "test prompt\n with newlines",
25 |         "feedback": "test feedback",
26 |     }
27 |     code = "this is output\n\nit contains code"
28 |     dbs.logs = {gen_code.__name__: json.dumps([{"role": "system", "content": code}])}
29 |     dbs.workspace = {"all_output.txt": "test workspace\n" + code}
30 | 
31 |     collect_learnings(model, temperature, steps, dbs)
32 | 
33 |     learnings = extract_learning(
34 |         model, temperature, steps, dbs, steps_file_hash=steps_file_hash()
35 |     )
36 |     assert rudder_analytics.track.call_count == 1
37 |     assert rudder_analytics.track.call_args[1]["event"] == "learning"
38 |     a = {
39 |         k: v
40 |         for k, v in rudder_analytics.track.call_args[1]["properties"].items()
41 |         if k != "timestamp"
42 |     }
43 |     b = {k: v for k, v in learnings.to_dict().items() if k != "timestamp"}
44 |     assert a == b
45 | 
46 |     assert code in learnings.logs
47 |     assert code in learnings.workspace
48 | 
49 | 
50 | if __name__ == "__main__":
51 |     pytest.main(["-v"])
52 | 


--------------------------------------------------------------------------------
/tests/test_db.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | 
  3 | from gpt_engineer.db import DB, DBs
  4 | 
  5 | 
  6 | def test_DB_operations(tmp_path):
  7 |     # Test initialization
  8 |     db = DB(tmp_path)
  9 | 
 10 |     # Test __setitem__
 11 |     db["test_key"] = "test_value"
 12 | 
 13 |     assert (tmp_path / "test_key").is_file()
 14 | 
 15 |     # Test __getitem__
 16 |     val = db["test_key"]
 17 | 
 18 |     assert val == "test_value"
 19 | 
 20 |     # Test error on getting non-existent key
 21 |     with pytest.raises(KeyError):
 22 |         db["non_existent"]
 23 | 
 24 |     # Test error on setting non-str or non-bytes value
 25 |     with pytest.raises(TypeError):
 26 |         db["key"] = ["Invalid", "value"]
 27 | 
 28 | 
 29 | def test_DBs_initialization(tmp_path):
 30 |     dir_names = ["memory", "logs", "preprompts", "input", "workspace", "archive"]
 31 |     directories = [tmp_path / name for name in dir_names]
 32 | 
 33 |     # Create DB objects
 34 |     dbs = [DB(dir) for dir in directories]
 35 | 
 36 |     # Create DB instance
 37 |     dbs_instance = DBs(*dbs)
 38 | 
 39 |     assert isinstance(dbs_instance.memory, DB)
 40 |     assert isinstance(dbs_instance.logs, DB)
 41 |     assert isinstance(dbs_instance.preprompts, DB)
 42 |     assert isinstance(dbs_instance.input, DB)
 43 |     assert isinstance(dbs_instance.workspace, DB)
 44 |     assert isinstance(dbs_instance.archive, DB)
 45 | 
 46 | 
 47 | def test_invalid_path():
 48 |     with pytest.raises((PermissionError, OSError)):
 49 |         # Test with a path that will raise a permission error
 50 |         DB("/root/test")
 51 | 
 52 | 
 53 | def test_large_files(tmp_path):
 54 |     db = DB(tmp_path)
 55 |     large_content = "a" * (10**6)  # 1MB of data
 56 | 
 57 |     # Test write large files
 58 |     db["large_file"] = large_content
 59 | 
 60 |     # Test read large files
 61 |     assert db["large_file"] == large_content
 62 | 
 63 | 
 64 | def test_concurrent_access(tmp_path):
 65 |     import threading
 66 | 
 67 |     db = DB(tmp_path)
 68 | 
 69 |     num_threads = 10
 70 |     num_writes = 1000
 71 | 
 72 |     def write_to_db(thread_id):
 73 |         for i in range(num_writes):
 74 |             key = f"thread{thread_id}_write{i}"
 75 |             db[key] = str(i)
 76 | 
 77 |     threads = []
 78 |     for thread_id in range(num_threads):
 79 |         t = threading.Thread(target=write_to_db, args=(thread_id,))
 80 |         t.start()
 81 |         threads.append(t)
 82 | 
 83 |     for t in threads:
 84 |         t.join()
 85 | 
 86 |     # Verify that all expected data was written
 87 |     for thread_id in range(num_threads):
 88 |         for i in range(num_writes):
 89 |             key = f"thread{thread_id}_write{i}"
 90 |             assert key in db  # using __contains__ now
 91 |             assert db[key] == str(i)
 92 | 
 93 | 
 94 | def test_error_messages(tmp_path):
 95 |     db = DB(tmp_path)
 96 | 
 97 |     with pytest.raises(TypeError) as e:
 98 |         db["key"] = ["Invalid", "value"]
 99 | 
100 |     assert str(e.value) == "val must be either a str or bytes"
101 | 
102 | 
103 | def test_DBs_instantiation_with_wrong_number_of_arguments(tmp_path):
104 |     db = DB(tmp_path)
105 | 
106 |     with pytest.raises(TypeError):
107 |         DBs(db, db, db)
108 | 
109 |     with pytest.raises(TypeError):
110 |         DBs(db, db, db, db, db, db, db)
111 | 
112 | 
113 | def test_DBs_dataclass_attributes(tmp_path):
114 |     dir_names = ["memory", "logs", "preprompts", "input", "workspace", "archive"]
115 |     directories = [tmp_path / name for name in dir_names]
116 | 
117 |     # Create DB objects
118 |     dbs = [DB(dir) for dir in directories]
119 | 
120 |     # Create DBs instance
121 |     dbs_instance = DBs(*dbs)
122 | 
123 |     assert dbs_instance.memory == dbs[0]
124 |     assert dbs_instance.logs == dbs[1]
125 |     assert dbs_instance.preprompts == dbs[2]
126 |     assert dbs_instance.input == dbs[3]
127 |     assert dbs_instance.workspace == dbs[4]
128 | 


--------------------------------------------------------------------------------