├── local-dev-requirements.txt ├── examples ├── __init__.py ├── add.py ├── file_sizes.py ├── fizzbuzz.py ├── file_sizes_ext.py └── todo.py ├── .vscode └── settings.json ├── src └── maccarone │ ├── __init__.py │ ├── openai.py │ ├── scripts │ └── preprocess.py │ ├── test │ └── test_preprocessor.py │ └── preprocessor.py ├── .github └── workflows │ ├── run-pytest.yml │ └── publish-to-pypi.yml ├── pyproject.toml ├── LICENSE ├── dev-requirements.txt ├── .gitignore └── README.md /local-dev-requirements.txt: -------------------------------------------------------------------------------- 1 | -r dev-requirements.txt 2 | -e .[dev] 3 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | import maccarone 2 | 3 | maccarone.enable(include_pattern="example.*") 4 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.pytestArgs": [ 3 | "src" 4 | ], 5 | "python.testing.unittestEnabled": false, 6 | "python.testing.pytestEnabled": true 7 | } 8 | -------------------------------------------------------------------------------- /src/maccarone/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib.metadata import ( 2 | version, 3 | PackageNotFoundError, 4 | ) 5 | 6 | try: 7 | __version__ = version(__name__) 8 | except PackageNotFoundError as error: 9 | __version__ = "unknown" 10 | -------------------------------------------------------------------------------- /examples/add.py: -------------------------------------------------------------------------------- 1 | def add_two_numbers(x, y): 2 | #<> 3 | return x + y 4 | #<> 5 | 6 | #<> 7 | import argparse 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument("x", type=int) 10 | 
def main(path: str):
    """Print the name and size in bytes of every regular file directly in *path*.

    Sub-directories are skipped. One ``name size`` line is printed per file,
    in the order returned by ``os.listdir``.
    """
    #<>
    import os
    filenames = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
    #<>

    for fn in filenames:
        #<>
        # ``fn`` is only the entry name; it has to be joined with ``path``
        # again, otherwise the lookup happens relative to the current working
        # directory instead of the directory that was listed.
        size = os.path.getsize(os.path.join(path, fn))
        #<>
        print(fn, size)
parser.add_argument("n", type=int) 21 | args = parser.parse_args() 22 | main(args.n) 23 | #<> 24 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools-scm"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "maccarone" 7 | readme = "README.md" 8 | description = "Mix natural language into your Python code" 9 | requires-python = ">=3.8" 10 | dependencies = [ 11 | "openai", 12 | "parsimonious", 13 | ] 14 | dynamic = ["version"] 15 | 16 | [project.urls] 17 | "Homepage" = "https://github.com/bsilverthorn/maccarone" 18 | "Repository" = "https://github.com/bsilverthorn/maccarone" 19 | 20 | [project.optional-dependencies] 21 | dev = [ 22 | "pytest", 23 | "pytest-asyncio", 24 | "ipython", 25 | "pip-tools", 26 | ] 27 | 28 | [project.scripts] 29 | maccarone = "maccarone.scripts.preprocess:script_main" 30 | 31 | [tool.setuptools_scm] 32 | -------------------------------------------------------------------------------- /.github/workflows/publish-to-pypi.yml: -------------------------------------------------------------------------------- 1 | name: PyPI (`maccarone`) 2 | on: 3 | release: 4 | types: [published] 5 | jobs: 6 | publish-to-pypi: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.8"] 11 | steps: 12 | - uses: actions/checkout@v3 13 | - uses: actions/setup-python@v4 14 | with: 15 | python-version: ${{ matrix.python-version }} 16 | - run: python -m pip install --upgrade pip 17 | - name: Build package 18 | run: | 19 | pip install build 20 | python -m build 21 | - name: Publish package 22 | run: | 23 | pip install twine 24 | twine upload dist/* 25 | env: 26 | TWINE_USERNAME: __token__ 27 | TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 28 | -------------------------------------------------------------------------------- 
def main(path: str, extension: Optional[str]):
    """Print the name and size of the regular files directly inside *path*.

    Args:
        path: Directory to list.
        extension: When truthy, only files whose name ends with this suffix
            are reported; ``None`` or ``""`` reports every file.
    """
    # ``Optional[str]`` rather than ``str | None``: the annotation is evaluated
    # when the function is defined, and the ``|`` form fails on Python < 3.10.
    #<>
    filenames = [
        f
        for f in os.listdir(path)
        if os.path.isfile(os.path.join(path, f))
        and (not extension or f.endswith(extension))
    ]
    #<>

    for fn in filenames:
        #<>
        size = os.path.getsize(os.path.join(path, fn))
        #<>

        #<>
        print(colored(f"Filename: {fn}", 'green'), colored(f"Size: {size} bytes", 'blue'))
        #<>
def complete_chat(
    messages: List[Dict[str, str]],
    model="gpt-4",
    on_token: Callable[[int], None] = lambda p: None,
) -> str:
    """Stream a chat completion and return the concatenated response text.

    Args:
        messages: Chat messages passed through to ``ChatCompletion.create``.
        model: Name of the model to query.
        on_token: Callback invoked after every streamed chunk with that
            chunk's zero-based index.

    Returns:
        The text of all streamed ``delta.content`` fragments joined together.
    """
    helicone_key = os.getenv("HELICONE_API_KEY")

    # The Helicone header is only sent when a key is configured.
    headers = {} if helicone_key is None else {"Helicone-Auth": helicone_key}

    responses = cast(
        Iterable[ChatCompletion],
        ChatCompletion.create(
            model=model,
            messages=messages,
            stream=True,
            temperature=0.0,
            headers=headers,
        ),
    )

    logger.info("completing: %r", messages)

    fragments: List[str] = []

    for (i, partial) in enumerate(responses):
        # A chunk's delta may have no ``content`` attribute at all, or carry
        # it as ``None``; neither may contribute text (str(None) would inject
        # the literal "None" into the completion).
        content = getattr(partial.choices[0].delta, "content", None)

        if content is not None:
            fragments.append(str(content))

        on_token(i)

    completion = "".join(fragments)

    logger.info("completion: %r", completion)

    return completion
import argparse
import json
import os
import uuid


def _todos_path() -> str:
    """Return the path of the JSON file that stores the todos."""
    # open() does not interpret a leading "~", so it is expanded explicitly.
    return os.path.expanduser('~/.todos.json')


class Todo:
    """A single todo item: a description, a status and a unique id."""
    #<>
    def __init__(self, description, status='not done', id=None):
        self.description = description
        self.status = status
        # The id is kept as a string so that it can be written to JSON and
        # compares equal to an id typed on the command line.
        self.id = id if id is not None else str(uuid.uuid4())
    #<>


def load_todos() -> list[Todo]:
    """Read the saved todos; return an empty list when nothing was saved yet."""
    #<>
    try:
        with open(_todos_path(), 'r') as f:
            raw_todos = json.load(f)
    except FileNotFoundError:
        return []
    return [Todo(**todo) for todo in raw_todos]
    #<>


todos = load_todos()


def save_todos(todos: list[Todo]) -> None:
    """Write *todos* to the JSON file, replacing its previous content."""
    #<>
    with open(_todos_path(), 'w') as f:
        json.dump([todo.__dict__ for todo in todos], f)
    #<>


def add_todo(description):
    """Append a new todo with *description* to the module list and save it."""
    #<>
    todo = Todo(description)
    todos.append(todo)
    save_todos(todos)
    #<>


def remove_todo(id):
    """Drop the todo whose id equals *id* (compared as strings) and save."""
    #<>
    # Mutate the module-level list in place: assigning to ``todos`` here would
    # create a local name and leave the shared list untouched.
    todos[:] = [todo for todo in todos if str(todo.id) != str(id)]
    save_todos(todos)
    #<>


def print_todos():
    """Print one ``id: description - status`` line per todo."""
    #<>
    for todo in todos:
        print(f'{todo.id}: {todo.description} - {todo.status}')
    #<>


#<>
def main():
    """Parse the command line and run the requested todo action."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--add', help='Add a new todo')
    parser.add_argument('--remove', help='Remove a todo by id')
    parser.add_argument('--list', action='store_true', help='List all todos')
    args = parser.parse_args()

    if args.add:
        add_todo(args.add)
    elif args.remove:
        remove_todo(args.remove)
    elif args.list:
        print_todos()
    else:
        parser.print_help()


if __name__ == '__main__':
    main()
#<>
file or directory to preprocess") 67 | parser.add_argument("--print", dest="print_", action="store_true", help="Print the preprocessed source code") 68 | parser.add_argument("--rewrite", action="store_true", help="Rewrite the source file with the preprocessed code") 69 | parser.add_argument("--suffix", default=".py", help="Suffix for the preprocessed files") 70 | parser.add_argument("--block-at-line", type=int, help="Preprocess only the block at given line") 71 | args = parser.parse_args() 72 | return args 73 | #<> 74 | 75 | def script_main(): 76 | logging.basicConfig(level=logging.INFO) 77 | 78 | return main(**vars(parse_args())) 79 | 80 | if __name__ == "__main__": 81 | script_main() 82 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.8 3 | # by the following command: 4 | # 5 | # pip-compile --extra=dev --output-file=dev-requirements.txt 6 | # 7 | aiohttp==3.8.4 8 | # via openai 9 | aiosignal==1.3.1 10 | # via aiohttp 11 | asttokens==2.2.1 12 | # via stack-data 13 | async-timeout==4.0.2 14 | # via aiohttp 15 | attrs==23.1.0 16 | # via aiohttp 17 | backcall==0.2.0 18 | # via ipython 19 | build==0.10.0 20 | # via pip-tools 21 | certifi==2023.5.7 22 | # via requests 23 | charset-normalizer==3.1.0 24 | # via 25 | # aiohttp 26 | # requests 27 | click==8.1.3 28 | # via pip-tools 29 | decorator==5.1.1 30 | # via ipython 31 | exceptiongroup==1.1.1 32 | # via pytest 33 | executing==1.2.0 34 | # via stack-data 35 | frozenlist==1.3.3 36 | # via 37 | # aiohttp 38 | # aiosignal 39 | idna==3.4 40 | # via 41 | # requests 42 | # yarl 43 | iniconfig==2.0.0 44 | # via pytest 45 | ipython==8.12.2 46 | # via maccarone (pyproject.toml) 47 | jedi==0.18.2 48 | # via ipython 49 | matplotlib-inline==0.1.6 50 | # via ipython 51 | multidict==6.0.4 52 | # via 53 | # aiohttp 54 | # yarl 55 
| openai==0.27.7 56 | # via maccarone (pyproject.toml) 57 | packaging==23.1 58 | # via 59 | # build 60 | # pytest 61 | parsimonious==0.10.0 62 | # via maccarone (pyproject.toml) 63 | parso==0.8.3 64 | # via jedi 65 | pexpect==4.8.0 66 | # via ipython 67 | pickleshare==0.7.5 68 | # via ipython 69 | pip-tools==6.13.0 70 | # via maccarone (pyproject.toml) 71 | pluggy==1.0.0 72 | # via pytest 73 | prompt-toolkit==3.0.38 74 | # via ipython 75 | ptyprocess==0.7.0 76 | # via pexpect 77 | pure-eval==0.2.2 78 | # via stack-data 79 | pygments==2.15.1 80 | # via ipython 81 | pyproject-hooks==1.0.0 82 | # via build 83 | pytest==7.3.1 84 | # via 85 | # maccarone (pyproject.toml) 86 | # pytest-asyncio 87 | pytest-asyncio==0.21.0 88 | # via maccarone (pyproject.toml) 89 | regex==2023.6.3 90 | # via parsimonious 91 | requests==2.30.0 92 | # via openai 93 | six==1.16.0 94 | # via asttokens 95 | stack-data==0.6.2 96 | # via ipython 97 | tomli==2.0.1 98 | # via 99 | # build 100 | # pyproject-hooks 101 | # pytest 102 | tqdm==4.65.0 103 | # via openai 104 | traitlets==5.9.0 105 | # via 106 | # ipython 107 | # matplotlib-inline 108 | typing-extensions==4.7.1 109 | # via ipython 110 | urllib3==2.0.2 111 | # via requests 112 | wcwidth==0.2.6 113 | # via prompt-toolkit 114 | wheel==0.40.0 115 | # via pip-tools 116 | yarl==1.9.2 117 | # via aiohttp 118 | 119 | # The following packages are considered to be unsafe in a requirements file: 120 | # pip 121 | # setuptools 122 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | 
*.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | venv-*/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | 150 | # pytype static type analyzer 151 | .pytype/ 152 | 153 | # Cython debug symbols 154 | cython_debug/ 155 | 156 | # PyCharm 157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 159 | # and can be added to the global gitignore or merged into this file. For a more nuclear 160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
161 | #.idea/ 162 | 163 | # direnv files (often contain secrets) 164 | .envrc 165 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Maccarone: AI-managed code blocks in Python ⏪⏩ 2 | =============================================== 3 | 4 | [![PyPI version](https://badge.fury.io/py/maccarone.svg)](https://badge.fury.io/py/maccarone) 5 | 6 | Maccarone lets you [_delegate_](https://silverthorn.blog/posts/2023-08-llm-assisted-programming-maccarone/) sections of your Python program to AI ownership. 7 | 8 | Here's what it looks like in [the VS Code extension](https://marketplace.visualstudio.com/items?itemName=maccarone.maccarone): 9 | 10 | ![screencap-20230629](https://github.com/bsilverthorn/maccarone/assets/92956/c1549168-28ad-49ef-bcff-dd232838220c) 11 | 12 | Example 13 | ------- 14 | 15 | You might write some code like this: 16 | 17 | ```python 18 | def main(path: str): 19 | #<> 20 | 21 | for fn in filenames: 22 | #<> 23 | 24 | print(fn, size) 25 | 26 | #<> 27 | ``` 28 | 29 | Maccarone then fills in the sections you've delegated: 30 | 31 | ```python 32 | def main(path: str): 33 | #<> 34 | import os 35 | filenames = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))] 36 | #<> 37 | 38 | for fn in filenames: 39 | #<> 40 | size = os.path.getsize(os.path.join(path, fn)) 41 | #<> 42 | print(fn, size) 43 | 44 | #<> 45 | import argparse 46 | parser = argparse.ArgumentParser() 47 | parser.add_argument("path", type=str) 48 | args = parser.parse_args() 49 | main(args.path) 50 | #<> 51 | ``` 52 | 53 | Make a change in your code, like adding an `extension` parameter to `main`, and Maccarone keeps its sections up to date: 54 | 55 | ```python 56 | def main(path: str, extension: str | None = None): 57 | #<> 58 | … 59 | if extension: 60 | filenames = [f for f in filenames if f.endswith(extension)] 61 | #<> 62 | … 63 | 64 | #<> 65 | … 66 
| parser.add_argument("--extension", type=str, default=None) 67 | args = parser.parse_args() 68 | main(args.path, args.extension) 69 | #<> 70 | ``` 71 | 72 | Quickstart 73 | ---------- 74 | 75 | ### Prerequisites 76 | 77 | - Python 3.8+ 78 | - OpenAI API key with GPT-4 (`export OPENAI_API_KEY`) 79 | 80 | ### Easy Mode - VS Code Extension 81 | 82 | Easy mode is the free extension from [the VS Code marketplace](https://marketplace.visualstudio.com/items?itemName=maccarone.maccarone). 83 | 84 | Install it in VS Code and you're done (if you have the prerequisites above). 85 | 86 | ### Other Option - Command Line 87 | 88 | If you don't use VS Code, you can still install Maccarone directly from PyPI: 89 | 90 | - `pip install maccarone` 91 | 92 | Then run `maccarone` to generate code and update your source file: 93 | 94 | ```console 95 | $ maccarone --rewrite examples/file_sizes.py 96 | ``` 97 | 98 | Usage notes 99 | ----------- 100 | 101 | ### Running `maccarone` on a directory 102 | 103 | Maccarone can rewrite all files in a directory: 104 | 105 | ```console 106 | $ maccarone --rewrite --suffix .py examples/ 107 | ``` 108 | 109 | Be careful! You should probably run this only on files in source control, for example. 110 | 111 | Related work 112 | ------------ 113 | 114 | - https://github.com/bsilverthorn/vernac 115 | 116 | FAQs 117 | ---- 118 | 119 | ### It needs my OpenAI API key? 120 | 121 | Maccarone prompts GPT-4 to write code. It will make OpenAI API calls using your key and you **will be charged** by OpenAI. 122 | 123 | API calls are made every time Maccarone preprocesses a new version of a source file. 124 | 125 | The number of tokens consumed is proportional to the size of your completed code. You cannot accurately predict that number in advance. A small source module might cost $0.01–0.10 to preprocess. 126 | 127 | ### What prevents my program from behaving differently after each preprocessing run? 128 | 129 | The strength of your faith in GPT-4. 
130 | 131 | ### What about non-English languages? 132 | 133 | They are likely to work, but less likely than English. 134 | 135 | ### What does "maccarone" mean? 136 | 137 | https://en.wikipedia.org/wiki/Macaronic_language 138 | 139 | ### Is this project active? 140 | 141 | Yes and no. It was created to evaluate a specific flavor of LLM-assisted programming. It feels feature-complete for that purpose. 142 | 143 | PRs and bug reports are welcome, however, and there may be future maintenance releases. 144 | -------------------------------------------------------------------------------- /src/maccarone/test/test_preprocessor.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from textwrap import dedent 4 | 5 | from maccarone.preprocessor import ( 6 | PresentPiece, 7 | MissingPiece, 8 | find_line_number, 9 | raw_source_to_pieces, 10 | raw_pieces_to_tagged_input, 11 | tagged_output_to_completed_pieces, 12 | ) 13 | 14 | LB = "<" 15 | RB = ">" 16 | LL = "<<" # hide test content from maccarone itself 17 | RR = ">>" 18 | CLOSE = f"#{LL}/{RR}" 19 | 20 | @pytest.mark.parametrize("input, expected", [ 21 | ( 22 | f""" 23 | this source has 24 | #{LL}a missing piece{RR} 25 | above 26 | """, 27 | [ 28 | PresentPiece(0, 17, "\nthis source has\n"), 29 | MissingPiece(17, 38, "", "a missing piece"), 30 | PresentPiece(38, 44, "above\n"), 31 | ], 32 | ), 33 | ( 34 | f""" 35 | this source has 36 | #{LL}a missing piece{RR} 37 | with inline source 38 | {CLOSE} 39 | above 40 | """, 41 | [ 42 | PresentPiece(0, 17, "\nthis source has\n"), 43 | MissingPiece(17, 64, "", "a missing piece", "with inline source\n"), 44 | PresentPiece(64, 70, "above\n"), 45 | ], 46 | ), 47 | ( 48 | f""" 49 | this source has 50 | #{LL} 51 | # a missing piece 52 | # with multiline guidance 53 | #{RR} 54 | and inline source 55 | {CLOSE} 56 | above 57 | """, 58 | [ 59 | PresentPiece(0, 17, "\nthis source has\n"), 60 | MissingPiece( 61 | 17, 62 | 94, 63 | "", 64 
| " a missing piece\n with multiline guidance", 65 | "and inline source\n", 66 | ), 67 | PresentPiece(94, 100, "above\n"), 68 | ], 69 | ), 70 | ( 71 | f""" 72 | this source has...* 73 | #{LL}various special chars, (like this){RR} 74 | and inline source with more chars _-%$ 75 | {CLOSE} 76 | `and more!` 77 | """, 78 | [ 79 | PresentPiece(0, 21, "\nthis source has...*\n"), 80 | MissingPiece( 81 | 21, 82 | 107, 83 | "", 84 | "various special chars, (like this)", 85 | "and inline source with more chars _-%$\n", 86 | ), 87 | PresentPiece(107, 119, "`and more!`\n"), 88 | ], 89 | ), 90 | ]) 91 | def test_raw_source_to_pieces(input, expected): 92 | assert list(raw_source_to_pieces(dedent(input))) == expected 93 | 94 | @pytest.mark.parametrize("raw_pieces, expected", [ 95 | ( 96 | [ 97 | # using fake start/end positions for convenience 98 | PresentPiece(0, 0, "\ndef add_two_numbers(x, y):\n "), 99 | MissingPiece(0, 0, " ", "add the args"), 100 | PresentPiece(0, 0, "\n\n"), 101 | MissingPiece(0, 0, "", "add two numbers from command line args, using argparse"), 102 | PresentPiece(0, 0, "\n"), 103 | ], 104 | dedent(f""" 105 | def add_two_numbers(x, y): 106 | # {LB}write_this id="0"{RB} 107 | # add the args 108 | # 109 | 110 | # {LB}write_this id="1"{RB} 111 | # add two numbers from command line args, using argparse 112 | # 113 | """), 114 | ), 115 | ]) 116 | def test_raw_source_to_tagged_input(raw_pieces, expected): 117 | assert raw_pieces_to_tagged_input(raw_pieces) == expected 118 | 119 | @pytest.mark.parametrize("tagged, expected", [ 120 | ( 121 | f'{LB}completed id="0"{RB}\ndef add_two_numbers(x, y):\n return x + y\n\n', 122 | {0: 'def add_two_numbers(x, y):\n return x + y\n'} 123 | ), 124 | ( 125 | f'{LB}completed id="1"{RB}\ndef subtract_two_numbers(x, y):\n return x - y\n\n', 126 | {1: 'def subtract_two_numbers(x, y):\n return x - y\n'} 127 | ), 128 | ( 129 | f'{LB}completed id="1"{RB}\nfoo\n\n{LB}completed id="2"{RB}\ndef multiply_two_numbers(x, y):\n return x * 
@dataclass
class Piece:
    """A contiguous span of the raw source."""

    # Character offsets of the span within the raw source string.
    start: int
    end: int


@dataclass
class PresentPiece(Piece):
    """A span of source text that is kept as written."""

    text: str


@dataclass
class MissingPiece(Piece):
    """A delegated snippet: guidance plus, optionally, previously inlined code."""

    # Whitespace that precedes the snippet's opening marker.
    indent: str
    # Guidance text taken from the opening marker (may span several lines).
    guidance: str
    # Source currently sitting between the opening and closing markers, if any.
    inlined: Optional[str] = None
    enabled: bool = True

    def get_line_pos(self, raw_source: str) -> Tuple[int, int]:
        """Return the 1-based (start, end) line numbers of this piece."""
        start_line = raw_source.count('\n', 0, self.start) + 1
        end_line = raw_source.count('\n', 0, self.end) + 1

        return (start_line, end_line)

    def complete(self, replacement: Optional[str]) -> str:
        """Render the snippet: opening marker, body and closing marker.

        Args:
            replacement: New body for the snippet. Every line is prefixed with
                the snippet's indent. When ``None`` the previously inlined
                source is emitted unchanged; when that is ``None`` too, only
                the opening marker is produced.

        Returns:
            The snippet as source text, terminated by a newline.
        """
        indent = self.indent

        if "\n" in self.guidance:
            # Multi-line guidance becomes one "#"-prefixed line per guidance
            # line, followed by a "#" that precedes the closing ">>".
            commented = "\n".join(
                f"{indent}#{line}" for line in self.guidance.splitlines()
            )
            guidance_lines = f"\n{commented}\n{indent}#"
        else:
            guidance_lines = self.guidance

        source = f"{indent}#<<{guidance_lines}>>\n"

        if replacement is not None:
            # Indent line by line, so an empty replacement adds no stray
            # indent in front of the closing marker.
            body = "".join(indent + line for line in replacement.splitlines(True))

            # The closing marker has to start on a line of its own.
            if body and not body.endswith("\n"):
                body += "\n"
        elif self.inlined is not None:
            body = self.inlined
        else:
            return source

        # NOTE(review): the test module builds its closing marker as
        # "#" + "<<" + "/" + ">>"; confirm the literal below against it.
        return f"{source}{body}{indent}#<>\n"
class RawSourceVisitor(NodeVisitor):
    """Turn a parse tree of the maccarone grammar into a flat list of pieces.

    Human-written source becomes `PresentPiece` objects and guidance-marked
    snippets become `MissingPiece` objects, in document order.
    """

    def __init__(self, raw_source: str, block_at_line: Optional[int] = None):
        """
        Args:
            raw_source: the full text that was parsed; used to translate
                node offsets into line numbers.
            block_at_line: when given, only snippets whose line range
                contains this 1-based line are marked as enabled. When
                `None`, every snippet is enabled.
        """
        self._raw_source = raw_source
        self._block_at_line = block_at_line

    def generic_visit(self, node: Node, visited_children: List[Node]):
        """Fallback for rules without a dedicated visitor.

        Returns the visited children when there are any, otherwise the
        node itself.
        """
        return visited_children or node

    def visit_maccarone(self, node: Node, visited_children: list):
        """Flatten the leading source plus all chunks into one list."""
        (first_source, chunks) = visited_children

        return [first_source] + list(chain.from_iterable(chunks))

    def visit_maccarone_chunk(self, node: Node, visited_children: list):
        """Return the snippet followed by its trailing source pieces."""
        (snippet, source) = visited_children

        # A non-list here is the unvisited node handed back by
        # `generic_visit`, i.e. there is no trailing source to append.
        source_list = source if isinstance(source, list) else []

        return [snippet] + source_list

    def visit_snippet(self, node: Node, visited_children: list):
        """Build a `MissingPiece` for one guidance-marked snippet."""
        (snippet_open, quantified_source) = visited_children

        if self._block_at_line is None:
            enabled = True
        else:
            start_line = find_line_number(self._raw_source, node.start)
            # `node.end` is one past the snippet's final character, and the
            # grammar makes that character the line terminator of the last
            # marker line. Counting newlines up to `node.end` would thus
            # yield the line *after* the snippet, so measure at the final
            # character instead.
            last_char = max(node.start, node.end - 1)
            end_line = find_line_number(self._raw_source, last_char)
            enabled = start_line <= self._block_at_line <= end_line

        if isinstance(quantified_source, list):
            # One match of the optional `(ai_source snippet_close)` group.
            ((source, _),) = quantified_source
        else:
            source = None

        return MissingPiece(
            start=node.start,
            end=node.end,
            indent=snippet_open.indent,
            guidance=snippet_open.guidance,
            enabled=enabled,
            inlined=source,
        )

    def visit_snippet_open(self, node: Node, visited_children: list):
        """Unwrap whichever alternative (single- or multi-line) matched."""
        (single_or_multi,) = visited_children

        return single_or_multi

    def visit_snippet_open_single(self, node: Node, visited_children: list):
        """Opening marker with its guidance on the same line."""
        (guidance_open, guidance, _, _) = visited_children

        return SnippetOpen(
            indent=guidance_open.indent,
            guidance=guidance.text,
        )

    def visit_snippet_open_multi(self, node: Node, visited_children: list):
        """Opening marker whose guidance spans several comment lines."""
        (guidance_open, _, guidance, _) = visited_children

        return SnippetOpen(
            indent=guidance_open.indent,
            guidance=guidance.text,
        )

    def visit_guidance_open(self, node: Node, visited_children: list):
        """Capture the whitespace in front of the opening marker."""
        (ws, _) = visited_children

        return GuidanceOpen(indent=ws.text)

    def visit_guidance_line(self, node: Node, visited_children: list):
        """Return just the guidance text of one comment line."""
        (_, _, guidance_inner, _) = visited_children

        return guidance_inner

    def visit_guidance_lines(self, node: Node, visited_children: list):
        """Join the individual guidance lines with newlines."""
        return Guidance(
            text="\n".join(g.text for g in visited_children)
        )

    def visit_guidance_inner(self, node: Node, visited_children: list):
        """Wrap the matched guidance text."""
        return Guidance(text=node.text)

    def visit_human_source(self, node: Node, visited_children: list):
        """Wrap a run of human-written source lines."""
        return PresentPiece(
            start=node.start,
            end=node.end,
            text=node.text,
        )

    def visit_ai_source(self, node: Node, visited_children: list):
        """Return the text found between a snippet's markers."""
        return node.text
tag_source += f'# \n{piece.indent}# {piece.guidance}\n{piece.indent}# ' 225 | id += 1 226 | else: 227 | tag_source += f"{piece.indent}# {piece.guidance}\n" 228 | 229 | if piece.inlined is None: 230 | tag_source += f"{piece.indent}# (WIP)" 231 | else: 232 | tag_source += f"{piece.inlined}" 233 | else: 234 | raise TypeError("unknown piece type", piece) 235 | 236 | logger.debug("tagged input ↓\n%s", tag_source) 237 | 238 | return tag_source 239 | 240 | def tagged_input_to_tagged_output(tagged_input: str, chat_api: ChatAPI) -> str: 241 | system_prompt = """ 242 | You are an expert programmer working on contract. Your client has written a partial program, but left pieces for you to complete. They have marked those with `` tags inside Python comments, e.g.: 243 | 244 | ``` 245 | def add_two_numbers(x, y): 246 | # 247 | # add the two numbers 248 | # 249 | 250 | # 251 | # add two numbers from command line args, using argparse 252 | # 253 | ``` 254 | 255 | You should produce a document that provides a `` tag for each missing piece, e.g.: 256 | 257 | ``` 258 | 259 | return x + y 260 | 261 | 262 | import argparse 263 | parser = argparse.ArgumentParser() 264 | parser.add_argument("x", type=int) 265 | parser.add_argument("y", type=int) 266 | args = parser.parse_args() 267 | return add_two_numbers(args.x, args.y) 268 | 269 | ``` 270 | 271 | This formatting is very important. The client uses a custom tool to process your work product, and their tool requires this format. Follow this format exactly and do not copy anything outside a `` tag. 
def pieces_to_final_source(
    raw_pieces: List[Piece],
    completed_pieces: Dict[int, str],
) -> str:
    """Assemble the final source text from parsed pieces and completions.

    Args:
        raw_pieces: pieces in document order.
        completed_pieces: replacement text keyed by piece id. Ids are
            consumed in order, starting at 0, one per *enabled*
            `MissingPiece`.

    Returns:
        The concatenation of every `PresentPiece`'s text and every
        `MissingPiece` rendered through `complete`. Disabled pieces are
        rendered with `complete(None)`.

    Raises:
        KeyError: an enabled piece has no entry in `completed_pieces`.
        TypeError: a piece is neither a `PresentPiece` nor a `MissingPiece`.
    """
    next_id = 0
    rendered: List[str] = []

    for raw in raw_pieces:
        if isinstance(raw, PresentPiece):
            rendered.append(raw.text)
        elif isinstance(raw, MissingPiece):
            if not raw.enabled:
                rendered.append(raw.complete(None))
                continue

            try:
                replacement = completed_pieces[next_id]
            except KeyError as error:
                # Same exception type as a plain lookup, but says what is
                # actually missing rather than surfacing a bare integer.
                raise KeyError(
                    f"no completed piece with id {next_id}"
                ) from error

            rendered.append(raw.complete(replacement))
            next_id += 1
        else:
            raise TypeError("unknown piece type", raw)

    final_source = "".join(rendered)

    logger.debug("final source ↓\n%s", final_source)

    return final_source