├── .github
└── workflows
│ ├── code-quality.yml
│ ├── release.yml
│ └── sanity.yml
├── .gitignore
├── ChatDBG.pdf
├── Dockerfile
├── LICENSE
├── README.md
├── check-version-bump.py
├── media
├── gdb.svg
├── lldb.svg
└── pdb.svg
├── pyproject.toml
├── rust-support
├── Cargo.toml
├── chatdbg
│ ├── Cargo.toml
│ └── src
│ │ └── lib.rs
├── chatdbg_macros
│ ├── Cargo.toml
│ └── src
│ │ └── lib.rs
└── fill-crate-version.py
├── samples
├── cpp
│ ├── test-deep-recursion.cpp
│ ├── test-definition-likely.cpp
│ ├── test-failed-assert.cpp
│ ├── test-out-of-bounds-read.cpp
│ ├── test-overflow.cpp
│ ├── test-pointers-loop.cpp
│ ├── test-pointers.cpp
│ ├── test-stack-overflow.cpp
│ └── test-use-after-free.cpp
├── python
│ ├── README.md
│ ├── bootstrap.py
│ ├── bootstrap2.py
│ ├── ds101.py
│ ├── marble-sample.csv
│ ├── marbles.py
│ ├── mean.py
│ ├── nb.ipynb
│ ├── requirements.txt
│ ├── sample.py
│ └── testme.py
└── rust
│ ├── .gitignore
│ ├── Cargo.toml
│ └── test-failed-assert.rs
└── src
└── chatdbg
├── __init__.py
├── __main__.py
├── assistant
├── __init__.py
├── assistant.py
└── listeners.py
├── chatdbg_gdb.py
├── chatdbg_lldb.py
├── chatdbg_pdb.py
├── custom_pdb
├── prompts.py
└── text.py
├── native_util
├── clangd_lsp_integration.py
├── code.py
├── dbg_dialog.py
├── safety.py
└── stacks.py
├── pdb_util
├── capture.py
├── locals.py
├── paths.py
└── sandbox.py
└── util
├── config.py
├── exit_message.py
├── help.py
├── history.py
├── instructions
├── default.txt
└── gpt-4o.txt
├── jupyter.py
├── log.py
├── markdown.py
├── module_whitelist.txt
├── plog.py
├── printer.py
├── prompts.py
├── stream.py
├── text.py
├── trim.py
└── wrap.py
/.github/workflows/code-quality.yml:
--------------------------------------------------------------------------------
1 | name: Code quality
2 |
3 | on:
4 | push:
5 | branches: ["main"]
6 | pull_request:
7 | branches: ["main"]
8 |
9 | jobs:
10 | code-quality:
11 | strategy:
12 | matrix:
 13 |         python: ["3.9", "3.10", "3.11", "3.12", "3.13"]
14 |
15 | runs-on: ubuntu-latest
16 |
17 | steps:
18 | - uses: actions/checkout@v4
19 |
20 | - uses: actions/setup-python@v5
21 | with:
22 | python-version: ${{ matrix.python }}
23 |
24 | - name: Install
25 | run: python3 -m pip install .
26 |
27 | - name: Install dependencies
28 | run: |
29 | python3 -m pip install --upgrade pip
30 | python3 -m pip install black
31 | python3 -m pip install mypy
32 |
33 | - name: Run compileall
34 | run: python3 -m compileall src
35 |
 36 |       - name: Run black
 37 |         run: python3 -m black --check src
 38 |
 39 |       - name: Run mypy
 40 |         run: python3 -m mypy src
 41 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | permissions:
8 | contents: read
9 |
10 | jobs:
11 | release:
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - uses: actions/checkout@v4
16 |
17 | - name: Set up Python
18 | uses: actions/setup-python@v5
19 | with:
20 | python-version: '>=3.11'
21 |
22 | - name: Install dependencies
23 | run: |
24 | python -m pip install --upgrade pip
25 | pip install build packaging requests
26 |
27 | - name: Check version bump
28 | run: python3 check-version-bump.py
29 |
30 | - name: Sync pyproject.toml/Cargo.toml versions
31 | run: python3 rust-support/fill-crate-version.py
32 |
33 | - name: Build package
34 | run: python -m build
35 |
36 | - name: Publish to crates.io
37 | run: |
38 | cargo publish -p chatdbg_macros --allow-dirty
39 | cargo publish -p chatdbg --allow-dirty
40 | env:
41 | CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
42 | working-directory: rust-support
43 |
44 | - name: Publish to PyPI
45 | uses: pypa/gh-action-pypi-publish@release/v1
46 | with:
47 | user: __token__
48 | password: ${{ secrets.PYPI_API_TOKEN }}
49 |
--------------------------------------------------------------------------------
/.github/workflows/sanity.yml:
--------------------------------------------------------------------------------
1 | name: Sanity
2 |
3 | on:
4 | push:
5 | branches: ["main"]
6 | pull_request:
7 | branches: ["main"]
8 |
9 | jobs:
10 | sanity:
11 | strategy:
12 | matrix:
13 | platform: ["ubuntu-latest", "macos-latest", "windows-latest"]
 14 |         python: ["3.9", "3.10", "3.11", "3.12", "3.13"]
15 |
16 | runs-on: ${{ matrix.platform }}
17 |
18 | steps:
19 | - uses: actions/checkout@v4
20 |
21 | - uses: actions/setup-python@v5
22 | with:
23 | python-version: ${{ matrix.python }}
24 |
25 | - name: Update pip
26 | run: python3 -m pip install --upgrade pip
27 |
28 | - name: Install
29 | run: python3 -m pip install .
30 |
31 | - name: Check help message
32 | run: python3 -m chatdbg --help
33 |
34 | - name: Check calling executable directly
35 | run: chatdbg --help
36 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | develop-eggs/
12 | dist/
13 | downloads/
14 | eggs/
15 | .eggs/
16 | lib64/
17 | parts/
18 | sdist/
19 | var/
20 | wheels/
21 | share/python-wheels/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 | MANIFEST
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 |
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 |
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .nox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *.cover
46 | *.py,cover
47 | .hypothesis/
48 | .pytest_cache/
49 | cover/
50 |
51 | # Translations
52 | *.mo
53 | *.pot
54 |
55 | # Django stuff:
56 | *.log
57 | local_settings.py
58 | db.sqlite3
59 | db.sqlite3-journal
60 |
61 | # Flask stuff:
62 | instance/
63 | .webassets-cache
64 |
65 | # Scrapy stuff:
66 | .scrapy
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | .pybuilder/
73 | target/
74 |
75 | # Jupyter Notebook
76 | .ipynb_checkpoints
77 |
78 | # IPython
79 | profile_default/
80 | ipython_config.py
81 |
82 | # pyenv
83 | # For a library or package, you might want to ignore these files since the code is
84 | # intended to run in multiple environments; otherwise, check them in:
85 | # .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # poetry
95 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
96 | # This is especially recommended for binary packages to ensure reproducibility, and is more
97 | # commonly ignored for libraries.
98 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
99 | #poetry.lock
100 |
101 | # pdm
102 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
103 | #pdm.lock
104 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
105 | # in version control.
106 | # https://pdm.fming.dev/#use-with-ide
107 | .pdm.toml
108 |
109 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
110 | __pypackages__/
111 |
112 | # Celery stuff
113 | celerybeat-schedule
114 | celerybeat.pid
115 |
116 | # SageMath parsed files
117 | *.sage.py
118 |
119 | # Environments
120 | .env
121 | .venv
122 | env/
123 | venv/
124 | ENV/
125 | env.bak/
126 | venv.bak/
127 |
128 | # Spyder project settings
129 | .spyderproject
130 | .spyproject
131 |
132 | # Rope project settings
133 | .ropeproject
134 |
135 | # mkdocs documentation
136 | /site
137 |
138 | # mypy
139 | .mypy_cache/
140 | .dmypy.json
141 | dmypy.json
142 |
143 | # Pyre type checker
144 | .pyre/
145 |
146 | # pytype static type analyzer
147 | .pytype/
148 |
149 | # Cython debug symbols
150 | cython_debug/
151 |
152 | # PyCharm
153 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
154 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
155 | # and can be added to the global gitignore or merged into this file. For a more nuclear
156 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
157 | #.idea/
158 |
159 | a.out
160 | log.yaml
161 |
--------------------------------------------------------------------------------
/ChatDBG.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plasma-umass/ChatDBG/805a3d1900aebaaa26603b334e688d70c3d3a7bf/ChatDBG.pdf
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # A Dockerfile to make development setup easier. Run with:
2 | # docker run -it --rm -e OPENAI_API_KEY=$OPENAI_API_KEY $(docker build -q .)
3 |
4 | FROM ubuntu
5 |
6 | ARG LLVM_VERSION=20
7 |
8 | RUN apt update \
9 | && DEBIAN_FRONTEND=noninteractive apt install -y tzdata \
10 | && apt install -y python3 python3-pip \
11 | && apt install -y locales \
12 | && apt install -y autoconf automake bear bison build-essential cmake flex gdb git libgdbm-dev m4 texinfo \
13 | && apt install -y curl lsb-release wget software-properties-common gnupg \
14 | && curl -sSf https://apt.llvm.org/llvm.sh | bash -s -- ${LLVM_VERSION} all \
15 | && apt clean \
16 | && rm -rf /var/lib/apt/lists/*
17 |
18 | # UTF-8.
19 | RUN locale-gen en_US.UTF-8
20 | RUN update-locale LANG=en_US.UTF-8
21 | ENV LANG=en_US.UTF-8
22 | ENV LANGUAGE=en_US:en
23 | ENV LC_ALL=en_US.UTF-8
24 |
25 | # LLVM/Clang.
26 | RUN update-alternatives --install /usr/bin/clang clang /usr/bin/clang-${LLVM_VERSION} 100
27 | RUN update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-${LLVM_VERSION} 100
28 | RUN update-alternatives --install /usr/bin/clangd clangd /usr/bin/clangd-${LLVM_VERSION} 100
29 | RUN update-alternatives --install /usr/bin/lldb lldb /usr/bin/lldb-${LLVM_VERSION} 100
30 |
31 | RUN update-alternatives --install /usr/bin/cc cc /usr/bin/clang 100
32 | RUN update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++ 100
33 | ENV CC=clang
34 | ENV CXX=clang++
35 |
36 | # ChatDBG.
37 | COPY . /root/ChatDBG
38 | RUN pip install --break-system-packages -e /root/ChatDBG
39 | RUN python3 -c 'import chatdbg; print(f"command script import {chatdbg.__path__[0]}/chatdbg_lldb.py")' >> ~/.lldbinit
40 | RUN echo 'settings set target.disable-aslr false' >> ~/.lldbinit
41 | RUN python3 -c 'import chatdbg; print(f"source {chatdbg.__path__[0]}/chatdbg_gdb.py")' >> ~/.gdbinit
42 | ENV CHATDBG_UNSAFE=1
43 |
44 | RUN pip install --break-system-packages -r /root/ChatDBG/samples/python/requirements.txt
45 |
46 | # Bugbench.
47 | RUN git clone https://github.com/nicovank/bugbench.git /root/ChatDBG/samples/bugbench
48 | RUN cd /root/ChatDBG/samples/bugbench && make all
49 |
50 | ENV TERM=xterm-256color
51 | WORKDIR /root/ChatDBG
52 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ChatDBG
2 |
3 | by [Emery Berger](https://emeryberger.com), [Stephen Freund](https://www.cs.williams.edu/~freund/index.html), [Kyla Levin](https://khlevin.github.io/KylaHLevin/index.html), [Nicolas van Kempen](https://nvankempen.com/) (ordered alphabetically)
4 |
5 | [](https://pypi.org/project/chatdbg/)
6 | [](https://pepy.tech/project/chatdbg)
7 | [](https://pepy.tech/project/chatdbg)
8 |
9 | [Read the paper!](https://raw.githubusercontent.com/plasma-umass/ChatDBG/main/ChatDBG.pdf)
10 |
11 | ChatDBG is an AI-based debugging assistant for C/C++/Python/Rust code that integrates large language models into a standard debugger (`pdb`, `lldb`, `gdb`) to help debug your code. With ChatDBG, you can engage in a dialog with your debugger, asking open-ended questions about your program, like `why is x null?`. ChatDBG will _take the wheel_ and steer the debugger to answer your queries. ChatDBG can provide error diagnoses and suggest fixes.
12 |
13 | As far as we are aware, ChatDBG is the _first_ debugger to automatically perform root cause analysis and to provide suggested fixes.
14 |
15 | **Watch ChatDBG in action!**
16 | | LLDB on [test-overflow.cpp](https://github.com/plasma-umass/ChatDBG/blob/main/samples/cpp/test-overflow.cpp) | GDB on [test-overflow.cpp](https://github.com/plasma-umass/ChatDBG/blob/main/samples/cpp/test-overflow.cpp) | Pdb on [bootstrap.py](https://github.com/plasma-umass/ChatDBG/blob/main/samples/python/bootstrap.py) |
17 | |:-------------------------:|:-------------------------:|:-------------------------:|
18 | |
|
|
|
19 |
20 | For technical details and a complete evaluation, see our FSE'25 paper, [_ChatDBG: An AI-Powered Debugging Assistant_](https://dl.acm.org/doi/10.1145/3729355) ([PDF](https://raw.githubusercontent.com/plasma-umass/ChatDBG/main/ChatDBG.pdf)).
21 |
22 | > [!NOTE]
23 | >
24 | > ChatDBG for `pdb`, `lldb`, and `gdb` are feature-complete; we are currently backporting features for these debuggers into the other debuggers.
25 |
26 | ## Installation
27 |
28 | > [!IMPORTANT]
29 | >
30 | > ChatDBG currently needs to be connected to an [OpenAI account](https://openai.com/api/). _Your account will need to have a positive balance for this to work_ ([check your balance](https://platform.openai.com/account/usage)). If you have never purchased credits, you will need to purchase at least \$1 in credits (if your API account was created before August 13, 2023) or \$0.50 (if you have a newer API account) in order to have access to GPT-4, which ChatDBG uses. [Get a key here.](https://platform.openai.com/account/api-keys)
31 | >
32 | > Once you have an API key, set it as an environment variable called `OPENAI_API_KEY`.
33 | >
34 | > ```bash
35 | > export OPENAI_API_KEY=
36 | > ```
37 |
38 | Install ChatDBG using `pip` (you need to do this whether you are debugging Python, C, or C++ code):
39 |
40 | ```bash
41 | python3 -m pip install chatdbg
42 | ```
43 |
44 | If you are using ChatDBG to debug Python programs, you are done. If you want to use ChatDBG to debug native code with `gdb` or `lldb`, follow the installation instructions below.
45 |
46 | ### Installing as an `lldb` extension
47 |
48 |
49 |
50 | lldb installation instructions
51 |
52 |
53 | Install ChatDBG into the `lldb` debugger by running the following command:
54 |
55 | #### Linux
56 |
57 | ```bash
58 | python3 -m pip install ChatDBG
59 | python3 -c 'import chatdbg; print(f"command script import {chatdbg.__path__[0]}/chatdbg_lldb.py")' >> ~/.lldbinit
60 | ```
61 |
62 | If you encounter an error, you may be using an older version of LLVM. Update to the latest version as follows:
63 |
64 | ```
65 | sudo apt install -y lsb-release wget software-properties-common gnupg
66 | curl -sSf https://apt.llvm.org/llvm.sh | sudo bash -s -- 18 all
67 | # LLDB now available as `lldb-18`.
68 | ```
69 |
70 | #### Mac
71 |
72 | ```bash
73 | xcrun python3 -m pip install ChatDBG
74 | xcrun python3 -c 'import chatdbg; print(f"command script import {chatdbg.__path__[0]}/chatdbg_lldb.py")' >> ~/.lldbinit
75 | ```
76 |
 77 | This will install ChatDBG as an LLDB extension.
78 |
79 |
80 |
81 | ### Installing as a `gdb` extension
82 |
83 |
84 |
85 | gdb installation instructions
86 |
87 |
88 | Install ChatDBG into the `gdb` debugger by running the following command:
89 |
90 | ```bash
91 | python3 -m pip install ChatDBG
92 | python3 -c 'import chatdbg; print(f"source {chatdbg.__path__[0]}/chatdbg_gdb.py")' >> ~/.gdbinit
93 | ```
94 |
95 | This will install ChatDBG as a GDB extension.
96 |
97 |
98 |
99 | ## Usage
100 |
101 | ### Debugging Python
102 |
103 | To use ChatDBG to debug Python programs, simply run your Python script as follows:
104 |
105 | ```bash
106 | chatdbg -c continue yourscript.py
107 | ```
108 |
109 | ChatDBG is an extension of the standard Python debugger `pdb`. Like
110 | `pdb`, when your script encounters an uncaught exception, ChatDBG will
111 | enter post mortem debugging mode.
112 |
113 | Unlike other debuggers, you can then use the `why` command to ask
114 | ChatDBG why your program failed and get a suggested fix. After the LLM responds,
115 | you may issue additional debugging commands or continue the conversation by entering
116 | any other text.
117 |
118 | #### IPython and Jupyter Support
119 |
120 | To use ChatDBG as the default debugger for IPython or inside Jupyter Notebooks,
 121 | create an IPython profile and then add the necessary extensions on startup. (Modify
122 | these lines as necessary if you already have a customized profile file.)
123 |
124 | ```bash
125 | ipython profile create
126 | echo "c.InteractiveShellApp.extensions = ['chatdbg.chatdbg_pdb', 'ipyflow']" >> ~/.ipython/profile_default/ipython_config.py
127 | ```
128 |
129 | On the command line, you can then run:
130 |
131 | ```bash
132 | ipython --pdb yourscript.py
133 | ```
134 |
135 | Inside Jupyter, run your notebook with the [ipyflow kernel](https://github.com/ipyflow/ipyflow) and include this line magic at the top of the file.
136 |
137 | ```
138 | %pdb
139 | ```
140 |
141 | ### Debugging native code (C, C++, or Rust with lldb / gdb)
142 |
143 | To use ChatDBG with `lldb` or `gdb`, just run native code (compiled with `-g` for debugging symbols) with your choice of debugger; when it crashes, ask `why`. This also works for post mortem debugging (when you load a core with the `-c` option).
144 |
145 | The native debuggers work slightly differently than Pdb. After the debugger responds to your question, you will enter into ChatDBG's command loop, as indicated by the `(ChatDBG chatting)` prompt. You may continue issuing debugging commands and you may send additional messages to the LLM by starting those messages with "chat". When you are done, type `quit` to return to the debugger's main command loop.
146 |
147 |
148 |
149 | Debugging Rust programs
150 |
151 |
152 | To use ChatDBG with Rust, you need to do two steps: modify your
153 | `Cargo.toml` file and add one line to your source program.
154 |
155 | 1. Add this to your `Cargo.toml` file:
156 |
157 | ```toml
158 | [dependencies]
159 | chatdbg = "0.6.2"
160 |
161 | [profile.dev]
162 | panic = "abort"
163 |
164 | [profile.release]
165 | panic = "abort"
166 | ```
167 |
168 | 2. In your program, apply the `#[chatdbg::main]` attribute to your `main`
169 | function:
170 |
171 | ```rust
172 | #[chatdbg::main]
173 | fn main() {
174 | ```
175 |
176 | Now you can debug your Rust code with `gdb` or `lldb`.
177 |
178 |
179 |
180 | ### Examples
181 |
182 |
183 |
184 | ChatDBG example in lldb
185 |
186 |
187 | ```gdb
188 | (ChatDBG lldb) run
189 | Process 85494 launched: '/Users/emery/git/ChatDBG/test/a.out' (arm64)
190 | TEST 1
191 | TEST -422761288
192 | TEST 0
193 | TEST 0
194 | TEST 0
195 | TEST 0
196 | TEST 0
197 | TEST 0
198 | Process 85494 stopped
199 | * thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=1, address=0x100056200)
200 | frame #0: 0x0000000100002f64 a.out`foo(n=8, b=1) at test.cpp:7:22
201 | 4 int x[] = { 1, 2, 3, 4, 5 };
202 | 5
203 | 6 void foo(int n, float b) {
204 | -> 7 cout << "TEST " << x[n * 10000] << endl;
205 | 8 }
206 | 9
207 | 10 int main()
208 | Target 0: (a.out) stopped.
209 | ```
210 |
211 | Ask `why` to have ChatDBG provide a helpful explanation why this program failed, and suggest a fix:
212 |
213 | ````gdb
214 | (ChatDBG lldb) why
215 | The root cause of this error is accessing an index of the array `x`
216 | that is out of bounds. In `foo()`, the index is calculated as `n *
217 | 10000`, which can be much larger than the size of the array `x` (which
218 | is only 5 elements). In the given trace, the program is trying to
219 | access the memory address `0x100056200`, which is outside of the range
220 | of allocated memory for the array `x`.
221 |
222 | To fix this error, we need to ensure that the index is within the
223 | bounds of the array. One way to do this is to check the value of `n`
224 | before calculating the index, and ensure that it is less than the size
225 | of the array divided by the size of the element. For example, we can
226 | modify `foo()` as follows:
227 |
228 | ```
229 | void foo(int n, float b) {
230 | if (n < 0 || n >= sizeof(x)/sizeof(x[0])) {
231 | cout << "ERROR: Invalid index" << endl;
232 | return;
233 | }
234 | cout << "TEST " << x[n] << endl;
235 | }
236 | ```
237 |
238 | This code checks if `n` is within the valid range, and prints an error
239 | message if it is not. If `n` is within the range, the function prints
240 | the value of the element at index `n` of `x`. With this modification,
241 | the program will avoid accessing memory outside the bounds of the
242 | array, and will print the expected output for valid indices.
243 | ````
244 |
245 |
246 |
247 |
248 |
249 | ChatDBG example in Python (pdb)
250 |
251 |
252 | ```python
253 | Traceback (most recent call last):
254 | File "yourscript.py", line 9, in
255 | print(tryme(100))
256 | File "yourscript.py", line 4, in tryme
257 | if x / i > 2:
258 | ZeroDivisionError: division by zero
259 | Uncaught exception. Entering post mortem debugging
260 | Running 'cont' or 'step' will restart the program
261 | > yourscript.py(4)tryme()
262 | -> if x / i > 2:
263 | ```
264 |
265 | Ask `why` to have ChatDBG provide a helpful explanation of why this program failed and suggest a fix:
266 |
267 | ```python
268 | (ChatDBG Pdb) why
269 | The root cause of the error is that the code is attempting to
270 | divide by zero in the line "if x / i > 2". As i ranges from 0 to 99,
271 | it will eventually reach the value of 0, causing a ZeroDivisionError.
272 |
273 | A possible fix for this would be to add a check for i being equal to
274 | zero before performing the division. This could be done by adding an
275 | additional conditional statement, such as "if i == 0: continue", to
276 | skip over the iteration when i is zero. The updated code would look
277 | like this:
278 |
279 | def tryme(x):
280 | count = 0
281 | for i in range(100):
282 | if i == 0:
283 | continue
284 | if x / i > 2:
285 | count += 1
286 | return count
287 |
288 | if __name__ == '__main__':
289 | print(tryme(100))
290 | ```
291 |
292 |
293 |
--------------------------------------------------------------------------------
/check-version-bump.py:
--------------------------------------------------------------------------------
1 | import tomllib
2 | import sys
3 |
4 | from packaging.version import Version
5 | import requests
6 |
7 | PYPI_URL = "https://pypi.org/pypi/chatdbg/json"
8 |
9 | latest = requests.get(PYPI_URL).json()["info"]["version"]
10 | with open("pyproject.toml", "rb") as f:
11 | current = tomllib.load(f)["project"]["version"]
12 |
13 | if Version(current) <= Version(latest):
14 | print(f"Latest version is {latest} on PyPI. Please bump versions.")
15 | sys.exit(1)
16 |
--------------------------------------------------------------------------------
/media/gdb.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/media/lldb.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 | (ChatDBG lldb)
58 |
59 | run
60 |
61 |
62 |
63 | Process 23558 launched: '/home/steve/ChatDBG/samples/cpp/a.out' (x86_64)
64 |
65 |
66 |
67 | TEST 1
68 |
69 |
70 |
71 | TEST 0
72 |
73 |
74 |
75 | TEST 0
76 |
77 |
78 |
79 | TEST 0
80 |
81 |
82 |
83 | Process 23558 stopped
84 |
85 |
86 |
87 | * thread #1, name = 'a.out', stop reason =
88 |
89 | signal SIGSEGV: address not mapped to object (fault address: 0x55555557f110)
90 |
91 |
92 |
93 | frame #0:
94 |
95 | 0x0000555555555213
96 |
97 | a.out`foo(n=4, b=1) at
98 |
99 | test-overflow.cpp
100 |
101 | :
102 |
103 | 7
104 |
105 | :
106 |
107 | 33
108 |
109 |
110 |
111 | 4
112 |
113 | int
114 |
115 | x[] = {
116 |
117 | 1
118 |
119 | ,
120 |
121 | 2
122 |
123 | ,
124 |
125 | 3
126 |
127 | ,
128 |
129 | 4
130 |
131 | ,
132 |
133 | 5
134 |
135 | };
136 |
137 |
138 |
139 | 5
140 |
141 |
142 |
143 | 6
144 |
145 | void
146 |
147 | foo(
148 |
149 | int
150 |
151 | n,
152 |
153 | float
154 |
155 | b) {
156 |
157 |
158 |
159 | ->
160 |
161 | 7
162 |
163 | cout << "TEST " << x[n *
164 |
165 | 10000
166 |
167 | ]
168 |
169 | << endl;
170 |
171 |
172 |
173 | 8
174 |
175 | }
176 |
177 |
178 |
179 | 9
180 |
181 |
182 |
183 | 10
184 |
185 | int
186 |
187 | main()
188 |
189 |
190 |
191 | (ChatDBG lldb)
192 |
193 | why
194 |
195 |
196 |
197 | The SIGSEGV (segmentation fault) signal indicates that the program attempted to access memory that it's not
198 |
199 |
200 |
201 | allowed to. Examining the provided stack trace and the corresponding source code reveals the potential cause of
202 |
203 |
204 |
205 | the segmentation fault.
206 |
207 |
208 |
209 | In frame #0, within the
210 |
211 | foo
212 |
213 | function, there is an attempt to access an element of the array
214 |
215 | x
216 |
217 | using an index
218 |
219 | n *
220 |
221 |
222 |
223 | 10000
224 |
225 | :
226 |
227 |
228 |
229 | 7
230 |
231 | cout
232 |
233 | <<
234 |
235 | "TEST "
236 |
237 | <<
238 |
239 | x[n
240 |
241 | *
242 |
243 | 10000
244 |
245 | ]
246 |
247 | <<
248 |
249 | endl;
250 |
251 |
252 |
253 | The array
254 |
255 | x
256 |
257 | is defined with only 5 elements:
258 |
259 |
260 |
261 | 4
262 |
263 | int
264 |
265 | x[]
266 |
267 | =
268 |
269 | {
270 |
271 | 1
272 |
273 | ,
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
--------------------------------------------------------------------------------
/media/pdb.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 | exec(cmd, globals, locals)
48 |
49 |
50 |
51 | File "<string>", line 1, in <module>
52 |
53 |
54 |
55 | File "/home/steve/ChatDBG/samples/python/bootstrap.py", line 19, in <module>
56 |
57 |
58 |
59 | stats = resampled_stats(observed_marbles, 5)
60 |
61 |
62 |
63 | File "/home/steve/ChatDBG/samples/python/bootstrap.py", line 15, in resampled_stats
64 |
65 |
66 |
67 | assert len(stats) == num_trials
68 |
69 |
70 |
71 | AssertionError
72 |
73 |
74 |
75 | Uncaught exception. Entering post mortem debugging
76 |
77 |
78 |
79 | Running 'cont' or 'step' will restart the program
80 |
81 |
82 |
83 | >
84 |
85 | /home/steve/ChatDBG/samples/python/bootstrap.py
86 |
87 | (15)
88 |
89 | resampled_stats
90 |
91 | ()
92 |
93 |
94 |
95 | 10
96 |
97 |
98 |
99 | 11 def
100 |
101 | resampled_stats
102 |
103 | (
104 |
105 | observed_marbles
106 |
107 | ,
108 |
109 | num_trials
110 |
111 | ):
112 |
113 |
114 |
115 | 12
116 |
117 | stats = bootstrap_statistic(observed_marbles,
118 |
119 |
120 |
121 | 13
122 |
123 | proportion_blue
124 |
125 | ,
126 |
127 |
128 |
129 | 14
130 |
131 | num_trials)
132 |
133 |
134 |
135 | ---> 15
136 |
137 | assert
138 |
139 | len
140 |
141 | (
142 |
143 | stats
144 |
145 | )
146 |
147 | ==
148 |
149 | num_trials
150 |
151 |
152 |
153 | 16
154 |
155 | return
156 |
157 | stats
158 |
159 |
160 |
161 | 17
162 |
163 |
164 |
165 | 18
166 |
167 | observed_marbles
168 |
169 | =
170 |
171 | make_marble_sample
172 |
173 | ()
174 |
175 |
176 |
177 | 19
178 |
179 | stats
180 |
181 | =
182 |
183 | resampled_stats
184 |
185 | (
186 |
187 | observed_marbles
188 |
189 | ,
190 |
191 | 5
192 |
193 | )
194 |
195 |
196 |
197 | (ChatDBG)
198 |
199 | why
200 |
201 |
202 |
203 | (ChatDBG) p proportion_blue
204 |
205 |
206 |
207 | <function proportion_blue at 0x7f5655935480>
208 |
209 |
210 |
211 | (ChatDBG) p bootstrap_statistic
212 |
213 |
214 |
215 | <function bootstrap_statistic at 0x7f5655e26440>
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["hatchling"]
3 | build-backend = "hatchling.build"
4 |
5 | [project]
6 | name = "ChatDBG"
7 | version = "1.0.0"
8 | authors = [
9 | { name="Emery Berger", email="emery.berger@gmail.com" },
10 | { name="Stephen Freund", email="sfreund@williams.edu" },
11 | { name="Kyla Levin", email="khlevin@umass.edu" },
12 | { name="Nicolas van Kempen", email="nvankemp@gmail.com" },
13 | ]
14 | dependencies = [
15 | "llm-utils>=0.2.8",
16 | "openai>=1.29.0",
17 | "rich>=13.7.0",
18 | "ansicolors>=1.1.8",
19 | "traitlets>=5.14.1",
20 | "ipdb>=0.13.13",
21 | "ipython==8.18.1",
22 | "litellm==1.55.9",
23 | "PyYAML>=6.0.1",
24 | "ipyflow>=0.0.130",
25 | "numpy>=1.26.3",
26 | ]
27 | description = "AI-assisted debugging. Uses AI to answer 'why'."
28 | readme = "README.md"
29 | requires-python = ">=3.9"
30 | classifiers = [
31 | "Programming Language :: Python :: 3",
32 | "License :: OSI Approved :: Apache Software License",
33 | "Operating System :: OS Independent",
34 | ]
35 |
36 | [project.scripts]
37 | chatdbg = "chatdbg.__main__:main"
38 | print_chatdbg_log = "chatdbg.util.plog:main"
39 |
40 | [project.urls]
41 | "Homepage" = "https://github.com/plasma-umass/ChatDBG"
42 | "Bug Tracker" = "https://github.com/plasma-umass/ChatDBG/issues"
43 |
--------------------------------------------------------------------------------
/rust-support/Cargo.toml:
--------------------------------------------------------------------------------
1 | [workspace]
2 | resolver = "2"
3 | members = [
4 | "chatdbg",
5 | "chatdbg_macros",
6 | ]
7 |
--------------------------------------------------------------------------------
/rust-support/chatdbg/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "chatdbg"
3 | version = "##VERSION##" # Will be auto-synced with pyproject.toml on release.
4 | edition = "2021"
5 | description = "Rust-specific support for ChatDBG."
6 | authors = ["Emery Berger "]
7 | license = "Apache-2.0"
8 | readme = "../../README.md"
9 | homepage = "https://github.com/plasma-umass/ChatDBG/"
10 | repository = "https://github.com/plasma-umass/ChatDBG/"
11 |
12 | [dependencies]
13 | chatdbg_macros = "##VERSION##" # Will be auto-synced with pyproject.toml on release.
14 | lazy_static = "1.4.0"
15 |
--------------------------------------------------------------------------------
/rust-support/chatdbg/src/lib.rs:
--------------------------------------------------------------------------------
1 | pub use chatdbg_macros::main;
2 |
3 | use std::fs::{File, OpenOptions};
4 | use std::io::Write;
5 | use std::panic;
6 | use std::sync::{
7 | atomic::{AtomicBool, Ordering},
8 | Mutex,
9 | };
10 | use std::thread;
11 |
// Global Mutex to synchronize file writes across threads.
// NOTE(review): AtomicBool::new is a const fn, so FILE_CREATED could be a
// plain `static`; lazy_static is presumably used here for uniformity.
lazy_static::lazy_static! {
    // Serializes panic-log writes when several threads panic at once.
    static ref FILE_MUTEX: Mutex<()> = Mutex::new(());
    // Set once the log file has been created, so later panics append
    // rather than truncate (see the swap in the panic hook below).
    static ref FILE_CREATED: AtomicBool = AtomicBool::new(false);
}
17 |
18 | pub fn chatdbg() {
19 | // Set a custom panic hook.
20 | panic::set_hook(Box::new(|info| {
21 | let _guard = FILE_MUTEX.lock().unwrap(); // Lock Mutex to synchronize access.
22 |
23 | // Format the panic message similarly to the default panic handler.
24 | let payload = if let Some(s) = info.payload().downcast_ref::<&str>() {
25 | *s
26 | } else {
27 | "Box"
28 | };
29 |
30 | let location = if let Some(location) = info.location() {
31 | format!(" at '{}' line {}", location.file(), location.line())
32 | } else {
33 | String::from("")
34 | };
35 |
36 | let message = format!(
37 | "thread '{}' panicked with '{}'{}",
38 | thread::current().name().unwrap_or(""),
39 | payload,
40 | location
41 | );
42 |
43 | // Print to stderr.
44 | eprintln!("{}", message);
45 |
46 | // Specify the filename without including the process id.
47 | let filename = "panic_log.txt";
48 |
49 | // Open the file with appropriate options.
50 | let mut file = if FILE_CREATED.swap(true, Ordering::SeqCst) {
51 | // If the file is already created by another thread, open it in append mode.
52 | OpenOptions::new()
53 | .create(true)
54 | .append(true)
55 | .open(filename)
56 | .expect("Unable to open file")
57 | } else {
58 | // If this is the first thread to create the file, overwrite any existing file.
59 | File::create(filename).expect("Unable to create file")
60 | };
61 |
62 | // Write to the file.
63 | writeln!(file, "{}", message).expect("Unable to write to file");
64 | }));
65 | }
66 |
--------------------------------------------------------------------------------
/rust-support/chatdbg_macros/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "chatdbg_macros"
3 | version = "##VERSION##" # Will be auto-synced with pyproject.toml on release.
4 | edition = "2021"
5 | description = "Rust-specific (macro) support for ChatDBG."
6 | authors = ["Noah Lev Bartell-Mangel "]
7 | license = "Apache-2.0"
8 | readme = "../../README.md"
9 | homepage = "https://github.com/plasma-umass/ChatDBG/"
10 | repository = "https://github.com/plasma-umass/ChatDBG/"
11 |
12 | [dependencies]
13 | quote = "1.0.33"
14 | syn = { version = "2.0.37", features = ["full"] }
15 |
16 | [lib]
17 | proc-macro = true
18 |
--------------------------------------------------------------------------------
/rust-support/chatdbg_macros/src/lib.rs:
--------------------------------------------------------------------------------
1 | use proc_macro::TokenStream;
2 | use quote::quote;
3 | use syn::{parse, ItemFn};
4 |
5 | #[proc_macro_attribute]
6 | pub fn main(_attr: TokenStream, input: TokenStream) -> TokenStream {
7 | let item = match parse(input) {
8 | Ok(i) => i,
9 | Err(_) => return quote! {}.into(),
10 | };
11 | let ItemFn {
12 | attrs,
13 | vis,
14 | sig,
15 | block,
16 | } = item;
17 | let stmts = &block.stmts;
18 | quote! {
19 | #(#attrs)* #vis #sig {
20 | ::chatdbg::chatdbg();
21 | #(#stmts)*
22 | }
23 | }
24 | .into()
25 | }
26 |
--------------------------------------------------------------------------------
/rust-support/fill-crate-version.py:
--------------------------------------------------------------------------------
"""Replace ##VERSION## placeholders in the Rust crates' Cargo.toml files
with the version declared in the top-level pyproject.toml."""
import os

DIRNAME = os.path.dirname(__file__)
PYPROJECT_FILE = os.path.join(DIRNAME, "../pyproject.toml")
CARGO_TOML_FILES = [
    os.path.join(DIRNAME, "chatdbg/Cargo.toml"),
    os.path.join(DIRNAME, "chatdbg_macros/Cargo.toml"),
]


def fill_version(content: str, version: str) -> str:
    """Return *content* with every ##VERSION## placeholder replaced.

    The placeholder is a literal token, so plain string replacement is
    used rather than the regex substitution previously employed.
    """
    return content.replace("##VERSION##", version)


if __name__ == "__main__":
    # tomllib is stdlib from Python 3.11+; imported lazily since it is
    # only needed when the script actually runs.
    import tomllib

    with open(PYPROJECT_FILE, "rb") as f:
        version = tomllib.load(f)["project"]["version"]
    for file in CARGO_TOML_FILES:
        with open(file, "r") as f:
            content = f.read()
        content = fill_version(content, version)
        with open(file, "w") as f:
            f.write(content)
22 |
--------------------------------------------------------------------------------
/samples/cpp/test-deep-recursion.cpp:
--------------------------------------------------------------------------------
// ChatDBG sample: builds a deep chain of nested structs, then
// dereferences a null pointer at the bottom of the chain. The crash is
// intentional — it gives ChatDBG a deep pointer traversal to explain.
struct David
{
    int *data;
};
struct Charlie
{
    David *d1;
};
struct Bob
{

    Charlie *c1;
    Charlie *c2;
    David *d2;
};
struct Adam
{
    Bob *b1;
    Bob *b2;
};

int main()
{
    int arrayofints[] = {21, 24, 85, 56, 37, 10, 34, 48, 92, 13};
    int *p = &arrayofints[0];
    David david1 = {&arrayofints[4]};
    David david2 = {&arrayofints[2]};
    David david3 = {&arrayofints[8]};
    Charlie charlie1 = {&david1};
    Charlie charlie2 = {&david2};
    Bob bob1 = {&charlie1, &charlie2, &david3};
    David david4 = {&arrayofints[7]};
    // david5 carries a null data pointer; it is reached below.
    David david5 = {nullptr};
    David david6 = {&arrayofints[0]};
    Charlie charlie3 = {&david4};
    Charlie charlie4 = {&david5};
    Bob bob2 = {&charlie3, &charlie4, &david6};
    Adam adam1 = {&bob1, &bob2};

    // b2 -> bob2, c2 -> charlie4, d1 -> david5, data == nullptr:
    // this dereference crashes (intentionally).
    int n = *adam1.b2->c2->d1->data;
    return 0;
}
43 |
--------------------------------------------------------------------------------
/samples/cpp/test-definition-likely.cpp:
--------------------------------------------------------------------------------
// ChatDBG sample: a global null pointer reached through four levels of
// indirection. The final dereference in main() crashes intentionally.
static int* p = nullptr;

struct Bob
{
    int ****data;
};
struct Adam
{
    Bob *b1;
    Bob *b2;
};

int main()
{
    int **p2 = &p;
    int ***p3 = &p2;
    int ****p4 = &p3;

    Bob bob1 = {p4};
    Bob bob2 = {p4};
    Adam adam1 = {&bob1, &bob2};

    // Unwinds p4 -> p3 -> p2 -> p; p is nullptr, so the innermost
    // dereference faults (intentionally).
    int n = ****adam1.b1->data;
    return 0;
}
26 |
--------------------------------------------------------------------------------
/samples/cpp/test-failed-assert.cpp:
--------------------------------------------------------------------------------
// ChatDBG sample: intentionally failing assertion.
// NOTE(review): the header names after #include appear stripped in this
// dump (presumably <iostream> and <cassert>) — confirm against the repo.
#include
#include

// Buggy factorial: the first loop iteration multiplies x by i == 0,
// forcing x to 0.0 so the assertion below fails (intentionally).
float fact(float n) {
    auto x = 1.0;
    for (auto i = 0.0; i < n; i++) {
        x *= i;
    }
    assert(x != 0.0);
    return x;
}


int main()
{
    std::cout << fact(100) << std::endl;
    return 0;
}
19 |
--------------------------------------------------------------------------------
/samples/cpp/test-out-of-bounds-read.cpp:
--------------------------------------------------------------------------------
// ChatDBG sample: intentional out-of-bounds read one element past the
// end of array `a`.
#include
#include

void f(int value) {
    // The value in this function is expected to be in [1, 5].
    std::cout << value << std::endl;
}

int main() {
    int a[5] = {1, 2, 3, 4, 5};
    int b[5] = {6, 7, 8, 9, 10};

    // a + 5 points one past the end of a; dereferencing it is undefined
    // behavior (intentional).
    f(*(a + 5));
}
15 |
--------------------------------------------------------------------------------
/samples/cpp/test-overflow.cpp:
--------------------------------------------------------------------------------
// ChatDBG sample: intentional out-of-bounds array read. This is the
// program shown in the README's lldb example.
#include
using namespace std;

int x[] = { 1, 2, 3, 4, 5 };

void foo(int n, float b) {
    // x has only 5 elements; for n >= 1 the index n * 10000 is far out
    // of bounds (intentional crash).
    cout << "TEST " << x[n * 10000] << endl;
}

int main()
{
    for (auto i = 0; i < 10; i++) {
        foo(i, 1.0);
    }
    return 0;
}
17 |
--------------------------------------------------------------------------------
/samples/cpp/test-pointers-loop.cpp:
--------------------------------------------------------------------------------
// ChatDBG sample: a two-node cycle (node1 <-> node2) plus a third node
// whose `next` is never linked; dereferencing node3->next crashes.
class Node
{
public:
    int data;
    Node *next;

    Node(int value) : data(value), next(nullptr) {}
};

int main()
{
    Node *node1 = new Node(10);
    Node *node2 = new Node(20);
    Node *node3 = new Node(30);

    node1->next = node2;
    node2->next = node1;

    // node3->next is still nullptr (intentional null dereference).
    Node n = *node3->next;

    delete node1;
    delete node2;
    delete node3;

    return 0;
}
--------------------------------------------------------------------------------
/samples/cpp/test-pointers.cpp:
--------------------------------------------------------------------------------
// ChatDBG sample: two Bobs, one holding a valid 4-level pointer chain
// and one holding nullptr; main dereferences the null one intentionally.
// NOTE(review): `&num;` below appears in the dump as the HTML entity
// for '#'; the original source is `int *p1 = &num;` — confirm upstream.
struct Bob
{
    int ****data;
};
struct Adam
{
    Bob *b1;
    Bob *b2;
};

int main()
{
    int num = 1;
    int *p1 = &num;
    int **p2 = &p1;
    int ***p3 = &p2;
    int ****p4 = &p3;

    Bob bob1 = {p4};
    Bob bob2 = {nullptr};
    Adam adam1 = {&bob1, &bob2};

    // adam1.b2 is bob2, whose data is nullptr: crash (intentional).
    int n = ****adam1.b2->data;
    return 0;
}
26 |
--------------------------------------------------------------------------------
/samples/cpp/test-stack-overflow.cpp:
--------------------------------------------------------------------------------
// ChatDBG sample: fib has no base case, so the recursion never
// terminates and overflows the stack (intentional).
#include

int fib(int n) {
    // Missing base case — recurses forever.
    return fib(n-1) + fib(n-2);
}

int main()
{
    auto const n = 100;
    std::cout << "fib(" << n << ") = " << fib(n);
    return 0;
}
13 |
--------------------------------------------------------------------------------
/samples/cpp/test-use-after-free.cpp:
--------------------------------------------------------------------------------
// ChatDBG sample: heap misuse on purpose — the pointer is decremented
// before delete (invalid free), then written and read after the free.
#include

void doSomething(int * ptr) {
    *ptr = 0;
}

int main()
{
    int * n = new int(100);
    // n no longer points at the allocation start, so `delete n` below
    // frees an invalid pointer (intentional).
    n--;
    delete n;
    char * ch = new char[16];
    delete [] ch;
    // Use after free: the memory n points into was released above.
    doSomething(n);
    std::cout << "n = " << *n << std::endl;
    return 0;
}
18 |
--------------------------------------------------------------------------------
/samples/python/README.md:
--------------------------------------------------------------------------------
1 | # Python Tests
2 |
3 | * Install the packages in `requirements.txt`. Some of the tests use them.
4 |
5 | * Run, for example:
6 |
7 | ```
8 | chatdbg -c continue marbles.py
9 | ```
10 |
11 | * *NOTE*: one python package used by the programs has been replaced
12 | with a small python stub to hide the author's identity. That may
13 | change how ChatDBG works, since it includes that stub as part of
14 | the user-defined code.
15 |
--------------------------------------------------------------------------------
/samples/python/bootstrap.py:
--------------------------------------------------------------------------------
# ChatDBG sample: intentionally buggy bootstrap demo. `proportion_blue`
# returns the whole sample instead of a statistic, so `stats` ends up as
# a flattened array of resampled values and the length assert fails.
from datascience import *
from ds101 import *

def make_marble_sample():
    # Load the observed marble colors from the CSV next to this script.
    table = Table().read_table('marble-sample.csv')
    return table.column('color')

def proportion_blue(sample):
    # BUG (intentional): should compute the fraction of 'B' entries,
    # but returns the raw sample instead.
    return sample

def resampled_stats(observed_marbles, num_trials):
    stats = bootstrap_statistic(observed_marbles,
                                proportion_blue,
                                num_trials)
    # Fails: np.append in bootstrap_statistic flattens each array-valued
    # "statistic", so len(stats) is num_trials * sample size.
    assert len(stats) == num_trials
    return stats

observed_marbles = make_marble_sample()
stats = resampled_stats(observed_marbles, 5)

assert np.isclose(np.mean(stats), 0.7)
--------------------------------------------------------------------------------
/samples/python/bootstrap2.py:
--------------------------------------------------------------------------------
# ChatDBG sample: working variant of bootstrap.py — proportion_blue
# actually computes the fraction of 'B' entries here.
from datascience import *
from ds101 import *

def make_marble_sample():
    # Load the observed marble colors from the CSV next to this script.
    table = Table().read_table('marble-sample.csv')
    return table.column('color')

def proportion_blue(sample):
    return np.count_nonzero(sample == 'B') / len(sample)

def resampled_stats(observed_marbles, num_trials):
    stats = bootstrap_statistic(observed_marbles,
                                proportion_blue,
                                num_trials)
    assert len(stats) == num_trials
    return stats

observed_marbles = make_marble_sample()
stats = resampled_stats(observed_marbles, 5)

# NOTE(review): bootstrap resampling is random, so with only 5 trials
# this mean check can fail by chance.
assert np.isclose(np.mean(stats), 0.7)
--------------------------------------------------------------------------------
/samples/python/ds101.py:
--------------------------------------------------------------------------------
import numpy as np
from datascience import *

# fake library to hide identities...

def bootstrap_statistic(observed_sample, compute_statistic, num_trials):
    """
    Creates num_trials resamples of the initial sample.
    Returns an array of the provided statistic for those samples.

    * observed_sample: the initial sample, as an array.

    * compute_statistic: a function that takes a sample as
      an array and returns the statistic for that
      sample.

    * num_trials: the number of bootstrap samples to create.

    """

    # Check that observed_sample is an array!
    if not isinstance(observed_sample, np.ndarray):
        raise ValueError('The first parameter to bootstrap_statistic must be a sample represented as an array, not a value of type ' + str(type(observed_sample).__name__))

    statistics = make_array()

    for i in np.arange(0, num_trials):
        #Key: in bootstrapping we must always sample with replacement
        simulated_resample = np.random.choice(observed_sample, len(observed_sample))

        resample_statistic = compute_statistic(simulated_resample)
        # NOTE(review): np.append flattens array-valued results into the
        # accumulator, so a compute_statistic returning an array yields
        # more than num_trials entries (exploited by bootstrap.py's bug).
        statistics = np.append(statistics, resample_statistic)

    return statistics
35 |
--------------------------------------------------------------------------------
/samples/python/marble-sample.csv:
--------------------------------------------------------------------------------
1 | color
2 | R
3 | R
4 | R
5 | R
6 | R
7 | R
8 | R
9 | R
10 | R
11 | B
12 | B
13 | B
14 | B
15 | B
16 | B
17 | B
18 | B
19 | B
20 | B
21 | B
22 | B
23 | B
24 | B
25 | B
26 | B
27 | B
28 | B
29 | B
30 | B
31 | B
32 |
--------------------------------------------------------------------------------
/samples/python/marbles.py:
--------------------------------------------------------------------------------
# ChatDBG sample: intentionally buggy — ratio_blue_to_red counts
# lowercase 'r' while the CSV uses uppercase 'R', so reds is 0 and the
# division raises ZeroDivisionError.
from datascience import *
from ds101 import *

def make_marble_bag():
    # Load the observed marble colors from the CSV next to this script.
    table = Table().read_table('marble-sample.csv')
    return table.column('color')

def ratio(x,y):
    # Raises ZeroDivisionError when y == 0 (which the bug below causes).
    return x / y

def ratio_blue_to_red(sample):
    blues = np.count_nonzero(sample == 'B')
    # BUG (intentional): should be 'R'; 'r' never matches, so reds == 0.
    reds = np.count_nonzero(sample == 'r')
    return ratio(blues, reds)

marbles = make_marble_bag()
if 'R' in marbles:
    print(ratio_blue_to_red(marbles))
19 |
--------------------------------------------------------------------------------
/samples/python/mean.py:
--------------------------------------------------------------------------------
# ChatDBG sample: bootstraps the proportion of blue marbles and asserts
# the mean is close to 0.7.
import numpy as np
from datascience import *
from ds101 import bootstrap_statistic

def make_marble_bag():
    # Load the observed marble colors from the CSV next to this script.
    table = Table().read_table('marble-sample.csv')
    return table.column('color')

observed_marbles = make_marble_bag()

def percent_blue(sample):
    return np.count_nonzero(sample == 'B') / len(sample)

def main():

    num_trials = 5

    stats = bootstrap_statistic(observed_marbles,
                                percent_blue,
                                num_trials)

    # NOTE(review): with only 5 random resamples this tight isclose
    # check can fail by chance — presumably the intended demo failure.
    assert np.isclose(np.mean(stats), 0.7)

main()
--------------------------------------------------------------------------------
/samples/python/nb.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "9706dd9f-da01-4afc-8148-d5e51696e281",
6 | "metadata": {},
7 | "source": [
8 | "**You must set the kernel to be the ipyflow kernel for ChatDBG to work.**"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "id": "637bdd5a-8e45-4f1d-8ad6-6e4d75e60d66",
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "from datascience import *\n",
19 | "%pdb"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": null,
25 | "id": "66b2ebbf-584a-4c5f-885d-f2ad667cadd1",
26 | "metadata": {},
27 | "outputs": [],
28 | "source": [
29 | "red_blue_proportions = make_array(0.3, 0.7)"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": null,
35 | "id": "802a3ab2-599d-49dd-a5b4-3139d5e34269",
36 | "metadata": {},
37 | "outputs": [],
38 | "source": [
39 | "def make_marble_bag(size):\n",
40 | " return size * sample_proportions(red_blue_proportions, size)"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": null,
46 | "id": "b46dc5a5-c376-40b3-a98d-da0ea012c80a",
47 | "metadata": {},
48 | "outputs": [],
49 | "source": [
50 | "make_marble_bag(10)"
51 | ]
52 | }
53 | ],
54 | "metadata": {
55 | "ipyflow": {
56 | "cell_children": {
57 | "11a5ccf7-17d6-4600-ad3b-4369d00eac1a": [],
58 | "44d121b5-07d7-4b90-8fa5-bd0ad7ee2623": [],
59 | "45d9fcde-3d09-4bd0-a1ac-f8470fc81250": [],
60 | "637bdd5a-8e45-4f1d-8ad6-6e4d75e60d66": [],
61 | "66b2ebbf-584a-4c5f-885d-f2ad667cadd1": [
62 | "b46dc5a5-c376-40b3-a98d-da0ea012c80a"
63 | ],
64 | "679500c8-471d-4dd8-aad4-09f1942805fb": [],
65 | "802a3ab2-599d-49dd-a5b4-3139d5e34269": [
66 | "b46dc5a5-c376-40b3-a98d-da0ea012c80a"
67 | ],
68 | "a0ef25d9-d30c-4577-8285-c21fdd1fac2d": [],
69 | "b46dc5a5-c376-40b3-a98d-da0ea012c80a": [],
70 | "c13a73c3-68cb-4172-8f9d-0b0387b0c927": [
71 | "f7c07ae1-735d-4093-bd3e-7efa9291f2d2"
72 | ],
73 | "cf1aa8d4-c2fb-4c68-9861-3431df761d1a": [],
74 | "f7c07ae1-735d-4093-bd3e-7efa9291f2d2": []
75 | },
76 | "cell_parents": {
77 | "11a5ccf7-17d6-4600-ad3b-4369d00eac1a": [],
78 | "44d121b5-07d7-4b90-8fa5-bd0ad7ee2623": [],
79 | "45d9fcde-3d09-4bd0-a1ac-f8470fc81250": [],
80 | "637bdd5a-8e45-4f1d-8ad6-6e4d75e60d66": [],
81 | "66b2ebbf-584a-4c5f-885d-f2ad667cadd1": [],
82 | "679500c8-471d-4dd8-aad4-09f1942805fb": [],
83 | "802a3ab2-599d-49dd-a5b4-3139d5e34269": [],
84 | "a0ef25d9-d30c-4577-8285-c21fdd1fac2d": [],
85 | "b46dc5a5-c376-40b3-a98d-da0ea012c80a": [
86 | "66b2ebbf-584a-4c5f-885d-f2ad667cadd1",
87 | "802a3ab2-599d-49dd-a5b4-3139d5e34269"
88 | ],
89 | "c13a73c3-68cb-4172-8f9d-0b0387b0c927": [],
90 | "cf1aa8d4-c2fb-4c68-9861-3431df761d1a": [],
91 | "f7c07ae1-735d-4093-bd3e-7efa9291f2d2": [
92 | "c13a73c3-68cb-4172-8f9d-0b0387b0c927"
93 | ]
94 | }
95 | },
96 | "kernelspec": {
97 | "display_name": "Python 3 (ipyflow)",
98 | "language": "python",
99 | "name": "ipyflow"
100 | },
101 | "language_info": {
102 | "codemirror_mode": {
103 | "name": "ipython",
104 | "version": 3
105 | },
106 | "file_extension": ".py",
107 | "mimetype": "text/x-python",
108 | "name": "python",
109 | "nbconvert_exporter": "python",
110 | "pygments_lexer": "ipython3",
111 | "version": "3.11.7"
112 | }
113 | },
114 | "nbformat": 4,
115 | "nbformat_minor": 5
116 | }
117 |
--------------------------------------------------------------------------------
/samples/python/requirements.txt:
--------------------------------------------------------------------------------
1 | # packages necessary for the test files
2 | datascience
3 |
--------------------------------------------------------------------------------
/samples/python/sample.py:
--------------------------------------------------------------------------------
# ChatDBG sample: intentionally buggy — make_marble_bag passes its
# arguments to make_sample in the wrong order.
import numpy as np

red_blue_proportions = np.array([0.3, 0.7])

def make_sample(sample_size, probabilities):
    return np.random.multinomial(sample_size, probabilities)

def make_marble_bag(size):
    # BUG (intentional): arguments swapped; should be
    # make_sample(size, red_blue_proportions).
    return make_sample(red_blue_proportions, size)

make_marble_bag(10)
12 |
--------------------------------------------------------------------------------
/samples/python/testme.py:
--------------------------------------------------------------------------------
# ChatDBG sample (used in the README's pdb example): the loop's first
# iteration divides by i == 0, raising ZeroDivisionError intentionally.
def tryme(x):
    count = 0
    for i in range(100):
        # BUG (intentional): i == 0 on the first iteration.
        if x / i > 2:
            count += 1
    return count

if __name__ == '__main__':
    print(tryme(100))
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/samples/rust/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | Cargo.lock
3 | panic_log.txt
4 |
--------------------------------------------------------------------------------
/samples/rust/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "chatdbg_rust_tests"
3 | version = "1.0.0"
4 | edition = "2021"
5 |
6 | [[bin]]
7 | name = "test-failed-assert"
8 | path = "test-failed-assert.rs"
9 |
10 | [dependencies]
11 | chatdbg = { path = "../../rust-support/chatdbg" }
12 |
13 | [profile.dev]
14 | panic = "abort"
15 |
16 | [profile.release]
17 | panic = "abort"
18 |
19 |
--------------------------------------------------------------------------------
/samples/rust/test-failed-assert.rs:
--------------------------------------------------------------------------------
// ChatDBG sample: intentionally buggy factorial — the loop starts at
// i == 0, so x becomes 0.0 and the assert panics.
fn fact(n: f32) -> f32 {
    let mut x = 1.0;
    for i in 0..n as i32 {
        // BUG (intentional): first iteration multiplies by 0.
        x *= i as f32;
    }
    assert!(x != 0.0);
    x
}

// Installs the ChatDBG panic hook before running main's body.
#[chatdbg::main]
fn main() {
    println!("{}", fact(100.0));
}
14 |
--------------------------------------------------------------------------------
/src/chatdbg/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plasma-umass/ChatDBG/805a3d1900aebaaa26603b334e688d70c3d3a7bf/src/chatdbg/__init__.py
--------------------------------------------------------------------------------
/src/chatdbg/__main__.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from getopt import GetoptError
3 |
4 | import ipdb
5 |
6 | from chatdbg.chatdbg_pdb import ChatDBG
7 | from chatdbg.util.config import chatdbg_config
8 | from chatdbg.util.help import print_help
9 |
10 |
def main() -> None:
    """Entry point: run ipdb with ChatDBG substituted as the debugger class."""
    # Make ipdb construct ChatDBG instead of its default debugger.
    ipdb.__main__._get_debugger_cls = lambda: ChatDBG

    # Strip ChatDBG-specific flags; everything else is forwarded to ipdb.
    remaining = chatdbg_config.parse_user_flags(sys.argv[1:])

    if "-h" in remaining or "--help" in remaining:
        print_help()

    sys.argv = [sys.argv[0], *remaining]

    try:
        ipdb.__main__.main()
    except GetoptError as e:
        print(f"Unrecognized option: {e.opt}\n")
        print_help()
        sys.exit(1)


if __name__ == "__main__":
    main()
31 |
--------------------------------------------------------------------------------
/src/chatdbg/assistant/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plasma-umass/ChatDBG/805a3d1900aebaaa26603b334e688d70c3d3a7bf/src/chatdbg/assistant/__init__.py
--------------------------------------------------------------------------------
/src/chatdbg/assistant/assistant.py:
--------------------------------------------------------------------------------
1 | import json
2 | import string
3 | import textwrap
4 | import time
5 | import pprint
6 |
7 | import warnings
8 |
9 | with warnings.catch_warnings():
10 | warnings.simplefilter("ignore")
11 | import litellm
12 |
13 | import openai
14 |
15 | from ..util.trim import sandwich_tokens, trim_messages
16 | from ..util.text import strip_ansi
17 | from .listeners import Printer
18 |
19 |
class AssistantError(Exception):
    """Fatal error raised when the assistant cannot be configured or run."""
23 |
24 |
def remove_non_printable_chars(s: str) -> str:
    """Return *s* with every character outside string.printable removed."""
    allowed = set(string.printable)
    return "".join(ch for ch in s if ch in allowed)
29 |
30 |
class Assistant:
    """
    Runs a tool-calling dialog with an LLM through litellm.

    Responses are streamed; functions registered via `_add_function` are
    exposed to the model as tools, and progress is broadcast to the
    listeners as events (see BaseAssistantListener).
    """

    def __init__(
        self,
        instructions,
        model="gpt-4o",
        timeout=30,
        listeners=None,
        functions=None,
        max_call_response_tokens=2048,
    ):
        """
        - instructions: system prompt that seeds the conversation.
        - model: litellm model identifier.
        - timeout: per-completion timeout, in seconds.
        - listeners: event listeners; defaults to a single Printer.
        - functions: callables whose docstrings are JSON tool schemas.
        - max_call_response_tokens: cap on each tool result fed back to the model.
        """
        # Hide their debugging info -- it messes with our error handling
        litellm.suppress_debug_info = True

        # Bug fix: avoid shared mutable default arguments -- build the
        # defaults per instance instead of once at definition time.
        self._clients = [Printer()] if listeners is None else listeners

        self._functions = {}
        for f in functions if functions is not None else []:
            self._add_function(f)

        self._model = model
        self._timeout = timeout
        self._conversation = [{"role": "system", "content": instructions}]
        self._max_call_response_tokens = max_call_response_tokens

        self._check_model()
        self._broadcast("on_begin_dialog", instructions)

    def close(self):
        """End the dialog and notify listeners."""
        self._broadcast("on_end_dialog")

    def _warn_about_exception(self, e, message="Unexpected Exception"):
        """Broadcast an on_error event containing `e` and its full traceback."""
        import traceback

        tb_lines = traceback.format_exception(type(e), e, e.__traceback__)
        tb_string = "".join(tb_lines)
        self._broadcast("on_error", f"{message}\n\n{e}\n{tb_string}")

    def query(self, prompt: str, user_text):
        """
        Send a query to the LLM.
        - prompt is the prompt to send.
        - user_text is what the user typed (which may or not be the same as prompt)

        Returns a dictionary containing:
        - "completed": True of the query ran to completion.
        - "cost": Cost of query, or 0 if not completed.
        Other fields only if completed is True
        - "time": completion time in seconds
        - "model": the model used.
        - "tokens": total tokens
        - "prompt_tokens": our prompts
        - "completion_tokens": the LLM completions part
        """
        stats = {"completed": False, "cost": 0}
        start = time.time()

        self._broadcast("on_begin_query", prompt, user_text)
        try:
            stats = self._streamed_query(prompt, user_text)
            elapsed = time.time() - start

            stats["time"] = elapsed
            stats["model"] = self._model
            stats["completed"] = True
            stats["message"] = f"\n[Cost: ~${stats['cost']:.2f} USD]"
        except openai.OpenAIError as e:
            self._warn_about_exception(e, "Unexpected OpenAI Error. Retry the query.")
            stats["message"] = f"[Exception: {e}]"
        except KeyboardInterrupt:
            # user action -- just ignore
            stats["message"] = "[Chat Interrupted]"
        except Exception as e:
            self._warn_about_exception(e, "Unexpected Exception.")
            stats["message"] = f"[Exception: {e}]"

        self._broadcast("on_end_query", stats)
        return stats

    def _report(self, stats):
        """Print a minimal completion/interruption notice to stdout."""
        if stats["completed"]:
            print()
        else:
            print("[Chat Interrupted]")

    def _broadcast(self, method_name, *args):
        """Invoke `method_name(*args)` on every listener that defines it."""
        for client in self._clients:
            method = getattr(client, method_name, None)
            if callable(method):
                method(*args)

    def _check_model(self):
        """Validate API keys and tool-call support for the chosen model."""
        result = litellm.validate_environment(self._model)
        missing_keys = result["missing_keys"]
        if missing_keys:
            _, provider, _, _ = litellm.get_llm_provider(self._model)
            if provider == "openai":
                raise AssistantError(
                    textwrap.dedent(
                        f"""\
                        You need an OpenAI key to use the {self._model} model.
                        You can get a key here: https://platform.openai.com/api-keys.
                        Set the environment variable OPENAI_API_KEY to your key value."""
                    )
                )
            else:
                raise AssistantError(
                    textwrap.dedent(
                        f"""\
                        You need to set the following environment variables
                        to use the {self._model} model: {', '.join(missing_keys)}."""
                    )
                )

        # Bug fix: previously a bare `except:` around this whole check also
        # caught the "does not support function calls" AssistantError raised
        # inside it and replaced it with the generic "not a supported model"
        # message. Probe and raise separately so each error is reported.
        try:
            supports_calls = litellm.supports_function_calling(self._model)
        except Exception:
            raise AssistantError(
                textwrap.dedent(
                    f"""\
                    {self._model} does not appear to be a supported model.
                    See https://docs.litellm.ai/docs/providers."""
                )
            )
        if not supports_calls:
            raise AssistantError(
                textwrap.dedent(
                    f"""\
                    The {self._model} model does not support function calls.
                    You must use a model that does, eg. gpt-4."""
                )
            )

    def _add_function(self, function):
        """
        Add a new function to the list of function tools.
        The function should have the necessary json spec as its docstring
        """
        schema = json.loads(function.__doc__)
        assert "name" in schema, "Bad JSON in docstring for function tool."
        self._functions[schema["name"]] = {"function": function, "schema": schema}

    def _make_call(self, tool_call) -> str:
        """Dispatch one tool call and return its (sanitized) textual result."""
        name = tool_call.function.name
        try:
            args = json.loads(tool_call.function.arguments)
            function = self._functions[name]
            call, result = function["function"](**args)
            result = remove_non_printable_chars(strip_ansi(result).expandtabs())
            self._broadcast("on_function_call", call, result)
        except KeyboardInterrupt as e:
            raise e
        except Exception as e:
            # likely to be an exception from the code we ran, not a bug...
            result = f"Exception in function call: {e}"
            self._broadcast("on_warn", result)
        return result

    def _streamed_query(self, prompt: str, user_text):
        """Run one user prompt to completion, looping through any tool calls."""
        cost = 0

        self._conversation.append({"role": "user", "content": prompt})

        while True:
            stream = self._stream_completion()

            # litellm.stream_chunk_builder is broken for new GPT models
            # that have content before calls, so...

            # stream the response, collecting the tool_call parts separately
            # from the content
            try:
                self._broadcast("on_begin_stream")
                chunks = []
                tool_chunks = []
                for chunk in stream:
                    chunks.append(chunk)
                    if chunk.choices[0].delta.content:
                        self._broadcast(
                            "on_stream_delta", chunk.choices[0].delta.content
                        )
                    else:
                        tool_chunks.append(chunk)
            finally:
                self._broadcast("on_end_stream")

            # then compute for the part that litellm gives back.
            completion = litellm.stream_chunk_builder(
                chunks, messages=self._conversation
            )
            cost += litellm.completion_cost(completion)

            # add content to conversation, but if there is no content, then the message
            # has only tool calls, and skip this step
            response_message = completion.choices[0].message
            if response_message.content is not None:
                # fix: remove tool calls. They get added below.
                response_message = response_message.copy()
                response_message["tool_calls"] = None
                self._conversation.append(response_message.json())

            if response_message.content is not None:
                self._broadcast("on_response", response_message.content)

            if completion.choices[0].finish_reason == "tool_calls":
                # create a message with just the tool calls, append that to the conversation, and generate the responses.
                tool_completion = litellm.stream_chunk_builder(
                    tool_chunks, self._conversation
                )

                # this part wasn't counted above...
                cost += litellm.completion_cost(tool_completion)

                tool_message = tool_completion.choices[0].message

                tool_json = tool_message.json()

                # patch for litellm sometimes putting index fields in the tool calls it constructs
                # in stream_chunk_builder. gpt-4-turbo-2024-04-09 can't handle those index fields, so
                # just remove them for the moment.
                for tool_call in tool_json.get("tool_calls", []):
                    _ = tool_call.pop("index", None)

                tool_json["role"] = "assistant"
                self._conversation.append(tool_json)
                self._add_function_results_to_conversation(tool_message)
            else:
                break

        stats = {
            "cost": cost,
            "tokens": completion.usage.total_tokens,
            "prompt_tokens": completion.usage.prompt_tokens,
            "completion_tokens": completion.usage.completion_tokens,
        }
        return stats

    def _stream_completion(self):
        """Start one streaming completion, trimming the conversation first."""
        self._trim_conversation()

        return litellm.completion(
            model=self._model,
            messages=self._conversation,
            tools=[
                {"type": "function", "function": f["schema"]}
                for f in self._functions.values()
            ],
            timeout=self._timeout,
            stream=True,
        )

    def _trim_conversation(self):
        """Trim the conversation to fit the model's context, warning if so."""
        old_len = litellm.token_counter(self._model, messages=self._conversation)

        self._conversation = trim_messages(self._conversation, self._model)

        new_len = litellm.token_counter(self._model, messages=self._conversation)
        if old_len != new_len:
            self._broadcast(
                "on_warn", f"Trimming conversation from {old_len} to {new_len} tokens."
            )

    def _add_function_results_to_conversation(self, response_message):
        """Execute each tool call and append its (truncated) result message."""
        response_message["role"] = "assistant"
        tool_calls = response_message.tool_calls
        try:
            for tool_call in tool_calls:
                function_response = self._make_call(tool_call)
                # Keep the head and tail of oversized results.
                function_response = sandwich_tokens(
                    function_response, self._model, self._max_call_response_tokens, 0.5
                )
                response = {
                    "tool_call_id": tool_call.id,
                    "role": "tool",
                    "name": tool_call.function.name,
                    "content": function_response,
                }
                self._conversation.append(response)
        except Exception as e:
            # Warning: potential infinite loop if the LLM keeps sending
            # the same bad call.
            self._broadcast(
                "on_error", f"An exception occurred while processing tool calls: {e}"
            )
315 |
--------------------------------------------------------------------------------
/src/chatdbg/assistant/listeners.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import textwrap
3 |
4 |
class BaseAssistantListener:
    """
    Events that the Assistant generates. Every handler is a no-op here, so
    clients subclass this and override only the events they care about.
    """

    # -- Dialog lifecycle: a dialog spans one or more queries. --

    def on_begin_dialog(self, instructions):
        """A dialog started with the given system instructions."""

    def on_end_dialog(self):
        """The dialog finished."""

    # -- Events for a single query --

    def on_begin_query(self, prompt, user_text):
        """A query started; `user_text` is what the user typed."""

    def on_response(self, text):
        """The model produced a complete response."""

    def on_function_call(self, call, result):
        """The model invoked a tool with `call`, producing `result`."""

    def on_end_query(self, stats):
        """The query finished; `stats` holds cost/usage information."""

    # -- For clients wishing to stream responses --

    def on_begin_stream(self):
        """A streamed response is starting."""

    def on_stream_delta(self, text):
        """One chunk of streamed response text arrived."""

    def on_end_stream(self):
        """The streamed response is complete."""

    # -- Notifications of non-fatal problems --

    def on_warn(self, text):
        """A warning was reported."""

    def on_error(self, text):
        """An error was reported."""
50 |
51 |
class Printer(BaseAssistantListener):
    """Listener that writes assistant events to a stream (default stdout)."""

    def __init__(self, out=sys.stdout):
        self.out = out

    def on_warn(self, text):
        print(textwrap.indent(text, "*** "), file=self.out)

    def on_error(self, text):
        print(textwrap.indent(text, "*** "), file=self.out)

    def on_begin_stream(self):
        pass

    def on_stream_delta(self, text):
        print(text, end="", file=self.out, flush=True)

    def on_end_stream(self):
        pass

    def on_begin_query(self, prompt, user_text):
        pass

    def on_end_query(self, stats):
        pass

    def on_response(self, text):
        # `is not None` (PEP 8) rather than `!= None`; an empty string still prints.
        if text is not None:
            print(text, file=self.out)

    def on_function_call(self, call, result):
        # A non-empty result is shown under the call; `if result:` already
        # covers the redundant `len(result) > 0` check.
        if result:
            entry = f"{call}\n{result}"
        else:
            entry = f"{call}"
        print(entry, file=self.out)
87 |
88 |
class StreamingPrinter(Printer):
    """Printer that emits deltas as they arrive and skips the final response."""

    def __init__(self, out=sys.stdout):
        super().__init__(out)

    def on_begin_stream(self):
        # Bug fix: write the separator line to self.out, not implicit stdout.
        print("", file=self.out, flush=True)

    def on_stream_delta(self, text):
        print(text, end="", file=self.out, flush=True)

    def on_end_stream(self):
        # Bug fix: write to self.out, not implicit stdout.
        print("", file=self.out, flush=True)

    def on_response(self, text):
        # No-op: the text was already printed chunk-by-chunk in on_stream_delta.
        pass
104 |
--------------------------------------------------------------------------------
/src/chatdbg/chatdbg_gdb.py:
--------------------------------------------------------------------------------
1 | import os
2 | import atexit
3 | from typing import List, Optional, Union
4 |
5 | import gdb
6 |
7 | from chatdbg.native_util import clangd_lsp_integration
8 | from chatdbg.native_util.code import code
9 | from chatdbg.native_util.dbg_dialog import DBGDialog
10 | from chatdbg.native_util.stacks import (
11 | _ArgumentEntry,
12 | _FrameSummaryEntry,
13 | _SkippedFramesEntry,
14 | )
15 | from chatdbg.util.config import chatdbg_config
16 | from chatdbg.native_util.safety import command_is_safe
17 | from chatdbg.util.exit_message import chatdbg_was_called, print_exit_message
18 |
# The file produced by the panic handler if the Rust program is using the chatdbg crate.
RUST_PANIC_LOG_FILENAME = "panic_log.txt"
# Prompt string shown while ChatDBG is loaded into gdb.
PROMPT = "(ChatDBG gdb) "

# Set the prompt to ChatDBG gdb
gdb.prompt_hook = lambda current_prompt: PROMPT


# Most recent stop reason (e.g. a signal name); written by stop_handler below.
last_error_type = ""

# Print ChatDBG's exit message when gdb terminates.
atexit.register(print_exit_message)
30 |
31 |
def stop_handler(event):
    """Sets last error type so we can report it later."""
    global last_error_type
    if not hasattr(event, "stop_signal"):
        # Not a real error (e.g., a breakpoint stop).
        last_error_type = ""
    elif event.stop_signal is not None:
        last_error_type = event.stop_signal
41 |
42 |
43 | gdb.events.stop.connect(stop_handler)
44 |
45 |
class Code(gdb.Command):
    """`code` command: print the source code surrounding a location."""

    def __init__(self):
        super().__init__("code", gdb.COMMAND_USER)

    def invoke(self, command, from_tty):
        print(code(command))


Code()
57 |
58 |
class Definition(gdb.Command):
    """`definition` command: look up a symbol's definition via clangd."""

    def __init__(self):
        super().__init__("definition", gdb.COMMAND_USER)

    def invoke(self, command, from_tty):
        print(clangd_lsp_integration.native_definition(command))


Definition()
70 |
71 |
class Config(gdb.Command):
    """`config` command: view or update ChatDBG configuration flags."""

    def __init__(self):
        super().__init__("config", gdb.COMMAND_USER)

    def invoke(self, command, from_tty):
        print(chatdbg_config.parse_only_user_flags(command.split()))


Config()
85 |
86 |
87 | # Implement the command `why`
88 | class Why(gdb.Command):
89 | """Provides root cause analysis for a failure."""
90 |
91 | def __init__(self):
92 | gdb.Command.__init__(self, "why", gdb.COMMAND_USER)
93 |
94 | def invoke(self, command, from_tty):
95 | try:
96 | dialog = GDBDialog(PROMPT)
97 | dialog.dialog(command)
98 | except Exception as e:
99 | print(str(e))
100 | return
101 |
102 |
103 | Why()
104 |
105 | gdb.execute("alias chat = why")
106 |
107 |
class GDBDialog(DBGDialog):
    """GDB-specific implementation of the ChatDBG dialog."""

    def __init__(self, prompt) -> None:
        chatdbg_was_called()
        super().__init__(prompt)

    def _message_is_a_bad_command_error(self, message):
        """True if `message` is gdb's response to an unknown command."""
        return message.strip().startswith("Undefined command:")

    def _run_one_command(self, command):
        """Run one gdb command and return its output (or the error text)."""
        try:
            return gdb.execute(command, to_string=True)
        except Exception as e:
            return str(e)

    def check_debugger_state(self):
        """Fail unless gdb is attached to a stopped, debug-build program."""
        global last_error_type
        if not last_error_type:
            # Assume we are running from a core dump,
            # which _probably_ means a SEGV.
            last_error_type = "SIGSEGV"
        try:
            frame = gdb.selected_frame()
            # block() raises RuntimeError when the program lacks debug
            # information; that case is reported below.
            frame.block()
        except gdb.error:
            self.fail(
                "Must be attached to a program that fails to use `why` or `chat`."
            )
        except RuntimeError:
            self.fail(
                "Your program must be compiled with debug information (`-g`) to use `why` or `chat`."
            )

    def _get_frame_summaries(
        self, max_entries: int = 20
    ) -> Optional[List[Union[_FrameSummaryEntry, _SkippedFramesEntry]]]:
        """
        Walk the stack and return up to `max_entries` summaries, collapsing
        unnamed or no-source frames into _SkippedFramesEntry records.
        Returns None if no thread is selected.
        """
        thread = gdb.selected_thread()
        if not thread:
            return None

        skipped = 0
        summaries: List[Union[_FrameSummaryEntry, _SkippedFramesEntry]] = []

        frame = gdb.selected_frame()

        index = -1
        # Walk the stack and build up the frames list.
        while frame is not None:
            index += 1

            name = frame.name()
            if not name:
                skipped += 1
                frame = frame.older()
                continue
            symtab_and_line = frame.find_sal()

            # Get frame file path
            if symtab_and_line.symtab is not None:
                file_path = symtab_and_line.symtab.fullname()
                if file_path is None:
                    file_path = "[unknown]"
                else:
                    # If we are in a subdirectory, use a relative path instead.
                    if file_path.startswith(os.getcwd()):
                        file_path = os.path.relpath(file_path)
                    # Skip frames for which we have no source -- likely system frames.
                    if not os.path.exists(file_path):
                        skipped += 1
                        frame = frame.older()
                        continue
            else:
                file_path = None

            # Get frame lineno
            if symtab_and_line.line is not None:
                lineno = symtab_and_line.line
            else:
                lineno = None

            # Get arguments. (The dead `block = gdb.Block` placeholder is gone.)
            arguments: List[_ArgumentEntry] = []
            try:
                block = frame.block()
            except Exception:
                skipped += 1
                frame = frame.older()
                continue
            for symbol in block:
                if symbol.is_argument:
                    # Bug fix: use a separate variable so the frame's function
                    # name in `name` (used for the summary below) is not
                    # clobbered by the argument names.
                    arg_name = symbol.name
                    value = str(frame.read_var(arg_name))
                    arguments.append(_ArgumentEntry(symbol.type, arg_name, value))

            if skipped > 0:
                summaries.append(_SkippedFramesEntry(skipped))
                skipped = 0

            summaries.append(
                _FrameSummaryEntry(index, name, arguments, file_path, lineno)
            )
            if len(summaries) >= max_entries:
                break
            frame = frame.older()

        if skipped > 0:
            summaries.append(_SkippedFramesEntry(skipped))

        return summaries

    def _initial_prompt_error_message(self):
        """Return the stop reason, with any Rust panic log prepended."""
        # If the Rust panic log exists, prepend it to the error reason.
        global last_error_type
        try:
            with open(RUST_PANIC_LOG_FILENAME, "r") as log:
                panic_log = log.read()
            last_error_type = panic_log + "\n" + last_error_type
        except OSError:
            # No panic log: not a Rust program using the chatdbg crate.
            pass
        return last_error_type

    def _initial_prompt_error_details(self):
        """Anything more beyond the initial error message to include."""
        return None

    def _program_args(self):
        """Return gdb's argument string for the debugged program."""
        prefix = "Argument list to give program being debugged when it is started is "
        args = gdb.execute("show args", to_string=True).strip()
        if args.startswith(prefix):
            args = args[len(prefix) :].strip('."')
        return args

    def _initial_prompt_command_line(self):
        """Reconstruct the command line used to run the debugged program."""
        executable_path = gdb.selected_inferior().progspace.filename

        if executable_path.startswith(os.getcwd()):
            executable_path = os.path.join(".", os.path.relpath(executable_path))

        return executable_path + " " + self._program_args()

    def _initial_prompt_input(self):
        """If stdin was redirected with `<`, return that file's contents."""
        args = self._program_args()

        input_pipe = args.find("<")
        if input_pipe != -1:
            input_file = args[input_pipe + 1 :].strip()
            try:
                # Bug fix: close the file handle instead of leaking it.
                with open(input_file, "r") as f:
                    return f.read()
            except Exception:
                self.fail(f"The detected input file {input_file} could not be read.")

    def _prompt_stack(self):
        """
        Return a simple backtrace to show the LLM where we are on the stack
        in followup prompts.
        """
        return None

    def llm_debug(self, command: str):
        """
        {
            "name": "debug",
            "description": "The `debug` function runs a GDB command on the stopped program and gets the response.",
            "parameters": {
                "type": "object",
                "properties": {
                    "command": {
                        "type": "string",
                        "description": "The GDB command to run, possibly with arguments."
                    }
                },
                "required": [ "command" ]
            }
        }
        """
        # The docstring above is the tool's JSON schema -- do not edit casually.
        if not chatdbg_config.unsafe and not command_is_safe(command):
            self._unsafe_cmd = True
            return command, f"Command `{command}` is not allowed."
        return command, self._run_one_command(command)
291 |
--------------------------------------------------------------------------------
/src/chatdbg/chatdbg_lldb.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from typing import Optional, Union
4 |
5 | import lldb
6 |
7 | from chatdbg.native_util import clangd_lsp_integration
8 | from chatdbg.native_util.code import code
9 | from chatdbg.native_util.dbg_dialog import DBGDialog
10 | from chatdbg.native_util.stacks import (
11 | _ArgumentEntry,
12 | _FrameSummaryEntry,
13 | _SkippedFramesEntry,
14 | )
15 | from chatdbg.util.config import chatdbg_config
16 | from chatdbg.util.exit_message import chatdbg_was_called, print_exit_message
17 | from chatdbg.native_util.safety import command_is_safe
18 |
19 |
# The file produced by the panic handler if the Rust program is using the chatdbg crate.
RUST_PANIC_LOG_FILENAME = "panic_log.txt"
# Prompt string installed into lldb when this module loads.
PROMPT = "(ChatDBG lldb) "
23 |
24 |
def __lldb_init_module(debugger: lldb.SBDebugger, internal_dict: dict) -> None:
    """Called by lldb when this module is imported: set up ChatDBG."""
    debugger.HandleCommand(f"settings set prompt '{PROMPT}'")
    debugger.SetDestroyCallback(print_exit_message)
    # Default ChatDBG output to markdown formatting.
    chatdbg_config.format = "md"
29 |
30 |
@lldb.command("code")
def _function_code(
    debugger: lldb.SBDebugger,
    command: str,
    result: lldb.SBCommandReturnObject,
    internal_dict: dict,
) -> None:
    """`code` command: print the source surrounding the given location."""
    result.AppendMessage(code(command))
39 |
40 |
@lldb.command("definition")
def _function_definition(
    debugger: lldb.SBDebugger,
    command: str,
    result: lldb.SBCommandReturnObject,
    internal_dict: dict,
) -> None:
    """`definition` command: look up a symbol's definition via clangd."""
    result.AppendMessage(clangd_lsp_integration.native_definition(command))
49 |
50 |
@lldb.command("chat")
@lldb.command("why")
def chat(
    debugger: lldb.SBDebugger,
    command: str,
    result: lldb.SBCommandReturnObject,
    internal_dict: dict,
):
    """Implements the `why`/`chat` commands: run a ChatDBG dialog."""
    try:
        LLDBDialog(PROMPT, debugger).dialog(command)
    except Exception as e:
        result.SetError(str(e))
64 |
65 |
@lldb.command("config")
def config(
    debugger: lldb.SBDebugger,
    command: str,
    result: lldb.SBCommandReturnObject,
    internal_dict: dict,
):
    """Implements the `config` command: view/update ChatDBG flags."""
    result.AppendMessage(chatdbg_config.parse_only_user_flags(command.split()))
76 |
77 |
78 | class LLDBDialog(DBGDialog):
79 |
80 | def __init__(self, prompt, debugger) -> None:
81 | super().__init__(prompt)
82 | chatdbg_was_called()
83 | self._debugger = debugger
84 |
85 | def _message_is_a_bad_command_error(self, message):
86 | return message.strip().endswith("is not a valid command.")
87 |
88 | def _run_one_command(self, command):
89 | interpreter = self._debugger.GetCommandInterpreter()
90 | result = lldb.SBCommandReturnObject()
91 | interpreter.HandleCommand(command, result)
92 |
93 | if result.Succeeded():
94 | return result.GetOutput()
95 | else:
96 | return result.GetError()
97 |
98 | def _is_debug_build(self) -> bool:
99 | """Returns False if not compiled with debug information."""
100 | target = self._debugger.GetSelectedTarget()
101 | if not target:
102 | return False
103 | for module in target.module_iter():
104 | for cu in module.compile_unit_iter():
105 | for line_entry in cu:
106 | if line_entry.GetLine() > 0:
107 | return True
108 | return False
109 |
110 | def get_thread(self) -> Optional[lldb.SBThread]:
111 | """
112 | Returns a currently stopped thread in the debugged process.
113 | :return: A currently stopped thread or None if no thread is stopped.
114 | """
115 | process = self._get_process()
116 | if not process:
117 | return None
118 | for thread in process:
119 | reason = thread.GetStopReason()
120 | if reason not in [lldb.eStopReasonNone, lldb.eStopReasonInvalid]:
121 | return thread
122 | return thread
123 |
124 | def check_debugger_state(self):
125 | if not self._debugger.GetSelectedTarget():
126 | self.fail("Must be attached to a program to use `why` or `chat`.")
127 |
128 | elif not self._is_debug_build():
129 | self.fail(
130 | "Your program must be compiled with debug information (`-g`) to use `why` or `chat`."
131 | )
132 |
133 | thread = self.get_thread()
134 | if not thread:
135 | self.fail("must run the code first to use `chat`.")
136 |
137 | if not clangd_lsp_integration.is_available():
138 | self.warn(
139 | "`clangd` was not found. The `find_definition` function will not be made available."
140 | )
141 |
142 | def _get_frame_summaries(
143 | self, max_entries: int = 20
144 | ) -> Optional[list[Union[_FrameSummaryEntry, _SkippedFramesEntry]]]:
145 | thread = self.get_thread()
146 | if not thread:
147 | return None
148 |
149 | skipped = 0
150 | summaries: list[Union[_FrameSummaryEntry, _SkippedFramesEntry]] = []
151 |
152 | index = -1
153 | # For each frame in thread
154 | for frame in thread:
155 | index += 1
156 | if not frame.GetDisplayFunctionName():
157 | skipped += 1
158 | continue
159 | name = frame.GetDisplayFunctionName().split("(")[0]
160 | # Get function arguments, store as _ArgumentEntries
161 | arguments: list[_ArgumentEntry] = []
162 | for j in range(
163 | frame.GetFunction().GetType().GetFunctionArgumentTypes().GetSize()
164 | ):
165 | arg = frame.FindVariable(frame.GetFunction().GetArgumentName(j))
166 | if not arg:
167 | arguments.append(
168 | _ArgumentEntry("[unknown]", "[unknown]", "[unknown]")
169 | )
170 | continue
171 | # TODO: Check if we should simplify / truncate types, e.g. std::unordered_map.
172 | arguments.append(
173 | _ArgumentEntry(arg.GetTypeName(), arg.GetName(), arg.GetValue())
174 | )
175 |
176 | # Look for paths to the function file. If there's no source, skip frame.
177 | line_entry = frame.GetLineEntry()
178 | file_path = line_entry.GetFileSpec().fullpath
179 | if file_path == None:
180 | file_path = "[unknown]"
181 | lineno = line_entry.GetLine()
182 |
183 | # If we are in a subdirectory, use a relative path instead.
184 | if file_path.startswith(os.getcwd()):
185 | file_path = os.path.relpath(file_path)
186 |
187 | # Skip frames for which we have no source -- likely system frames.
188 | if not os.path.exists(file_path):
189 | skipped += 1
190 | continue
191 |
192 | # Add _SkippedFramesEntry onto summaries list
193 | if skipped > 0:
194 | summaries.append(_SkippedFramesEntry(skipped))
195 | skipped = 0
196 |
197 | # Otherwise, add _FrameSummaryEntries until max_entries, then break
198 | summaries.append(
199 | _FrameSummaryEntry(index, name, arguments, file_path, lineno)
200 | )
201 | if len(summaries) >= max_entries:
202 | break
203 |
204 | if skipped > 0:
205 | summaries.append(_SkippedFramesEntry(skipped))
206 | if len(summaries) > max_entries:
207 | summaries.pop(-2)
208 |
209 | total_summary_count = sum(
210 | [s.count() if isinstance(s, _SkippedFramesEntry) else 1 for s in summaries]
211 | )
212 |
213 | if total_summary_count < len(thread):
214 | if isinstance(summaries[-1], _SkippedFramesEntry):
215 | summaries[-1] = _SkippedFramesEntry(
216 | len(thread) - total_summary_count + summaries[-1].count()
217 | )
218 | else:
219 | summaries.append(
220 | _SkippedFramesEntry(len(thread) - total_summary_count + 1)
221 | )
222 | if len(summaries) > max_entries:
223 | summaries.pop(-2)
224 |
225 | assert sum(
226 | [s.count() if isinstance(s, _SkippedFramesEntry) else 1 for s in summaries]
227 | ) == len(thread)
228 |
229 | return summaries
230 |
231 | def _get_process(self) -> Optional[lldb.SBProcess]:
232 | """
233 | Get the process that the current target owns.
234 | :return: An lldb object representing the process (lldb.SBProcess) that this target owns.
235 | """
236 | target = self._debugger.GetSelectedTarget()
237 | return target.process if target else None
238 |
239 | def _initial_prompt_error_message(self):
240 | thread = self.get_thread()
241 |
242 | error_message = thread.GetStopDescription(1024) if thread else None
243 | if error_message:
244 | return error_message
245 | else:
246 | self.warn("could not generate an error message.")
247 | return None
248 |
249 | def _initial_prompt_command_line(self):
250 | executable = self._debugger.GetSelectedTarget().GetExecutable()
251 |
252 | executable_path = os.path.join(
253 | executable.GetDirectory(), executable.GetFilename()
254 | )
255 | if executable_path.startswith(os.getcwd()):
256 | executable_path = os.path.join(".", os.path.relpath(executable_path))
257 |
258 | command_line_arguments = [
259 | self._debugger.GetSelectedTarget().GetLaunchInfo().GetArgumentAtIndex(i)
260 | for i in range(
261 | self._debugger.GetSelectedTarget().GetLaunchInfo().GetNumArguments()
262 | )
263 | ]
264 |
265 | command_line_invocation = " ".join([executable_path, *command_line_arguments])
266 | if command_line_invocation:
267 | return command_line_invocation
268 | else:
269 | self.warn("could not retrieve the command line invocation.")
270 | return None
271 |
272 | def _initial_prompt_input(self):
273 | stream = lldb.SBStream()
274 | self._debugger.GetSetting("target.input-path").GetAsJSON(stream)
275 | entry = json.loads(stream.GetData())
276 |
277 | input_path = entry if entry else None
278 | if input_path:
279 | try:
280 | with open(input_path, "r", errors="ignore") as file:
281 | input_contents = file.read()
282 | return input_contents
283 | except FileNotFoundError:
284 | self.warn("could not retrieve the input data.")
285 | return None
286 |
    def _initial_prompt_error_details(self):
        """Anything more beyond the initial error message to include."""
        # Overridable hook: this debugger contributes no extra detail.
        return None
290 |
    def _prompt_stack(self):
        """
        Return a simple backtrace to show the LLM where we are on the stack
        in followup prompts.
        """
        # Overridable hook: followup prompts from this debugger omit the stack.
        return None
297 |
    # Exposed to the LLM as the `debug` tool. NOTE: the docstring below is
    # consumed at runtime as the JSON schema for the tool call — treat it as
    # data, not as ordinary documentation.
    def llm_debug(self, command: str):
        """
        {
            "name": "debug",
            "description": "The `debug` function runs an LLDB command on the stopped program and gets the response.",
            "parameters": {
                "type": "object",
                "properties": {
                    "command": {
                        "type": "string",
                        "description": "The LLDB command to run, possibly with arguments."
                    }
                },
                "required": [ "command" ]
            }
        }
        """
        # Unless the user opted into unsafe mode, refuse commands that fail the
        # whitelist check, and remember the refusal so the dialog can warn.
        if not chatdbg_config.unsafe and not command_is_safe(command):
            self._unsafe_cmd = True
            return command, f"Command `{command}` is not allowed."
        # Returns (command, output) so listeners can log what actually ran.
        return command, self._run_one_command(command)
319 |
--------------------------------------------------------------------------------
/src/chatdbg/custom_pdb/prompts.py:
--------------------------------------------------------------------------------
# Building blocks for the Pdb assistant's system prompt; the final prompt is
# assembled from these fragments by `pdb_instructions` below.
# (The fragments had `f` string prefixes with no placeholders — removed.)

_intro = """\
You are a debugging assistant. You will be given a Python stack trace for an
error and answer questions related to the root cause of the error.
"""

_pdb_function = """\
Call the `pdb` function to run Pdb debugger commands on the stopped program. You
may call the `pdb` function to run the following commands: `bt`, `up`, `down`,
`p expression`, `list`.

Call `pdb` to print any variable value or expression that you believe may
contribute to the error.
"""


_info_function = """\
Call the `info` function to get the documentation and source code for any
variable, function, package, class, method reference, field reference, or
dotted reference visible in the current frame. Examples include: n, e.n
where e is an expression, and t.n where t is a type.

Unless it is from a common, widely-used library, you MUST call `info` exactly once on any
symbol that is referenced in code leading up to the error.
"""


_slice_function = """\
Call the `slice` function to get the code used to produce
the value currently stored a variable. You MUST call `slice` exactly once on any
variable used but not defined in the current frame's code.
"""

_take_the_wheel_instructions = """\
Call the provided functions as many times as you would like.
"""

_general_instructions = """\
The root cause of any error is likely due to a problem in the source code from the user.

Explain why each variable contributing to the error has been set
to the value that it has.

Continue with your explanations until you reach the root cause of the error. Your answer may be as long as necessary.

End your answer with a section titled "##### Recommendation\\n" that contains one of:
* a fix if you have identified the root cause
* a numbered list of 1-3 suggestions for how to continue debugging if you have not
"""


# Full prompts for each capability level.
_wheel_and_slice = f"""\
{_intro}
{_pdb_function}
{_info_function}
{_slice_function}
{_take_the_wheel_instructions}
{_general_instructions}
"""

_wheel_no_slice = f"""\
{_intro}
{_pdb_function}
{_info_function}
{_take_the_wheel_instructions}
{_general_instructions}
"""

_no_wheel = f"""\
{_intro}
{_general_instructions}
"""


def pdb_instructions(supports_flow: bool, take_the_wheel: bool) -> str:
    """
    Select the system prompt for the Pdb assistant.

    :param supports_flow: whether the `slice` (data-flow) function is available.
    :param take_the_wheel: whether the LLM may drive the debugger itself.
    :return: the assembled instruction prompt.
    """
    if take_the_wheel:
        return _wheel_and_slice if supports_flow else _wheel_no_slice
    return _no_wheel
82 |
--------------------------------------------------------------------------------
/src/chatdbg/custom_pdb/text.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import Union
3 |
4 |
def make_arrow(pad):
    """Generate the leading arrow shown in front of a traceback or debugger line."""
    if pad == 1:
        return ">"
    if pad >= 2:
        dashes = "-" * (pad - 2)
        return dashes + "> "
    return ""
12 |
13 |
# Compiled once at import time; strip_color may run on every line of output,
# and the original recompiled this pattern on each call.
_ANSI_ESCAPE_RE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")


def strip_color(s: str) -> str:
    """Return *s* with all ANSI escape (color/control) sequences removed."""
    return _ANSI_ESCAPE_RE.sub("", s)
17 |
18 |
def truncate_proportionally(
    text: str, maxlen: int = 32000, top_proportion: Union[float, int] = 0.5
) -> str:
    """Omit the middle of a string if needed to make it fit in *maxlen* characters."""
    if len(text) <= maxlen:
        return text
    budget = maxlen - 3  # room left once the "..." marker is inserted
    head = max(0, int(budget * top_proportion))
    tail = max(0, budget - head)
    return text[:head] + "..." + text[len(text) - tail :]
28 |
--------------------------------------------------------------------------------
/src/chatdbg/native_util/clangd_lsp_integration.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import subprocess
4 | import urllib.parse
5 |
6 | import llm_utils
7 |
8 |
9 | def _to_lsp_request(id, method, params):
10 | request = {"jsonrpc": "2.0", "id": id, "method": method}
11 | if params:
12 | request["params"] = params
13 |
14 | content = json.dumps(request)
15 | header = f"Content-Length: {len(content)}\r\n\r\n"
16 | return header + content
17 |
18 |
19 | # Same as a request, but without an id.
20 | def _to_lsp_notification(method, params):
21 | request = {"jsonrpc": "2.0", "method": method}
22 | if params:
23 | request["params"] = params
24 |
25 | content = json.dumps(request)
26 | header = f"Content-Length: {len(content)}\r\n\r\n"
27 | return header + content
28 |
29 |
30 | def _parse_lsp_response(id, file):
31 | # Ignore all messages until the response with the correct id is found.
32 | while True:
33 | header = {}
34 | while True:
35 | line = file.readline().strip()
36 | if not line:
37 | break
38 | key, value = line.split(":", 1)
39 | header[key.strip()] = value.strip()
40 |
41 | content = file.read(int(header["Content-Length"]))
42 | response = json.loads(content)
43 | if "id" in response and response["id"] == id:
44 | return response
45 |
46 |
47 | def _path_to_uri(path):
48 | return "file://" + os.path.abspath(path)
49 |
50 |
def uri_to_path(uri):
    """Convert a file:// URI back to a filesystem path (cwd-relative if possible)."""
    parsed = urllib.parse.urlparse(uri)

    assert parsed.scheme == "file"
    assert not parsed.netloc
    assert not parsed.params
    assert not parsed.query
    assert not parsed.fragment

    path = parsed.path
    cwd = os.getcwd()
    if path.startswith(cwd):
        path = os.path.relpath(path, cwd)
    # clangd seems to escape paths, so unquote last.
    return urllib.parse.unquote(path)
64 |
65 |
def is_available(executable="clangd"):
    """Return True when *executable* exists and runs (gates clangd-based features)."""
    try:
        result = subprocess.run(
            [executable, "--version"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except FileNotFoundError:
        return False
    return result.returncode == 0
76 |
77 |
class clangd:
    """Thin wrapper around a `clangd` subprocess speaking LSP over stdin/stdout."""

    def __init__(
        self,
        executable="clangd",
        working_directory=None,
        stderr=subprocess.DEVNULL,
    ):
        """
        Start clangd and perform the LSP initialize handshake.

        :param executable: clangd binary to launch.
        :param working_directory: cwd for the server; None means the current
            directory at call time. (The old default of `os.getcwd()` was
            evaluated once at import time, freezing whatever directory the
            module happened to be imported from.)
        :param stderr: where to send the server's stderr.
        """
        if working_directory is None:
            working_directory = os.getcwd()
        self.id = 0
        self.process = subprocess.Popen(
            [executable],
            text=True,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=stderr,
            cwd=working_directory,
        )
        self.initialize()

    def __del__(self):
        # Popen may never have been assigned if __init__ failed early; guard
        # so interpreter shutdown does not raise AttributeError.
        process = getattr(self, "process", None)
        if process is not None:
            process.terminate()

    def initialize(self):
        """Send the LSP `initialize` request and return the server's response."""
        self.id += 1
        request = _to_lsp_request(self.id, "initialize", {"processId": os.getpid()})
        self.process.stdin.write(request)
        self.process.stdin.flush()
        return _parse_lsp_response(self.id, self.process.stdout)
        # TODO: Assert there is no error.

    def didOpen(self, filename, languageId):
        """Notify clangd that *filename* is open with the given language id."""
        with open(filename, "r") as file:
            text = file.read()

        notification = _to_lsp_notification(
            "textDocument/didOpen",
            {
                "textDocument": {
                    "uri": _path_to_uri(filename),
                    "languageId": languageId,
                    "version": 1,
                    "text": text,
                }
            },
        )
        self.process.stdin.write(notification)
        self.process.stdin.flush()

    def didClose(self, filename):
        """Notify clangd that *filename* has been closed."""
        notification = _to_lsp_notification(
            "textDocument/didClose", {"textDocument": {"uri": _path_to_uri(filename)}}
        )
        self.process.stdin.write(notification)
        self.process.stdin.flush()

    def definition(self, filename, line, character):
        """Request the definition of the symbol at 1-based line:character."""
        self.id += 1
        request = _to_lsp_request(
            self.id,
            "textDocument/definition",
            {
                "textDocument": {"uri": _path_to_uri(filename)},
                "position": {
                    # Things are 0-indexed in LSP.
                    "line": line - 1,
                    "character": character - 1,
                },
            },
        )
        self.process.stdin.write(request)
        self.process.stdin.flush()
        return _parse_lsp_response(self.id, self.process.stdout)
149 |
150 |
def native_definition(command):
    """
    Resolve `definition <filename>:<lineno> <symbol>` by asking clangd for the
    symbol's definition and returning the surrounding numbered source lines.

    Returns a human/LLM-readable error string on any failure.
    """
    if not is_available():
        return "`clangd` was not found. The `definition` function will not be made available."
    last_space_index = command.rfind(" ")
    if last_space_index == -1:
        return "usage: definition <filename>:<lineno> <symbol>"
    filename_lineno = command[:last_space_index]
    symbol = command[last_space_index + 1 :]
    parts = filename_lineno.split(":")
    if len(parts) != 2:
        return "usage: definition <filename>:<lineno> <symbol>"
    filename, lineno = parts[0], int(parts[1])

    try:
        with open(filename, "r") as file:
            lines = file.readlines()
    except FileNotFoundError:
        # Include the actual filename (the old message printed a literal
        # "(unknown)" placeholder) so the LLM can correct its call.
        return f"file '{filename}' not found."

    if lineno - 1 >= len(lines):
        return "symbol not found at that location."

    # We just return the first match here. Maybe we should find all definitions.
    character = lines[lineno - 1].find(symbol)

    # Now, some heuristics to make up for GPT's terrible math skills.
    if character == -1:
        symbol = symbol.lstrip("*")
        character = lines[lineno - 1].find(symbol)

    if character == -1:
        symbol = symbol.split("::")[-1]
        character = lines[lineno - 1].find(symbol)

    # Check five lines above and below.
    if character == -1:
        for i in range(-5, 6, 1):
            if lineno - 1 + i < 0 or lineno - 1 + i >= len(lines):
                continue
            character = lines[lineno - 1 + i].find(symbol)
            if character != -1:
                lineno += i
                break

    if character == -1:
        return "symbol not found at that location."

    # Availability was already verified on entry, so start the server
    # unconditionally. (The old second `is_available()` check could leave the
    # client as None and crash on the next line.)
    _clangd = clangd()
    _clangd.didOpen(filename, "c" if filename.endswith(".c") else "cpp")
    definition = _clangd.definition(filename, lineno, character + 1)
    _clangd.didClose(filename)

    if "result" not in definition or not definition["result"]:
        return "No definition found."

    location = definition["result"][0]
    path = uri_to_path(location["uri"])
    start_lineno = location["range"]["start"]["line"] + 1
    end_lineno = location["range"]["end"]["line"] + 1
    lines, first = llm_utils.read_lines(path, start_lineno - 5, end_lineno + 5)
    content = llm_utils.number_group_of_lines(lines, first)
    line_string = (
        f"line {start_lineno}"
        if start_lineno == end_lineno
        else f"lines {start_lineno}-{end_lineno}"
    )
    return f"""File '{path}' at {line_string}:\n```\n{content}\n```"""
220 |
--------------------------------------------------------------------------------
/src/chatdbg/native_util/code.py:
--------------------------------------------------------------------------------
1 | import llm_utils
2 |
3 |
def code(command):
    """
    Handle `code <filename>:<lineno>`: return the numbered source lines
    surrounding (7 before through 3 after) the requested line.
    """
    parts = command.split(":")
    if len(parts) != 2:
        return "usage: code <filename>:<lineno>"
    filename, lineno = parts[0], int(parts[1])
    try:
        lines, first = llm_utils.read_lines(filename, lineno - 7, lineno + 3)
    except FileNotFoundError:
        # Report the actual missing filename (the old message printed a
        # literal "(unknown)" placeholder).
        return f"file '{filename}' not found."
    return llm_utils.number_group_of_lines(lines, first)
14 |
--------------------------------------------------------------------------------
/src/chatdbg/native_util/dbg_dialog.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from . import clangd_lsp_integration
4 | from ..util.prompts import (
5 | build_followup_prompt,
6 | build_initial_prompt,
7 | initial_instructions,
8 | )
9 |
10 | from ..assistant.assistant import Assistant
11 | from ..util.config import chatdbg_config
12 | from ..util.history import CommandHistory
13 | from ..util.log import ChatDBGLog
14 | from .stacks import build_enriched_stacktrace
15 |
16 |
class DBGError(Exception):
    """Raised when the debugger dialog must abort with a user-facing message."""

    def __init__(self, message):
        super().__init__(message)
        self.message = message
22 |
23 |
class DBGDialog:
    """
    Base class for the chat dialog between a native debugger (lldb/gdb) and
    the LLM assistant. Subclasses override the no-op hooks below to supply
    debugger-specific behavior.
    """

    # The log file used by the listener on the Assistant
    _log = ChatDBGLog(
        log_filename=chatdbg_config.log,
        config=chatdbg_config.to_json(),
        capture_streams=False,  # don't have access to target's stdout/stderr here.
    )

    def __init__(self, prompt) -> None:
        self._prompt = prompt
        self._history = CommandHistory(self._prompt)
        # Set by llm_debug when a blocked (unsafe) command is attempted.
        self._unsafe_cmd = False

    def query_and_print(self, assistant, user_text, is_followup):
        """Send one query to the assistant, print its reply, and warn if any
        debugger command was blocked as unsafe during the query."""
        prompt = self.build_prompt(user_text, is_followup)

        self._history.clear()
        print(assistant.query(prompt, user_text)["message"])
        if self._unsafe_cmd:
            self.warn(
                f"Warning: One or more debugger commands were blocked as potentially unsafe.\nWarning: You can disable sanitizing with `config --unsafe` and try again at your own risk."
            )
            self._unsafe_cmd = False

    def dialog(self, user_text):
        """Run the interactive chat loop until the user exits or input ends."""
        assistant = self._make_assistant()
        self.check_debugger_state()

        self.query_and_print(assistant, user_text, False)
        while True:
            try:
                command = input("(ChatDBG chatting) ").strip()
                if command in ["exit", "quit"]:
                    break
                if command in ["chat", "why"]:
                    self.query_and_print(assistant, command, True)
                elif command == "history":
                    print(self._history)
                else:
                    # Send the next input as an LLDB command
                    result = self._run_one_command(command)
                    if self._message_is_a_bad_command_error(result):
                        # If result is not a recognized command, pass it as a query
                        self.query_and_print(assistant, command, True)
                    else:
                        if command != "test_prompt":
                            self._history.append(command, result)
                        print(result)
            except EOFError:
                # If it causes an error, break
                break

        assistant.close()

    # Return string for valid command. None if the command is not valid.
    def _run_one_command(self, command):
        pass

    def _message_is_a_bad_command_error(self, message):
        pass

    def check_debugger_state(self):
        pass

    def _get_frame_summaries(self, max_entries: int = 20):
        pass

    def initial_prompt_instructions(self):
        """Build the system instructions, advertising the supported tools."""
        functions = self._supported_functions()
        return initial_instructions(functions)

    # NOTE: "enchriched" is a long-standing typo, kept because subclasses and
    # callers reference this name.
    def _initial_prompt_enchriched_stack_trace(self):
        return build_enriched_stacktrace(self._get_frame_summaries())

    def _initial_prompt_error_message(self):
        return None

    def _initial_prompt_error_details(self):
        """Anything more beyond the initial error message to include."""
        return None

    def _initial_prompt_command_line(self):
        return None

    def _initial_prompt_input(self):
        return None

    def _prompt_stack(self):
        """
        Return a simple backtrace to show the LLM where we are on the stack
        in followup prompts.
        """
        return None

    def _prompt_history(self):
        return str(self._history)

    def build_prompt(self, arg, conversing):
        """Assemble the initial prompt (first query) or a followup prompt."""
        if not conversing:
            return build_initial_prompt(
                self._initial_prompt_enchriched_stack_trace(),
                self._initial_prompt_error_message(),
                self._initial_prompt_error_details(),
                self._initial_prompt_command_line(),
                self._initial_prompt_input(),
                self._prompt_history(),
                user_text=arg,
            )
        else:
            return build_followup_prompt(
                self._prompt_history(), self._prompt_stack(), arg
            )

    def llm_debug(self, command: str) -> str:
        pass

    # NOTE: the docstring below is consumed at runtime as the tool's JSON
    # schema — treat it as data.
    def llm_get_code_surrounding(self, filename: str, line_number: int) -> str:
        """
        {
            "name": "get_code_surrounding",
            "description": "The `get_code_surrounding` function returns the source code in the given file surrounding and including the provided line number.",
            "parameters": {
                "type": "object",
                "properties": {
                    "filename": {
                        "type": "string",
                        "description": "The filename to read from."
                    },
                    "line_number": {
                        "type": "integer",
                        "description": "The line number to focus on. Some context before and after that line will be provided."
                    }
                },
                "required": [ "filename", "line_number" ]
            }
        }
        """
        # Fix: pass the requested filename through to the debugger command.
        # (Previously the literal placeholder "(unknown)" was sent, so the
        # `filename` parameter was silently ignored.)
        return f"code {filename}:{line_number}", self._run_one_command(
            f"code {filename}:{line_number}"
        )

    # NOTE: the docstring below is consumed at runtime as the tool's JSON
    # schema — treat it as data.
    def llm_find_definition(self, filename: str, line_number: int, symbol: str) -> str:
        """
        {
            "name": "find_definition",
            "description": "The `find_definition` function returns the source code for the definition for the given symbol at the given source line number. Call `find_definition` on every symbol that could be linked to the issue.",
            "parameters": {
                "type": "object",
                "properties": {
                    "filename": {
                        "type": "string",
                        "description": "The filename the symbol is from."
                    },
                    "line_number": {
                        "type": "integer",
                        "description": "The line number where the symbol is present."
                    },
                    "symbol": {
                        "type": "string",
                        "description": "The symbol to lookup."
                    }
                },
                "required": [ "filename", "line_number", "symbol" ]
            }
        }
        """
        # Fix: pass the requested filename through (see llm_get_code_surrounding).
        return f"definition {filename}:{line_number} {symbol}", self._run_one_command(
            f"definition {filename}:{line_number} {symbol}"
        )

    def _supported_functions(self):
        """Tools offered to the LLM; `find_definition` requires clangd."""
        functions = [self.llm_debug, self.llm_get_code_surrounding]
        if clangd_lsp_integration.is_available():
            functions += [self.llm_find_definition]
        return functions

    def _make_assistant(self) -> Assistant:
        """Construct the Assistant wired up with tools, printer, and logging."""
        functions = self._supported_functions()
        instruction_prompt = self.initial_prompt_instructions()

        # gdb overwrites sys.stdin to be a file object that doesn't seem
        # to support colors or streaming. So, just use the original stdout
        # here for all subclasses.
        printer = chatdbg_config.make_printer(sys.__stdout__, self._prompt, "   ", 80)

        assistant = Assistant(
            instruction_prompt,
            model=chatdbg_config.model,
            functions=functions,
            listeners=[
                printer,
                self._log,
            ],
        )

        return assistant

    def warn(self, message):
        print(message)

    def fail(self, message):
        # Raise rather than print so callers can unwind cleanly.
        raise DBGError(message)
227 |
--------------------------------------------------------------------------------
/src/chatdbg/native_util/safety.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 |
4 | # A very simple whitelist-based approach.
5 | # If ChatDBG wants to call other commands not listed here, they should be
6 | # evaluated and added if not possibly harmful.
def command_is_safe(cmd: str) -> bool:
    """
    Whitelist check for debugger commands issued by the LLM.

    :param cmd: the raw command string.
    :return: True only if the command is known to be harmless.
    """
    cmd = cmd.strip()
    if not cmd:
        # An empty/whitespace-only command used to crash with IndexError on
        # `cmd.split()[0]`; treat it as unsafe instead.
        return False
    command_name = cmd.split()[0]

    # Allowed unconditionally.
    if command_name in [
        "apropos",
        "bt",
        "down",
        "frame",
        "h",
        "help",
        "info",
        "language",
        "l",
        "list",
        "source",
        "up",
        "version",
    ]:
        return True

    # Allowed conditionally: print commands restricted to plain
    # variable/member expressions (no calls, quotes, or assignments).
    if command_name in ["p", "print"]:
        return re.fullmatch(r"([a-zA-Z0-9_ *.]|->)*", cmd) is not None

    return False
34 |
--------------------------------------------------------------------------------
/src/chatdbg/native_util/stacks.py:
--------------------------------------------------------------------------------
1 | import textwrap
2 |
3 | import llm_utils
4 |
5 |
6 | class _ArgumentEntry:
7 | def __init__(self, type: str, name: str, value: str):
8 | self._type = type
9 | self._name = name
10 | self._value = value
11 |
12 | def __str__(self):
13 | return f"({self._type}) {self._name} = {self._value if self._value is not None else '[unknown]'}"
14 |
15 | def __repr__(self):
16 | return f"_ArgumentEntry({repr(self._type)}, {repr(self._name)}, {repr(self._value) if self._value is not None else '[unknown]'})"
17 |
18 |
class _FrameSummaryEntry:
    """One resolved stack frame: index, function name, arguments, and location."""

    def __init__(
        self,
        index: int,
        name: str,
        arguments: list[_ArgumentEntry],
        file_path: str,
        lineno: int,
    ):
        self._index = index
        self._name = name
        self._arguments = arguments
        self._file_path = file_path
        self._lineno = lineno

    def index(self):
        return self._index

    def file_path(self):
        return self._file_path

    def lineno(self):
        return self._lineno

    def __str__(self):
        rendered_args = ", ".join(str(arg) for arg in self._arguments)
        return f"{self._index}: {self._name}({rendered_args}) at {self._file_path}:{self._lineno}"

    def __repr__(self):
        return f"_FrameSummaryEntry({self._index}, {repr(self._name)}, {repr(self._arguments)}, {repr(self._file_path)}, {self._lineno})"
49 |
50 |
51 | class _SkippedFramesEntry:
52 | def __init__(self, count: int):
53 | self._count = count
54 |
55 | def count(self):
56 | return self._count
57 |
58 | def __str__(self):
59 | return f"[{self._count} skipped frame{'s' if self._count > 1 else ''}...]"
60 |
61 | def __repr__(self):
62 | return f"_SkippedFramesEntry({self._count})"
63 |
64 |
def build_enriched_stacktrace(summaries):
    """
    Turn frame summaries into a prompt section: the frame list, a hint when
    the stack is suspiciously deep, and source code for the first few frames.

    Returns the assembled text, or None (after printing a notice) when there
    are no summaries at all.
    """
    parts = []
    if not summaries:
        print("could not generate any frame summary.")
        return
    else:
        frame_summary = "\n".join([str(s) for s in summaries])
        parts.append(frame_summary)

    # A _SkippedFramesEntry stands in for `count()` real frames.
    total_frames = sum(
        [s.count() if isinstance(s, _SkippedFramesEntry) else 1 for s in summaries]
    )

    if total_frames > 1000:
        parts.append(
            "Note that there are over 1000 frames in the stack trace, hinting at a possible stack overflow error."
        )

    # Only include source for the innermost few frames to bound prompt size.
    max_initial_locations_to_send = 3
    source_code_entries = []
    for summary in summaries:
        if isinstance(summary, _FrameSummaryEntry):
            file_path, lineno = summary.file_path(), summary.lineno()
            # NOTE(review): read_lines may raise (e.g. FileNotFoundError) when
            # the source file is missing — presumably callers run where the
            # sources exist; confirm.
            lines, first = llm_utils.read_lines(file_path, lineno - 10, lineno + 9)
            block = llm_utils.number_group_of_lines(lines, first)
            block = textwrap.indent(block, "  ")
            source_code_entries.append(
                f"Frame #{summary.index()} at {file_path}:{lineno}:\n{block}\n"
            )

            if len(source_code_entries) == max_initial_locations_to_send:
                break

    if source_code_entries:
        parts.append(
            f"Here is the source code for the first {len(source_code_entries)} frames:\n\n"
            + "\n\n".join(source_code_entries)
        )
    else:
        print("could not retrieve source code for any frames.")
    return "\n\n".join(parts)
106 |
--------------------------------------------------------------------------------
/src/chatdbg/pdb_util/capture.py:
--------------------------------------------------------------------------------
1 | from io import StringIO, TextIOWrapper
2 |
3 |
class CaptureInput:
    """
    Wrap an input stream (stdin) so that everything the program reads is also
    recorded and can be retrieved later via get_captured_input().
    """

    def __init__(self, input_stream):
        # Re-wrap the underlying binary buffer so we control decoding/newlines.
        input_stream = TextIOWrapper(input_stream.buffer, encoding="utf-8", newline="")

        self.original_input = input_stream
        self.capture_buffer = StringIO()
        self.original_readline = input_stream.buffer.raw.readline

        # Patch the raw readline so reads that bypass this wrapper (code that
        # holds the underlying stream directly) are still captured.
        def custom_readline(*args, **kwargs):
            # Record the raw bytes (decoded) before handing them back.
            input_data = self.original_readline(*args, **kwargs)
            self.capture_buffer.write(input_data.decode())
            return input_data

        input_stream.buffer.raw.readline = custom_readline

    def readline(self, *args, **kwargs):
        # Read a line through the wrapper, tee-ing it into the capture buffer.
        input_data = self.original_input.readline(*args, **kwargs)
        self.capture_buffer.write(input_data)
        self.capture_buffer.flush()
        return input_data

    def read(self, *args, **kwargs):
        # Same as readline, but for bulk reads.
        input_data = self.original_input.read(*args, **kwargs)
        self.capture_buffer.write(input_data)
        self.capture_buffer.flush()
        return input_data

    def get_captured_input(self):
        # Everything read so far, in order.
        return self.capture_buffer.getvalue()
33 |
34 |
class CaptureOutput:
    """
    File wrapper that stashes a copy of everything written while still
    forwarding writes to the wrapped file.
    """

    def __init__(self, file):
        self.file = file
        self.buffer = StringIO()

    def write(self, data):
        # Stash first, then forward; return what the real file's write returns.
        self.buffer.write(data)
        return self.file.write(data)

    def getvalue(self):
        """Everything written through this wrapper so far."""
        return self.buffer.getvalue()

    def getfile(self):
        """The wrapped file object."""
        return self.file

    def __getattr__(self, attr):
        # Anything not defined here is delegated to the wrapped file.
        return getattr(self.file, attr)
57 |
--------------------------------------------------------------------------------
/src/chatdbg/pdb_util/locals.py:
--------------------------------------------------------------------------------
1 | import ast
2 | import inspect
3 | import itertools
4 | import numbers
5 | import textwrap
6 |
7 | import numpy as np
8 |
9 | from io import StringIO
10 | from types import FrameType
11 | from typing import Any, Union
12 |
13 |
14 | class SymbolFinder(ast.NodeVisitor):
15 | def __init__(self):
16 | self.defined_symbols = set()
17 |
18 | def visit_Assign(self, node: ast.Assign) -> None:
19 | for target in node.targets:
20 | if isinstance(target, ast.Name):
21 | self.defined_symbols.add(target.id)
22 | self.generic_visit(node)
23 |
24 | def visit_For(self, node: ast.For) -> None:
25 | if isinstance(node.target, ast.Name):
26 | self.defined_symbols.add(node.target.id)
27 | self.generic_visit(node)
28 |
29 | def visit_comprehension(self, node: ast.Name) -> None:
30 | if isinstance(node.target, ast.Name):
31 | self.defined_symbols.add(node.target.id)
32 | self.generic_visit(node)
33 |
34 |
35 | def _extract_locals(frame: FrameType) -> set[str]:
36 | try:
37 | source = textwrap.dedent(inspect.getsource(frame))
38 | tree = ast.parse(source)
39 |
40 | finder = SymbolFinder()
41 | finder.visit(tree)
42 |
43 | args, varargs, keywords, locals = inspect.getargvalues(frame)
44 | parameter_symbols = set(args + [varargs, keywords])
45 | parameter_symbols.discard(None)
46 |
47 | return (finder.defined_symbols | parameter_symbols) & locals.keys()
48 | except:
49 | # ipes
50 | return set()
51 |
52 |
53 | def _is_iterable(obj: Any) -> bool:
54 | try:
55 | iter(obj)
56 | return True
57 | except TypeError:
58 | return False
59 |
60 |
61 | def _repr_if_defined(obj: Any) -> bool:
62 | if obj.__class__ in [np.ndarray, dict, list, tuple]:
63 | # handle these at iterables to truncate reasonably
64 | return False
65 | result = (
66 | "__repr__" in dir(obj.__class__)
67 | and obj.__class__.__repr__ is not object.__repr__
68 | )
69 | return result
70 |
71 |
def _format_limited(
    value: Union[int, np.ndarray], limit: int = 10, depth: int = 3
) -> str:
    """
    Render *value* as a compact, size-bounded string for locals display.

    Collections are truncated to about *limit* elements (with "..." standing
    in for the rest), nesting stops at *depth* levels, and the final text is
    capped at 2 KB.
    """
    def format_tuple(t, depth):
        return tuple([helper(x, depth) for x in t])

    def format_list(list, depth):
        return [helper(x, depth) for x in list]

    def format_dict(items, depth):
        return {k: helper(v, depth) for k, v in items}

    def format_object(obj, depth):
        # Summarize an arbitrary object by its non-callable, non-dunder fields.
        attributes = dir(obj)
        fields = {
            attr: getattr(obj, attr, None)
            for attr in attributes
            if not callable(getattr(obj, attr, None)) and not attr.startswith("__")
        }
        return format(
            f"{type(obj).__name__} object with fields {format_dict(fields.items(), depth)}"
        )

    def helper(value, depth):
        # Ellipsis is used as the truncation marker throughout; the literal
        # "Ellipsis" text is rewritten to "..." at the end.
        if depth == 0:
            return ...
        if value is Ellipsis:
            return ...
        if isinstance(value, dict):
            # Keep limit-1 entries plus a (..., ...) marker pair.
            if len(value) > limit:
                return format_dict(
                    list(value.items())[: limit - 1] + [(..., ...)], depth - 1
                )
            else:
                return format_dict(value.items(), depth - 1)
        elif isinstance(value, (str, bytes)):
            # Long strings are clipped to ~253 characters.
            if len(value) > 254:
                value = str(value)[0:253] + "..."
            return value
        elif isinstance(value, tuple):
            if len(value) > limit:
                return format_tuple(value[0 : limit - 1] + (...,), depth - 1)
            else:
                return format_tuple(value, depth - 1)
        elif value is None or isinstance(
            value, (int, float, bool, type, numbers.Number)
        ):
            # Scalars pass through untouched.
            return value
        elif isinstance(value, np.ndarray):
            # Let numpy do its own threshold-based truncation.
            with np.printoptions(threshold=limit):
                return np.array_repr(value)
        elif inspect.isclass(type(value)) and _repr_if_defined(value):
            # Objects with a custom __repr__ know how to display themselves.
            return repr(value)
        elif _is_iterable(value):
            # Generic iterables: materialize at most limit+1 items to detect
            # overflow without consuming the whole (possibly huge) iterator.
            value = list(itertools.islice(value, 0, limit + 1))
            if len(value) > limit:
                return format_list(value[: limit - 1] + [...], depth - 1)
            else:
                return format_list(value, depth - 1)
        elif inspect.isclass(type(value)):
            return format_object(value, depth - 1)
        else:
            return value

    result = str(helper(value, depth=3)).replace("Ellipsis", "...")
    if len(result) > 1024 * 2:
        result = result[: 1024 * 2 - 3] + "..."
    if type(value) == str:
        # Re-add quotes so strings read as string literals.
        return "'" + result + "'"
    else:
        return result
143 |
144 |
def print_locals(file: StringIO, frame: FrameType) -> None:
    """Write a formatted listing of *frame*'s interesting variables to *file*."""
    locals = frame.f_locals
    # At module scope f_locals IS f_globals; label the section accordingly.
    in_global_scope = locals is frame.f_globals
    defined_locals = _extract_locals(frame)
    # Unclear benefit: possibly some benefit w/ stack only runs, but large context...
    # if in_global_scope and "In" in locals: # in notebook
    #     defined_locals = defined_locals | extract_nb_globals(locals)
    if len(defined_locals) > 0:
        if in_global_scope:
            print(f"  Global variables:", file=file)
        else:
            print(f"  Variables in this frame:", file=file)
        for name in sorted(defined_locals):
            value = locals[name]
            t = type(value).__name__
            prefix = f"    {name}: {t} = "
            # Multi-line reprs are indented so they line up under the prefix.
            rep_list = _format_limited(value, limit=20).split("\n")
            if len(rep_list) > 1:
                rep = (
                    prefix
                    + rep_list[0]
                    + "\n"
                    + textwrap.indent("\n".join(rep_list[1:]), prefix=" " * len(prefix))
                )
            else:
                rep = prefix + rep_list[0]
            print(rep, file=file)
        print(file=file)
173 |
--------------------------------------------------------------------------------
/src/chatdbg/pdb_util/paths.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 |
def is_library_file(file_path):
    """
    Heuristically decide whether *file_path* belongs to the Python installation
    (stdlib or site/dist-packages) rather than to user-written code.
    """
    # Normalize first: os.path.commonpath raises ValueError when mixing
    # relative and absolute paths.
    file_path = os.path.abspath(file_path)
    try:
        # Check if file is in standard library directory
        std_lib_path = os.path.dirname(os.__file__)
        if os.path.commonpath([file_path, std_lib_path]) == std_lib_path:
            return True
        # Check if file is in site-packages or dist-packages (common locations for installed libraries)
        for path in sys.path:
            if "site-packages" in path or "dist-packages" in path:
                if os.path.commonpath([file_path, path]) == path:
                    return True
    except ValueError:
        # Paths on different drives (Windows) share no common path.
        pass
    # Assume the file is a user-written file if it doesn't match the above criteria
    return False
17 |
18 |
def main():
    """Demo driver: print detected user/library paths and classify a sample file."""
    user_path = os.path.abspath(sys.path[0])
    print(f"*** user path: {user_path} ***")

    # Standard library directory first, then any installed-package directories.
    library_paths = [os.path.dirname(os.__file__)]
    for path in sys.path:
        if "site-packages" in path or "dist-packages" in path:
            library_paths.append(path)

    print(library_paths)

    file_path = "/path/to/your/file.py"
    if is_library_file(file_path):
        print(f"{file_path} is likely a library file.")
    else:
        print(f"{file_path} is likely a user-written file.")
34 |
35 |
if __name__ == "__main__":
    # Manual smoke test: print the detected standard-library directory and
    # any site/dist-packages entries on sys.path, then run the demo.
    print(f"std lib: {os.path.dirname(os.__file__)}")
    for path in sys.path:
        if "site-packages" in path or "dist-packages" in path:
            print(f"packages: {path}")

    main()
43 |
--------------------------------------------------------------------------------
/src/chatdbg/pdb_util/sandbox.py:
--------------------------------------------------------------------------------
1 | import ast
2 | import re
3 | from chatdbg.util.config import chatdbg_config
4 |
5 |
def _sandboxed_call(func, *args, **kwargs):
    """
    Invoke *func* only if its fully qualified name matches an entry in the
    configured module whitelist; otherwise raise ImportError.
    """
    whitelist = chatdbg_config.get_module_whitelist()

    # Prefer the function's declared module; fall back to the module name
    # recorded in its globals when __module__ is None.
    module_name = func.__module__
    if module_name is None:
        module_name = func.__globals__.get("__name__", None)

    qualified_name = f"{module_name}.{func.__name__}"

    # Each whitelist entry is a regex that must match the entire name.
    for pattern in whitelist:
        if re.fullmatch(pattern, qualified_name):
            return func(*args, **kwargs)

    raise ImportError(
        f"Calling function {func.__name__} from module {module_name} is not allowed."
    )
29 |
30 |
class SandboxTransformer(ast.NodeTransformer):
    """
    Wrap all function calls in the expression with a call to _sandboxed_call.
    """

    def visit_Call(self, node):
        # BUG FIX: recurse into children first so that *nested* calls
        # (e.g. the argument in `f(g(x))`) are wrapped as well.  Without
        # generic_visit, inner calls escaped the sandbox entirely.
        node = self.generic_visit(node)
        new_node = ast.Call(
            func=ast.Name(id="_sandboxed_call", ctx=ast.Load()),
            args=[node.func] + node.args,
            keywords=node.keywords,
        )
        return ast.copy_location(new_node, node)
43 |
44 |
def sandbox_eval(expression, globals, locals):
    """
    Evaluate *expression* with every function call routed through
    _sandboxed_call, which raises ImportError for functions that are not
    in the module whitelist.

    Note: the `globals`/`locals` parameter names shadow builtins but are
    kept for interface compatibility with eval().
    """
    tree = SandboxTransformer().visit(ast.parse(expression, mode="eval"))
    ast.fix_missing_locations(tree)
    compiled = compile(tree, filename="", mode="eval")
    # Copy so the caller's globals are not mutated by the helper binding.
    env = dict(globals)
    env["_sandboxed_call"] = _sandboxed_call
    return eval(compiled, env, locals)
57 |
--------------------------------------------------------------------------------
/src/chatdbg/util/config.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 |
4 | from traitlets import Bool, Int, Unicode
5 | from traitlets.config import Configurable
6 |
7 | from chatdbg.assistant.listeners import BaseAssistantListener
8 | from chatdbg.util.markdown import ChatDBGMarkdownPrinter
9 | from chatdbg.util.printer import ChatDBGPrinter
10 |
11 | from io import TextIOWrapper
12 | from typing import Union
13 |
14 | from chatdbg.util.jupyter import ChatDBGJupyterPrinter
15 |
16 |
17 | def _chatdbg_get_env(
18 | option_name: str, default_value: Union[bool, int, str]
19 | ) -> Union[bool, int, str]:
20 | env_name = "CHATDBG_" + option_name.upper()
21 | v = os.getenv(env_name, str(default_value))
22 | if type(default_value) == int:
23 | return int(v)
24 | elif type(default_value) == bool:
25 | return v.lower() == "true" or v.lower() == "1"
26 | else:
27 | return v
28 |
29 |
class DBGParser(argparse.ArgumentParser):
    """ArgumentParser that raises instead of exiting on a parse error."""

    def error(self, message):
        # argparse's default error() prints usage and calls sys.exit();
        # inside a debugger we surface the problem to the caller instead.
        raise Exception(f"Error: {message}\n")
36 |
37 |
class ChatDBGConfig(Configurable):
    """Traitlets-based configuration for ChatDBG.

    Every option has a CHATDBG_* environment-variable default; the subset
    listed in _user_configurable can also be set via command-line flags.
    """

    model = Unicode(_chatdbg_get_env("model", "gpt-4o"), help="The LLM model").tag(
        config=True
    )

    log = Unicode(_chatdbg_get_env("log", "log.yaml"), help="The log file").tag(
        config=True
    )

    tag = Unicode(_chatdbg_get_env("tag", ""), help="Any extra info for log file").tag(
        config=True
    )

    rc_lines = Unicode(
        _chatdbg_get_env("rc_lines", "[]"), help="lines to run at startup"
    ).tag(config=True)

    context = Int(
        _chatdbg_get_env("context", 10),
        help="lines of source code to show when displaying stacktrace information",
    ).tag(config=True)

    show_locals = Bool(
        _chatdbg_get_env("show_locals", True),
        help="show local var values in stacktrace",
    ).tag(config=True)

    show_libs = Bool(
        _chatdbg_get_env("show_libs", False), help="show library frames in stacktrace"
    ).tag(config=True)

    show_slices = Bool(
        _chatdbg_get_env("show_slices", True), help="support the `slice` command"
    ).tag(config=True)

    take_the_wheel = Bool(
        _chatdbg_get_env("take_the_wheel", True), help="Let LLM take the wheel"
    ).tag(config=True)

    format = Unicode(
        _chatdbg_get_env("format", "md"),
        help="The output format (text or md or md:simple or jupyter)",
    ).tag(config=True)

    instructions = Unicode(
        _chatdbg_get_env("instructions", ""),
        help="The file for the initial instructions to the LLM, or '' for the default (possibly-model specific) version",
    ).tag(config=True)

    module_whitelist = Unicode(
        _chatdbg_get_env("module_whitelist", ""), help="The module whitelist file"
    ).tag(config=True)

    unsafe = Bool(
        _chatdbg_get_env("unsafe", False),
        help="Disable any protections against GPT running harmful code or commands",
    ).tag(config=True)

    # The subset of traits exposed as command-line flags.
    _user_configurable = [
        log,
        model,
        instructions,
        format,
        module_whitelist,
        unsafe,
    ]

    def _parser(self):
        """Build an argparse parser for the user-configurable traits."""
        parser = DBGParser(add_help=False)

        for trait in self._user_configurable:
            name = f"--{trait.name}"
            value = self._trait_values[trait.name]
            t = type(value)
            if t == bool:
                # NOTE(review): store_true with a True default means that
                # flag can never be turned *off* from the command line --
                # confirm this is intended.
                parser.add_argument(name, default=value, action="store_true")
            else:
                parser.add_argument(name, default=value, type=t)

        return parser

    def to_json(self) -> dict[str, Union[int, str, bool]]:
        """Return the configuration as a JSON-serializable dict."""
        return {
            "model": self.model,
            "log": self.log,
            "tag": self.tag,
            "rc_lines": self.rc_lines,
            "context": self.context,
            "show_locals": self.show_locals,
            "show_libs": self.show_libs,
            "show_slices": self.show_slices,
            "take_the_wheel": self.take_the_wheel,
            "format": self.format,
            "instructions": self.instructions,
            "module_whitelist": self.module_whitelist,
        }

    def parse_user_flags(self, argv: list[str]) -> list[str]:
        """Apply any recognized ChatDBG flags found in *argv*.

        Returns the arguments that were not recognized.
        (BUG FIX: was annotated -> None although it returns a list.)
        """
        args, unknown_args = self._parser().parse_known_args(argv)

        for x in self._user_configurable:
            self.set_trait(x.name, getattr(args, x.name))

        return unknown_args

    def user_flags_help(self) -> str:
        """Return argparse-style help text for the user-configurable flags."""
        return "\n".join(
            [
                self.class_get_trait_help(x, self).replace("ChatDBGConfig.", "")
                for x in self._user_configurable
            ]
        )

    def user_flags(self) -> str:
        """Return the current values of the user-configurable flags."""
        return "\n".join(
            [
                f"  --{x.name:10}{self._trait_values[x.name]}"
                for x in self._user_configurable
            ]
        )

    def parse_only_user_flags(self, args: list[str]) -> str:
        """Parse *args*, requiring that every argument is a ChatDBG flag.

        Returns the resulting flag values, or an error/help message.
        """
        try:
            # Use self rather than the global singleton for consistency.
            unknown = self.parse_user_flags(args)
            if unknown:
                return (
                    f"Unrecognized arguments: {' '.join(unknown)}\n\n"
                    + f"ChatDBG arguments:\n\n{self.user_flags_help()}"
                )
            return self.user_flags()
        except Exception as e:
            return str(e) + f"\nChatDBG arguments:\n\n{self.user_flags_help()}"

    def make_printer(
        self, stdout: TextIOWrapper, prompt: str, prefix: str, width: int
    ) -> BaseAssistantListener:
        """Create the output listener matching the configured format."""
        format = self.format
        split = format.split(":")
        if split[0] == "md":
            theme = split[1] if len(split) == 2 else None
            return ChatDBGMarkdownPrinter(stdout, prompt, prefix, width, theme=theme)
        elif format == "text":
            return ChatDBGPrinter(stdout, prompt, prefix, width)
        elif format == "jupyter":
            return ChatDBGJupyterPrinter(prompt, prefix, width)
        else:
            # BUG FIX: the message was missing the f-prefix, so it printed
            # the literal text "{format}" instead of the value.
            print(f"*** Unknown format '{format}'. Defaulting to 'text'", file=stdout)
            return ChatDBGPrinter(stdout, prompt, prefix, width)

    def get_module_whitelist(self) -> list[str]:
        """Read the module whitelist, one regex per non-blank line.

        (BUG FIX: was annotated -> str although it returns a list.)
        """
        if self.module_whitelist == "":
            file_path = os.path.join(os.path.dirname(__file__), "module_whitelist.txt")
        else:
            file_path = self.module_whitelist

        with open(file_path, "r") as file:
            return [module.rstrip() for module in file if module.rstrip() != ""]
197 |
198 |
199 | chatdbg_config: ChatDBGConfig = ChatDBGConfig()
200 |
--------------------------------------------------------------------------------
/src/chatdbg/util/exit_message.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
# Process-wide flag: set once ChatDBG is used, checked at interpreter exit.
_chatdbg_was_called = False


def chatdbg_was_called() -> None:
    """Record that ChatDBG was invoked so the exit message will be printed."""
    global _chatdbg_was_called
    _chatdbg_was_called = True
9 |
10 |
def print_exit_message(*args: Any, **kwargs: Any) -> None:
    """Exit hook: thank the user, but only if ChatDBG was actually used.

    Accepts and ignores arbitrary arguments so it can be registered with
    any exit-callback mechanism.
    """
    # Reading a module global needs no `global` declaration.
    if not _chatdbg_was_called:
        return
    print("Thank you for using ChatDBG!")
    print(
        "Share your success stories here: https://github.com/plasma-umass/ChatDBG/issues/53"
    )
18 |
--------------------------------------------------------------------------------
/src/chatdbg/util/help.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from chatdbg.util.config import chatdbg_config
4 |
5 | _usage = """\
6 | usage: python -m ipdb [-m] [-c command] ... pyfile [arg] ...
7 |
8 | Debug the Python program given by pyfile.
9 |
10 | Initial commands are read from .pdbrc files in your home directory
11 | and in the current directory, if they exist. Commands supplied with
12 | -c are executed after commands from .pdbrc files.
13 |
14 | To let the script run until an exception occurs, use "-c continue".
15 | To let the script run up to a given line X in the debugged file, use
16 | "-c 'until X'"
17 |
18 | Option -m is available only in Python 3.7 and later.
19 |
20 | ChatDBG-specific options may appear anywhere before pyfile:
21 | """
22 |
23 |
def print_help():
    """Print the usage banner followed by ChatDBG's flag help, then exit."""
    print(_usage, chatdbg_config.user_flags_help(), sep="\n")
    sys.exit()
28 |
--------------------------------------------------------------------------------
/src/chatdbg/util/history.py:
--------------------------------------------------------------------------------
class CommandHistory:
    """Accumulates (command, output) pairs and formats them as a transcript."""

    def __init__(self, prompt: str):
        self._prompt = prompt
        self._history: list[tuple[str, str]] = []

    def append(self, command: str, result: str) -> None:
        """Record one command and the output it produced."""
        self._history.append((command, result))

    def clear(self) -> None:
        """Forget all recorded commands."""
        self._history = []

    def _format_history_entry(self, entry: tuple[str, str]) -> str:
        command, output = entry
        prompt_line = f"{self._prompt}{command}"
        # Commands with no output occupy a single line.
        return f"{prompt_line}\n{output}" if output else prompt_line

    def __str__(self) -> str:
        return "\n".join(map(self._format_history_entry, self._history))
22 |
--------------------------------------------------------------------------------
/src/chatdbg/util/instructions/default.txt:
--------------------------------------------------------------------------------
1 | You are a debugging assistant. You will be given a stack trace for
2 | an error and answer questions related to the root cause of the
3 | error.
4 |
5 | {functions}
6 |
7 | Call any provided functions as many times as you would like.
8 |
9 | The root cause of any error is likely due to a problem in the source
10 | code from the user.
11 |
12 | Explain why each variable contributing to the error has been set
13 | to the value that it has.
14 |
15 | Continue with your explanations until you reach the root cause of
16 | the error. Your answer may be as long as necessary.
17 |
18 | End your answer with a section titled "##### Recommendation\\n" that
19 | contains one of:
20 | * a fix if you have identified the root cause
21 | * a numbered list of 1-3 suggestions for how to continue debugging if
22 | you have not
23 |
--------------------------------------------------------------------------------
/src/chatdbg/util/instructions/gpt-4o.txt:
--------------------------------------------------------------------------------
1 | You are a debugging assistant. You will be given a stack trace for
2 | an error and answer questions related to the root cause of the
3 | error.
4 |
5 | {functions}
6 |
7 | Call any provided functions as many times as you would like.
8 |
9 | The root cause of any error is likely due to a problem in the source
10 | code from the user.
11 |
12 | Continue with your explanations until you reach the root cause of
13 | the error. Your answer may be as long as necessary.
14 |
15 | End your answer with a section titled "##### Recommendation\\n" that
16 | contains one of:
17 | * a fix if you have identified the root cause
18 | * a numbered list of 1-3 suggestions for how to continue debugging if
19 | you have not
20 |
--------------------------------------------------------------------------------
/src/chatdbg/util/jupyter.py:
--------------------------------------------------------------------------------
1 | from io import StringIO
2 |
3 | from IPython.display import HTML, display, update_display
4 | from rich.console import Console
5 | from rich.markdown import Markdown
6 | from chatdbg.util.markdown import ChatDBGMarkdownPrinter
7 |
8 |
9 | class ChatDBGJupyterPrinter(ChatDBGMarkdownPrinter):
10 |
11 | def __init__(self, debugger_prompt, chat_prefix, width):
12 | super().__init__(StringIO(), debugger_prompt, chat_prefix, width)
13 |
14 | def _make_console(self, out):
15 | return Console(
16 | soft_wrap=False, file=out, record=True, theme=self._theme, width=self._width
17 | )
18 |
19 | # Call backs
20 |
21 | # override to flush to the display
22 | def _print(self, text, end=""):
23 | super()._print(text, end=end)
24 | display(HTML(self._export_html()))
25 |
26 | def _export_html(self):
27 | exported_html = self._console.export_html(clear=True, inline_styles=True)
28 | custom_css = """
29 |
40 | """
41 | exported_html = f'{exported_html}
'
42 | modified_html = custom_css + exported_html
43 | return modified_html
44 |
45 | def _stream_append(self, text):
46 | self._streamed += text
47 | m = self._wrap_in_panel(Markdown(self._streamed, code_theme=self._code_theme))
48 | self._console.print(m)
49 | exported_html = self._export_html()
50 | update_display(HTML(exported_html), display_id=self._display_handle.display_id)
51 |
52 | def on_begin_stream(self):
53 | self._streamed = ""
54 |
55 | def on_stream_delta(self, text):
56 | if self._streamed == "":
57 | self._display_handle = display(HTML(""), display_id=True)
58 | self._stream_append(text)
59 |
60 | def on_end_stream(self):
61 | pass
62 |
--------------------------------------------------------------------------------
/src/chatdbg/util/log.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import uuid
3 | from datetime import datetime
4 |
5 | import yaml
6 |
7 | from ..assistant.listeners import BaseAssistantListener
8 | from ..pdb_util.capture import CaptureOutput
9 | from .wrap import word_wrap_except_code_blocks
10 |
11 |
class ChatDBGLog(BaseAssistantListener):
    """Assistant listener that records a full ChatDBG dialog to a YAML log.

    Optionally wraps sys.stdout/sys.stderr in CaptureOutput so the
    program's own output can be included in the log.
    """

    def __init__(self, log_filename, config, capture_streams=True):
        self._log_filename = log_filename
        self.config = config
        if capture_streams:
            self._stdout_wrapper = CaptureOutput(sys.stdout)
            self._stderr_wrapper = CaptureOutput(sys.stderr)
            sys.stdout = self._stdout_wrapper
            # BUG FIX: stderr was previously redirected to the *stdout*
            # wrapper, leaving the stderr capture permanently empty.
            sys.stderr = self._stderr_wrapper
        else:
            # BUG FIX: _stderr_wrapper was assigned twice here and
            # _stdout_wrapper was never initialized.
            self._stdout_wrapper = None
            self._stderr_wrapper = None

        self._log = self._make_log()
        self._current_chat = None

    def _make_log(self):
        """Return a fresh, empty log dictionary with metadata filled in."""
        meta = {
            "time": datetime.now(),
            "command_line": " ".join(sys.argv),
            "uid": str(uuid.uuid4()),
            "config": self.config,
        }
        return {
            "steps": [],
            "meta": meta,
            "instructions": None,
            "stdout": (
                None
                if self._stdout_wrapper is None
                else self._stdout_wrapper.getvalue()
            ),
            "stderr": (
                None
                if self._stderr_wrapper is None
                else self._stderr_wrapper.getvalue()
            ),
        }

    def _dump(self):
        """Append the current log (with totals) to the log file as YAML."""
        log = self._log

        def total(key):
            # Sum a stat over all chat steps that recorded stats.
            return sum(
                x["stats"][key]
                for x in log["steps"]
                if x["output"]["type"] == "chat" and "stats" in x
            )

        log["meta"]["total_tokens"] = total("tokens")
        log["meta"]["total_time"] = total("time")
        log["meta"]["total_cost"] = total("cost")

        print(f"*** Writing ChatDBG dialog log to {self._log_filename}")

        with open(self._log_filename, "a") as file:

            def literal_presenter(dumper, data):
                # Emit multi-line strings in YAML block style for readability.
                if "\n" in data:
                    return dumper.represent_scalar(
                        "tag:yaml.org,2002:str", data, style="|"
                    )
                else:
                    return dumper.represent_scalar("tag:yaml.org,2002:str", data)

            yaml.add_representer(str, literal_presenter)
            yaml.dump([log], file, default_flow_style=False, indent=2)

    def on_begin_dialog(self, instructions):
        log = self._log
        assert log is not None
        log["instructions"] = instructions

    def on_end_dialog(self):
        # Flush the finished dialog to disk and start a fresh log.
        if self._log is not None:
            self._dump()
        self._log = self._make_log()

    def on_begin_query(self, prompt, extra):
        log = self._log
        assert log is not None
        assert self._current_chat is None
        self._current_chat = {
            "input": extra,
            "prompt": prompt,
            "output": {"type": "chat", "outputs": []},
        }

    def on_end_query(self, stats):
        log = self._log
        assert log is not None
        assert self._current_chat is not None
        # BUG FIX: stats were previously stored on the top-level log (and
        # overwritten by every query); attach them to the step so the
        # totals computed in _dump can find them.
        self._current_chat["stats"] = stats
        log["steps"] += [self._current_chat]
        self._current_chat = None

    def _post(self, text, kind):
        """Record a message either inside the current chat or as a step."""
        log = self._log
        assert log is not None
        if self._current_chat is not None:
            self._current_chat["output"]["outputs"].append(
                {"type": "text", "output": f"*** {kind}: {text}"}
            )
        else:
            log["steps"].append(
                {
                    "type": "call",
                    "input": f"*** {kind}",
                    "output": {"type": "text", "output": text},
                }
            )

    def on_warn(self, text):
        self._post(text, "Warning")

    def on_response(self, text):
        log = self._log
        assert log is not None
        assert self._current_chat is not None
        text = word_wrap_except_code_blocks(text)
        self._current_chat["output"]["outputs"].append({"type": "text", "output": text})

    def on_function_call(self, call, result):
        """Record a debugger-command call and its result."""
        log = self._log
        assert log is not None
        if self._current_chat is not None:
            self._current_chat["output"]["outputs"].append(
                {
                    "type": "call",
                    "input": call,
                    "output": {"type": "text", "output": result},
                }
            )
        else:
            log["steps"].append(
                {
                    "type": "call",
                    "input": call,
                    "output": {"type": "text", "output": result},
                }
            )
154 |
--------------------------------------------------------------------------------
/src/chatdbg/util/markdown.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | import textwrap
3 |
4 | from rich import box
5 | import rich
6 | from rich.console import Console
7 | from rich.live import Live
8 | from rich.markdown import Markdown
9 | from rich.panel import Panel
10 | from rich.theme import Theme
11 | from rich.table import Table
12 | from rich.markup import escape
13 |
14 | from chatdbg.util.text import fill_to_width, wrap_long_lines
15 |
16 | from ..assistant.listeners import BaseAssistantListener
17 |
18 |
19 | _themes = {
20 | "default": Theme(
21 | {
22 | "markdown.block": "black on light_steel_blue1",
23 | "markdown.paragraph": "black on light_steel_blue1",
24 | "markdown.text": "black on light_steel_blue1",
25 | "markdown.code": "blue",
26 | "markdown.code_block": "blue",
27 | "markdown.item.bullet": "bold blue",
28 | "markdown.item.number": "bold blue",
29 | "markdown.h1": "bold black",
30 | "markdown.h2": "bold black",
31 | "markdown.h3": "bold black",
32 | "markdown.h4": "bold black",
33 | "markdown.h5": "bold black",
34 | "command": "bold gray11 on wheat1",
35 | "result": "grey35 on wheat1",
36 | "warning": "bright_white on green",
37 | "error": "bright_white on red",
38 | }
39 | ),
40 | "basic": Theme(
41 | {
42 | "markdown.block": "bright_blue on bright_white",
43 | "markdown.paragraph": "bright_blue on bright_white",
44 | "markdown.text": "bright_blue on bright_white",
45 | "markdown.code": "cyan",
46 | "markdown.code_block": "bright_blue",
47 | "markdown.item.bullet": "bold cyan",
48 | "markdown.item.number": "bold cyan",
49 | "markdown.h1": "bold black",
50 | "markdown.h2": "bold black",
51 | "markdown.h3": "bold black",
52 | "markdown.h4": "bold black",
53 | "markdown.h5": "bold bright_blue on bright_white",
54 | "command": "bold bright_yellow on white",
55 | "result": "yellow on white",
56 | "warning": "bright_white on green",
57 | "error": "bright_white on red",
58 | }
59 | ),
60 | }
61 |
62 | _simple_box = Box = box.Box(
63 | " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n", ascii=True
64 | )
65 |
66 | from rich.markdown import ConsoleOptions, loop_first, RenderResult, Segment
67 |
68 |
class MyListItem(rich.markdown.ListItem):
    """A Markdown list item rendered with a ' * ' bullet.

    Replaces rich's default list item so the bullet uses the
    'markdown.item.bullet' theme style.
    """

    style_name = "markdown.item"

    def __init__(self) -> None:
        super().__init__()

    def render_bullet(self, console: Console, options: ConsoleOptions) -> RenderResult:
        # Reserve 3 columns for the bullet/padding prefix.
        render_options = options.update(width=options.max_width - 3)
        lines = console.render_lines(self.elements, render_options, style=self.style)
        bullet_style = console.get_style("markdown.item.bullet", default="none")

        bullet = Segment(" * ", bullet_style)
        padding = Segment(" " * 3, bullet_style)
        new_line = Segment("\n")
        # First rendered line gets the bullet; continuation lines get
        # matching blank padding so the text stays aligned.
        for first, line in loop_first(lines):
            yield bullet if first else padding
            yield from line
            yield new_line
89 |
90 |
class ChatDBGMarkdownPrinter(BaseAssistantListener):
    """Listener that renders assistant output as rich Markdown panels."""

    def __init__(self, out, debugger_prompt, chat_prefix, width, theme=None):
        self._out = out
        self._debugger_prompt = debugger_prompt
        self._chat_prefix = chat_prefix
        self._left_indent = 4
        # Prefer the real terminal width; fall back to the requested width.
        self._width = shutil.get_terminal_size(fallback=(width, 24)).columns
        # Idiom fix: compare to None with `is`, not `==`.
        self._theme = _themes["default"] if theme is None else _themes[theme]
        self._code_theme = "default"
        # used to keep track of streaming
        self._streamed = ""

        self._console = self._make_console(out)

        if theme == "basic":
            # NOTE(review): this mutates the *global* Markdown element
            # registry, affecting every Markdown instance in the process --
            # confirm that is intended.
            Markdown.elements["list_item_open"] = MyListItem
            self._code_theme = "monokai"

    def _make_console(self, out):
        """Create the rich Console used for all output."""
        return Console(soft_wrap=False, file=out, theme=self._theme, width=self._width)

    # Call backs

    def on_begin_query(self, prompt, user_text):
        pass

    def on_end_query(self, stats):
        pass

    def _print(self, renderable, end=""):
        self._console.print(renderable, end=end)

    def _wrap_in_panel(self, rich_element):
        """Indent rich_element by placing it beside an empty left panel."""
        left_panel = Panel("", box=_simple_box, style="on default")
        right_panel = Panel(
            rich_element,
            box=_simple_box,
            style=self._console.get_style("markdown.block"),
        )

        # Create a table to hold the panels side by side
        table = Table.grid(padding=0)
        table.add_column(justify="left", width=self._left_indent - 2)
        table.add_column(justify="left")
        table.add_row(left_panel, right_panel)
        return table

    def _message(self, text, style):
        self._print(
            self._wrap_in_panel(self._wrap_and_fill_and_indent(text, " *** ", style))
        )

    def on_warn(self, text):
        self._message(text, "warning")

    def on_error(self, text):
        self._message(text, "error")

    def _stream_append(self, text):
        # Re-render the whole accumulated markdown on each delta.
        self._streamed += text
        m = self._wrap_in_panel(Markdown(self._streamed, code_theme=self._code_theme))
        self._live.update(m)

    def on_begin_stream(self):
        self._streamed = ""

    def on_stream_delta(self, text):
        # Lazily start the Live display on the first delta.
        if self._streamed == "":
            self._live = Live(vertical_overflow="visible", console=self._console)
            self._live.start(True)
        self._stream_append(text)

    def on_end_stream(self):
        if self._streamed != "":
            self._live.stop()

    def on_response(self, text):
        # Only print here if the text was not already streamed.
        if self._streamed == "" and text is not None:
            m = self._wrap_in_panel(Markdown(text, code_theme=self._code_theme))
            self._print(m, end="\n")
        self._streamed = ""

    def on_function_call(self, call, result):
        prefix = self._chat_prefix
        line = fill_to_width(f"\n{prefix}{self._debugger_prompt}{call}", self._width)
        entry = f"[command]{escape(line)}[/]\n"

        entry += self._wrap_and_fill_and_indent(
            result.rstrip() + "\n", prefix, "result"
        )
        m = self._wrap_in_panel(entry)
        self._print(m, end="")

    def _wrap_and_fill_and_indent(self, text, prefix, style_name):
        """Wrap long lines, pad to width, indent with prefix, apply style."""
        line_width = self._width - len(prefix) - self._left_indent - 2
        text = wrap_long_lines(text.expandtabs(), line_width, subsequent_indent="  ")
        text = fill_to_width(text, line_width)
        text = textwrap.indent(text, prefix, lambda _: True)
        text = escape(text)
        return f"[{style_name}]{text}[/]"
193 |
--------------------------------------------------------------------------------
/src/chatdbg/util/module_whitelist.txt:
--------------------------------------------------------------------------------
1 | __main__
2 |
3 | pandas.*
4 | numpy.*
5 | scipy.*
6 | datascience.*
7 | cs104.*
8 |
9 | builtins.abs
10 | builtins.aiter
11 | builtins.all
12 | builtins.anext
13 | builtins.any
14 | builtins.ascii
15 | builtins.bin
16 | builtins.bool
17 | builtins.bytearray
18 | builtins.bytes
19 | builtins.callable
20 | builtins.chr
21 | builtins.classmethod
22 | builtins.complex
23 | builtins.delattr
24 | builtins.dict
25 | builtins.dir
26 | builtins.divmod
27 | builtins.enumerate
28 | builtins.filter
29 | builtins.float
30 | builtins.format
31 | builtins.frozenset
32 | builtins.getattr
33 | builtins.globals
34 | builtins.hasattr
35 | builtins.hash
36 | builtins.help
37 | builtins.hex
38 | builtins.id
39 | builtins.input
40 | builtins.int
41 | builtins.isinstance
42 | builtins.issubclass
43 | builtins.iter
44 | builtins.len
45 | builtins.list
46 | builtins.locals
47 | builtins.map
48 | builtins.max
49 | builtins.memoryview
50 | builtins.min
51 | builtins.next
52 | builtins.object
53 | builtins.oct
54 | builtins.open
55 | builtins.ord
56 | builtins.pow
57 | builtins.print
58 | builtins.property
59 | builtins.range
60 | builtins.repr
61 | builtins.reversed
62 | builtins.round
63 | builtins.set
64 | builtins.setattr
65 | builtins.slice
66 | builtins.sorted
67 | builtins.staticmethod
68 | builtins.str
69 | builtins.sum
70 | builtins.super
71 | builtins.tuple
72 | builtins.type
73 | builtins.vars
74 | builtins.zip
75 |
--------------------------------------------------------------------------------
/src/chatdbg/util/plog.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | import textwrap
4 |
5 | import yaml
6 |
7 |
class LogPrinter:
    """Pretty-prints one parsed ChatDBG YAML log to a file-like object."""

    def __init__(self, file):
        self.file = file

    def print(self, x=""):
        print(x, file=self.file)

    def _do_message(self, x):
        """Print a plain text output, indented, skipping empty messages."""
        text = x["output"].strip()
        if text:
            self.print(textwrap.indent(text, prefix=" "))

    def _do_function(self, x):
        """Print a function (debugger-command) call and its output."""
        prompt = x["input"].strip()
        assert x["output"]["type"] == "text"
        body = x["output"]["output"].rstrip()
        if body:
            self.print(f" (ChatDBG) {prompt}")
            self.print(textwrap.indent(body, prefix=" "))

    def _do_assistant(self, x):
        """Print every output of a chat step, dispatching on its type."""
        for output in x["outputs"]:
            handler = self._do_function if output["type"] == "call" else self._do_message
            handler(output)
        self.print()

    def _do_step(self, x):
        """Print one top-level step: the user input, then its output."""
        prompt = x["input"].strip()

        self.print(f"(ChatDBG) {prompt}")

        output = x["output"]
        if output["type"] != "text":
            self._do_assistant(output)
        else:
            body = output["output"].rstrip()
            if body:
                self.print(textwrap.indent(body, prefix=" "))

    def do_one(self, x):
        """Print all steps of a single log entry."""
        for step in x["steps"]:
            self.print()
            self._do_step(step)
        self.print()
54 |
55 |
def main():
    """Print each ChatDBG YAML log file named on the command line."""
    parser = argparse.ArgumentParser(description="ChatDBG log printer")
    parser.add_argument("filenames", nargs="*", help="log files to print")
    args = parser.parse_args()

    printer = LogPrinter(sys.stdout)
    for file in args.filenames:
        with open(file, "r") as log:
            full = yaml.safe_load(log)

        # Each file may contain several appended log entries.
        for i, x in enumerate(full):
            print()
            print(f"{i} " + ("-" * 78))
            print(x["instructions"], file=sys.stdout)
            print("-" * 80)
            printer.do_one(x)
            print()
            print()
74 |
75 |
# Allow running as a script: python -m chatdbg.util.plog <logfile>...
if __name__ == "__main__":
    main()
78 |
--------------------------------------------------------------------------------
/src/chatdbg/util/printer.py:
--------------------------------------------------------------------------------
1 | import textwrap
2 | from ..assistant.listeners import BaseAssistantListener
3 | from .stream import StreamingTextWrapper
4 | from .wrap import word_wrap_except_code_blocks
5 | import os
6 |
7 |
class ChatDBGPrinter(BaseAssistantListener):
    """Listener that prints assistant output as plain, word-wrapped text."""

    def __init__(self, out, debugger_prompt, chat_prefix, width):
        self._out = out
        self._debugger_prompt = debugger_prompt
        self._chat_prefix = chat_prefix
        try:
            self._width = min(width, os.get_terminal_size().columns - len(chat_prefix))
        except OSError:
            # get_terminal_size() may fail in notebooks.
            # BUG FIX: was a bare `except:`, which also swallows
            # KeyboardInterrupt and programming errors.
            self._width = width

        # used to keep track of streaming
        self._at_start = True

    # Call backs

    def on_begin_query(self, prompt, user_text):
        pass

    def on_end_query(self, stats):
        pass

    def _print(self, text, **kwargs):
        # Indent every line (including blank ones) with the chat prefix.
        print(
            textwrap.indent(text, self._chat_prefix, lambda _: True),
            file=self._out,
            **kwargs,
        )

    def on_warn(self, text):
        print(textwrap.indent(text, "*** "), file=self._out)

    def on_error(self, text):
        print(textwrap.indent(text, "*** "), file=self._out)

    def on_begin_stream(self):
        self._stream_wrapper = StreamingTextWrapper(
            self._chat_prefix, width=self._width
        )
        self._at_start = True

    def on_stream_delta(self, text):
        # Emit the "(Message)" header once, on the first delta.
        if self._at_start:
            self._at_start = False
            print(
                self._stream_wrapper.append("\n(Message) ", False),
                end="",
                flush=True,
                file=self._out,
            )
        print(
            self._stream_wrapper.append(text, False), end="", flush=True, file=self._out
        )

    def on_end_stream(self):
        print(self._stream_wrapper.flush(), end="", flush=True, file=self._out)
        self._at_start = True

    def on_response(self, text):
        # Only print here if the text was not already streamed.
        if self._at_start and text is not None:
            text = "(Message) " + text
            text = word_wrap_except_code_blocks(
                text, self._width - len(self._chat_prefix)
            )
            self._print(text)

    def on_function_call(self, call, result):
        if result and len(result) > 0:
            entry = f"{self._debugger_prompt}{call}\n{result}"
        else:
            entry = f"{self._debugger_prompt}{call}"
        self._print(entry)
80 |
--------------------------------------------------------------------------------
/src/chatdbg/util/prompts.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from chatdbg.util.config import chatdbg_config
4 | from .text import truncate_proportionally
5 | from typing import Any, Callable, List
6 |
7 |
8 | def _wrap_it(before: str, text: str, after: str = "", maxlen: int = 2048) -> str:
9 | if text:
10 | text = truncate_proportionally(text, maxlen, 0.5)
11 | before = before + ":\n" if before else ""
12 | after = after + "\n" if after else ""
13 | return f"{before}```\n{text}\n```\n{after}"
14 | else:
15 | return ""
16 |
17 |
18 | def _concat_prompt(*args) -> str:
19 | args = [a for a in args if len(a) > 0]
20 | return "\n".join(args)
21 |
22 |
23 | def _user_text_it(user_text: str) -> str:
24 | return user_text if len(user_text) > 0 else "What's the bug? Give me a fix."
25 |
26 |
def build_initial_prompt(
    stack: str,
    error: str,
    details: str,
    command_line: str,
    inputs: str,
    history: str,
    extra: str = "",
    user_text: str = "",
) -> str:
    """Assemble the first prompt sent to the assistant.

    Each non-empty piece of context is fenced and labeled; empty pieces are
    dropped, and the user's question (or a default one) ends the prompt.
    """
    sections = [
        _wrap_it("The program has this stack trace", stack),
        _wrap_it("The program encountered the following error", error, details),
        _wrap_it("This was the command line", command_line),
        _wrap_it("This was the program's input", inputs),
        _wrap_it("This is the history of some debugger commands I ran", history),
        _wrap_it("", extra),
        _user_text_it(user_text),
    ]
    return _concat_prompt(*sections)
46 |
47 |
def build_followup_prompt(history: str, extra: str, user_text: str) -> str:
    """Assemble a follow-up prompt from recent debugger history and the
    user's next question (or a default one when empty)."""
    sections = [
        _wrap_it("This is the history of some debugger commands I ran", history),
        _wrap_it("", extra),
        _user_text_it(user_text),
    ]
    return _concat_prompt(*sections)
54 |
55 |
def initial_instructions(functions: List[Callable[[Any], Any]]) -> str:
    """Load the system-instruction template and fill in the function list.

    Uses the file configured in `chatdbg_config.instructions` if set;
    otherwise picks the model-specific template next to this module,
    falling back to `instructions/default.txt`.

    Args:
        functions: callables whose docstrings are JSON objects containing
            a "description" field (the function-calling schema).

    Returns:
        The template text with `{functions}` replaced by the descriptions.
    """
    if chatdbg_config.instructions == "":
        file_path = os.path.join(
            os.path.dirname(__file__), f"instructions/{chatdbg_config.model}.txt"
        )
        if not os.path.exists(file_path):
            # No model-specific template: use the generic default.
            file_path = os.path.join(
                os.path.dirname(__file__), "instructions/default.txt"
            )
    else:
        file_path = chatdbg_config.instructions

    # Each function's docstring is a JSON schema; pull out its description.
    function_instructions = [json.loads(f.__doc__)["description"] for f in functions]
    with open(file_path, "r") as file:
        template = file.read()
    return template.format_map({"functions": "\n\n".join(function_instructions)})
72 |
--------------------------------------------------------------------------------
/src/chatdbg/util/stream.py:
--------------------------------------------------------------------------------
1 | import textwrap
2 | import re
3 | import sys
4 | from .wrap import word_wrap_except_code_blocks
5 |
6 |
class StreamingTextWrapper:
    """Word-wraps text that arrives incrementally in small deltas.

    `append` returns only the newly produced wrapped output since the
    previous call, so a caller can print deltas as they stream in while the
    overall text stays wrapped and indented.
    """

    def __init__(self, indent=" ", width=80):
        # indent: string prefix added to every wrapped line.
        # width: total line width; usable width excludes the indent.
        self._buffer = ""  # the raw text so far
        self._wrapped = ""  # the successfully wrapped text so far
        self._pending = (
            ""  # the part after the last space in buffer -- has not been wrapped yet
        )
        self._indent = indent
        self._width = width - len(indent)

    def append(self, text, flush=False):
        """Add `text` and return the newly produced wrapped output.

        Text after the last whitespace is held back in `_pending` (the word
        may still be growing); pass flush=True to force it into the buffer.
        """
        if flush:
            self._buffer += self._pending + text
            self._pending = ""
        else:
            # Split on whitespace, keeping the separators; the last piece may
            # be an incomplete word, so it stays pending.
            text_bits = re.split(r"(\s+)", self._pending + text)
            self._pending = text_bits[-1]
            self._buffer += "".join(text_bits[0:-1])

        # Re-wrap the whole buffer and emit only the part not yet returned.
        wrapped = word_wrap_except_code_blocks(self._buffer, width=self._width)
        wrapped = textwrap.indent(wrapped, self._indent, lambda _: True)
        wrapped_delta = wrapped[len(self._wrapped) :]
        self._wrapped = wrapped
        return wrapped_delta

    def flush(self):
        """Emit any remaining output (terminated by a newline) and reset."""
        if len(self._buffer) > 0:
            result = self.append("\n", flush=True)
        else:
            result = self.append("", flush=True)
        self._buffer = ""
        self._wrapped = ""
        return result
41 |
42 |
if __name__ == "__main__":
    # Manual smoke test: stream the command-line words through the wrapper.
    # The indent must be a string (the constructor computes len(indent)), so
    # pass "   " -- the previous integer 3 raised a TypeError.
    s = StreamingTextWrapper("   ", 20)
    for x in sys.argv[1:]:
        y = s.append(" " + x)
        print(y, end="", flush=True)
    print(s.flush())
49 |
--------------------------------------------------------------------------------
/src/chatdbg/util/text.py:
--------------------------------------------------------------------------------
1 | import re
2 | import textwrap
3 | from typing import Union
4 |
5 |
def make_arrow(pad):
    """Generate the leading arrow shown in front of a traceback or debugger line.

    The arrow (dashes plus "> ") occupies exactly `pad` characters; widths
    below 2 degrade to ">" or "".
    """
    if pad >= 2:
        return "{}> ".format("-" * (pad - 2))
    if pad == 1:
        return ">"
    return ""
13 |
14 |
def strip_ansi(s: str) -> str:
    """Remove ANSI escape sequences (colors, cursor movement) from `s`."""
    pattern = r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])"
    return re.sub(pattern, "", s)
18 |
19 |
def truncate_proportionally(
    text: str, maxlen: int = 32000, top_proportion: Union[float, int] = 0.5
) -> str:
    """Omit the middle of a string if needed to make it fit in `maxlen`.

    `top_proportion` controls how much of the kept budget comes from the
    start of the string; the omission is marked with "[...]".
    """
    if len(text) <= maxlen:
        return text
    budget = maxlen - 5  # reserve 5 characters for the "[...]" marker
    head = max(0, int(budget * top_proportion))
    tail = max(0, budget - head)
    return text[:head] + "[...]" + text[len(text) - tail :]
29 |
30 |
def wrap_long_lines(text: str, width: int = 80, subsequent_indent: str = " ") -> str:
    """Wrap only the lines of `text` longer than `width`, leaving others intact.

    Continuation lines produced by the wrapping are prefixed with
    `subsequent_indent`.
    """
    result = []
    for line in text.split("\n"):
        if len(line) <= width:
            result.append(line)
        else:
            result.extend(
                textwrap.wrap(line, width, subsequent_indent=subsequent_indent)
            )
    return "\n".join(result)
41 |
42 |
def fill_to_width(text: str, width: int = 80) -> str:
    """Pad every line of `text` with trailing spaces to at least `width` columns."""
    padded = (line.ljust(width) for line in text.split("\n"))
    return "\n".join(padded)
45 |
--------------------------------------------------------------------------------
/src/chatdbg/util/trim.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import warnings
3 |
4 | with warnings.catch_warnings():
5 | warnings.simplefilter("ignore")
6 | import litellm
7 |
8 |
def sandwich_tokens(
    text: str, model: str, max_tokens: int = 1024, top_proportion: float = 0.5
) -> str:
    """Truncate `text` in token space so it fits within `max_tokens`.

    Keeps `top_proportion` of the budget from the start of the text and the
    remainder from the end, joined by " [...] ". Returns the text unchanged
    when `max_tokens` is None or the text already fits.
    """
    if max_tokens is None:  # was `== None`; identity test is the correct idiom
        return text
    tokens = litellm.encode(model, text)
    if len(tokens) <= max_tokens:
        return text
    total_len = max_tokens - 5  # some slop for the " [...] " separator
    top_len = int(top_proportion * total_len)
    bot_start = len(tokens) - (total_len - top_len)
    return (
        litellm.decode(model, tokens[0:top_len])
        + " [...] "
        + litellm.decode(model, tokens[bot_start:])
    )
26 |
27 |
def _sum_messages(messages, model):
    """Count the tokens consumed by `messages` for the given model."""
    count = litellm.token_counter(model, messages=messages)
    return count
30 |
31 |
def _sum_kept_chunks(chunks, model):
    """Total token count over only the chunks whose kept flag is set."""
    total = 0
    for chunk_messages, kept in chunks:
        if kept:
            total += _sum_messages(chunk_messages, model)
    return total
34 |
35 |
def _extract(messages, model, tool_call_ids):
    """Partition `messages` into tool responses matching `tool_call_ids` and the rest.

    Matching tool responses have their content shortened in place, keeping
    only the leading 512 tokens.

    Returns:
        (tools, other): the matching responses and the remaining messages,
        each preserving the original order.
    """
    tools = []
    other = []
    for message in messages:
        if message.get("tool_call_id", -1) in tool_call_ids:
            # Shorten the tool output, keeping only its beginning.
            shortened = sandwich_tokens(message["content"], model, 512, 1.0)
            message["content"] = shortened
            tools.append(message)
        else:
            other.append(message)
    return tools, other
48 |
49 |
def _chunkify(messages, model):
    """Group messages into chunks, each paired with a kept flag (initially False).

    A chunk is either a single message or a tool-call request bundled with
    all of its tool responses. Message contents are shortened along the way.
    """
    chunks = []
    remaining = messages
    while remaining:
        head, rest = remaining[0], remaining[1:]
        if "tool_calls" not in head:
            # Plain message: shorten it (keeping only its start) and chunk it alone.
            head["content"] = sandwich_tokens(head["content"], model, 1024, 0)
            chunks.append(([head], False))
            remaining = rest
        else:
            # Tool-call request: bundle it with all of its responses.
            ids = [tool_call["id"] for tool_call in head["tool_calls"]]
            tools, other = _extract(rest, model, ids)
            chunks.append(([head] + tools, False))
            remaining = other
    return chunks
61 |
62 |
def trim_messages(
    messages: list[dict[str, str]],  # list of JSON objects encoded as dicts
    model: str,
    trim_ratio: float = 0.75,
) -> list:
    """
    Trim a conversation so it fits within `trim_ratio` of the model's
    maximum input-token budget. Returns a new list; the input is not mutated.

    Strategy:
      - chunk messages:
          - single message, or
          - tool request and all the tool responses
      - keep the system messages
      - keep the first user message
      - go most recent to oldest, keeping chunks until we are at the limit

    Also, shorten tool call results along the way.
    """

    # Deep-copy first: the chunking helpers shorten message contents in place.
    messages = copy.deepcopy(messages)

    max_tokens_for_model = litellm.model_cost[model]["max_input_tokens"]
    max_tokens = int(max_tokens_for_model * trim_ratio)

    # Fast path: the conversation already fits within the budget.
    if litellm.token_counter(model, messages=messages) < max_tokens:
        return messages

    chunks = _chunkify(messages=messages, model=model)

    # 1. System messages are always kept.
    chunks = [(m, b or m[0]["role"] == "system") for (m, b) in chunks]

    # 2. Keep user messages.
    # NOTE(review): despite the docstring's "first user message", this loop
    # marks every chunk starting with a user message as kept -- confirm intent.
    for i in range(len(chunks)):
        messages, kept = chunks[i]
        if messages[0]["role"] == "user":
            chunks[i] = (messages, True)

    # 3. Fill the remaining budget, walking from the newest chunk backward.
    for i in range(len(chunks))[::-1]:
        messages, kept = chunks[i]
        if kept:
            continue
        elif (
            _sum_kept_chunks(chunks, model) + _sum_messages(messages, model)
            < max_tokens
        ):
            chunks[i] = (messages, True)
        else:
            # Once a chunk no longer fits, everything older is dropped too.
            break

    assert (
        _sum_kept_chunks(chunks, model) < max_tokens
    ), f"New conversation too big {_sum_kept_chunks(chunks, model)} vs {max_tokens}!"

    # Flatten the kept chunks back into a single message list, in order.
    return [m for (messages, kept) in chunks if kept for m in messages]
125 |
--------------------------------------------------------------------------------
/src/chatdbg/util/wrap.py:
--------------------------------------------------------------------------------
1 | import textwrap
2 |
3 |
def word_wrap_except_code_blocks(text: str, width: int = 80) -> str:
    """
    Word-wraps text for terminal display while leaving code blocks alone.

    The text is split on ` ``` ` fences; segments at even indices lie
    outside code blocks and are wrapped line by line, while odd-index
    segments (inside fences) are passed through untouched.

    Args:
        text (str): The text to wrap.
        width (int): The width of the lines to wrap at, passed to `textwrap.fill`.

    Returns:
        The wrapped text.
    """
    segments = text.split("```")
    rewrapped = []
    for index, segment in enumerate(segments):
        if index % 2 == 0:
            lines = segment.split("\n")
            segment = "\n".join(textwrap.fill(line, width=width) for line in lines)
        rewrapped.append(segment)
    return "```".join(rewrapped)
26 |
--------------------------------------------------------------------------------