├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── poetry.lock ├── pyproject.toml ├── requirements.txt └── src ├── .env-example ├── app.py ├── config.py ├── prompts ├── system.prompt └── user.prompt ├── static └── all_stocks.csv ├── templates └── index.html └── utils.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | 141 | # pytype static type analyzer 142 | .pytype/ 143 | 144 | # Cython debug symbols 145 | cython_debug/ 146 | 147 | .DS_Store 148 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Dylan Castillo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Chatbot Code Interpreter 2 | 3 | This is a simple app meant to replicate some of the code interpreter capabilities of ChatGPT: it's a chatbot capable of running Python code in the browser. I created it because I wanted to analyze and visualize data by "chatting" with it in plain English (see [deepsheet](https://deepsheet.dylancastillo.co/)). 4 | 5 | If you're curious, check out the step-by-step [tutorial](https://dylancastillo.co/code-interpreter-chatbot-pyodide-langchain-openai/). 6 | 7 | ## Development 8 | 9 | 1. Install [Python 3.10](https://www.python.org/downloads/). 10 | 2. Install [Poetry](https://python-poetry.org/docs/#installation). 11 | 3. From the root folder of the project, install the dependencies: 12 | ``` 13 | poetry config virtualenvs.in-project true 14 | poetry install 15 | ``` 16 | 4. Update `.env-example` with your OpenAI secret key and save it as `.env`. 17 | 5. To start the app, open a terminal in `src` and run `litestar run --reload --debug` 18 | 6. Go to `http://127.0.0.1:8000`. 19 | 20 | ## Screenshots 21 | 22 | The app looks like this: 23 | 24 | 25 | 26 | ## License 27 | 28 | MIT. 
29 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "chatbot-code-interpreter" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Dylan Castillo "] 6 | license = "MIT" 7 | readme = "README.md" 8 | packages = [{include = "chatbot_code_interpreter"}] 9 | 10 | [tool.poetry.dependencies] 11 | python = "^3.10" 12 | langchain = "^0.0.166" 13 | openai = "^0.27.6" 14 | python-dotenv = "^1.0.0" 15 | litestar = {extras = ["standard"], version = "^2.0.0a6"} 16 | 17 | 18 | [tool.poetry.group.dev.dependencies] 19 | black = "^23.3.0" 20 | flake8 = "^6.0.0" 21 | mypy = "^1.3.0" 22 | notebook = "^6.5.4" 23 | 24 | [build-system] 25 | requires = ["poetry-core"] 26 | build-backend = "poetry.core.masonry.api" 27 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.4 ; python_version >= "3.10" and python_version < "4.0" 2 | aiosignal==1.3.1 ; python_version >= "3.10" and python_version < "4.0" 3 | anyio==3.6.2 ; python_version >= "3.10" and python_version < "4.0" 4 | async-timeout==4.0.2 ; python_version >= "3.10" and python_version < "4.0" 5 | attrs==23.1.0 ; python_version >= "3.10" and python_version < "4.0" 6 | certifi==2023.5.7 ; python_version >= "3.10" and python_version < "4.0" 7 | charset-normalizer==3.1.0 ; python_version >= "3.10" and python_version < "4.0" 8 | click==8.1.3 ; python_version >= "3.10" and python_version < "4.0" 9 | colorama==0.4.6 ; python_version >= "3.10" and python_version < "4.0" and platform_system == "Windows" or sys_platform == "win32" and python_version >= "3.10" and python_version < "4.0" 10 | dataclasses-json==0.5.7 ; python_version >= "3.10" and python_version < "4.0" 11 | editorconfig==0.12.3 ; python_version >= "3.10" and python_version 
< "4.0" 12 | faker==18.7.0 ; python_version >= "3.10" and python_version < "4.0" 13 | fast-query-parsers==0.4.0 ; python_version >= "3.10" and python_version < "4.0" 14 | frozenlist==1.3.3 ; python_version >= "3.10" and python_version < "4.0" 15 | greenlet==2.0.2 ; python_version >= "3.10" and python_version < "4.0" and platform_machine == "aarch64" or python_version >= "3.10" and python_version < "4.0" and platform_machine == "ppc64le" or python_version >= "3.10" and python_version < "4.0" and platform_machine == "x86_64" or python_version >= "3.10" and python_version < "4.0" and platform_machine == "amd64" or python_version >= "3.10" and python_version < "4.0" and platform_machine == "AMD64" or python_version >= "3.10" and python_version < "4.0" and platform_machine == "win32" or python_version >= "3.10" and python_version < "4.0" and platform_machine == "WIN32" 16 | h11==0.14.0 ; python_version >= "3.10" and python_version < "4.0" 17 | httpcore==0.17.0 ; python_version >= "3.10" and python_version < "4.0" 18 | httptools==0.5.0 ; python_version >= "3.10" and python_version < "4.0" 19 | httpx==0.24.0 ; python_version >= "3.10" and python_version < "4.0" 20 | idna==3.4 ; python_version >= "3.10" and python_version < "4.0" 21 | jinja2==3.1.2 ; python_version >= "3.10" and python_version < "4.0" 22 | jsbeautifier==1.14.7 ; python_version >= "3.10" and python_version < "4.0" 23 | langchain==0.0.166 ; python_version >= "3.10" and python_version < "4.0" 24 | litestar[standard]==2.0.0a6 ; python_version >= "3.10" and python_version < "4.0" 25 | mako==1.2.4 ; python_version >= "3.10" and python_version < "4.0" 26 | markdown-it-py==2.2.0 ; python_version >= "3.10" and python_version < "4.0" 27 | markupsafe==2.1.2 ; python_version >= "3.10" and python_version < "4.0" 28 | marshmallow-enum==1.5.1 ; python_version >= "3.10" and python_version < "4.0" 29 | marshmallow==3.19.0 ; python_version >= "3.10" and python_version < "4.0" 30 | mdurl==0.1.2 ; python_version >= "3.10" and 
python_version < "4.0" 31 | msgspec==0.15.0 ; python_version >= "3.10" and python_version < "4.0" 32 | multidict==6.0.4 ; python_version >= "3.10" and python_version < "4.0" 33 | mypy-extensions==1.0.0 ; python_version >= "3.10" and python_version < "4.0" 34 | numexpr==2.8.4 ; python_version >= "3.10" and python_version < "4.0" 35 | numpy==1.24.3 ; python_version >= "3.10" and python_version < "4.0" 36 | openai==0.27.6 ; python_version >= "3.10" and python_version < "4.0" 37 | openapi-schema-pydantic==1.2.4 ; python_version >= "3.10" and python_version < "4.0" 38 | packaging==23.1 ; python_version >= "3.10" and python_version < "4.0" 39 | polyfactory==2.1.0 ; python_version >= "3.10" and python_version < "4.0" 40 | pydantic==1.10.7 ; python_version >= "3.10" and python_version < "4.0" 41 | pygments==2.15.1 ; python_version >= "3.10" and python_version < "4.0" 42 | python-dateutil==2.8.2 ; python_version >= "3.10" and python_version < "4.0" 43 | python-dotenv==1.0.0 ; python_version >= "3.10" and python_version < "4.0" 44 | pyyaml==6.0 ; python_version >= "3.10" and python_version < "4.0" 45 | requests==2.30.0 ; python_version >= "3.10" and python_version < "4.0" 46 | rich==13.3.5 ; python_version >= "3.10" and python_version < "4.0" 47 | six==1.16.0 ; python_version >= "3.10" and python_version < "4.0" 48 | sniffio==1.3.0 ; python_version >= "3.10" and python_version < "4.0" 49 | sqlalchemy==2.0.13 ; python_version >= "3.10" and python_version < "4.0" 50 | tenacity==8.2.2 ; python_version >= "3.10" and python_version < "4.0" 51 | tqdm==4.65.0 ; python_version >= "3.10" and python_version < "4.0" 52 | typing-extensions==4.5.0 ; python_version >= "3.10" and python_version < "4.0" 53 | typing-inspect==0.8.0 ; python_version >= "3.10" and python_version < "4.0" 54 | urllib3==2.0.2 ; python_version >= "3.10" and python_version < "4.0" 55 | uvicorn[standard]==0.21.1 ; python_version >= "3.10" and python_version < "4.0" 56 | uvloop==0.17.0 ; sys_platform != "win32" and 
sys_platform != "cygwin" and platform_python_implementation != "PyPy" and python_version >= "3.10" and python_version < "4.0" 57 | watchfiles==0.19.0 ; python_version >= "3.10" and python_version < "4.0" 58 | websockets==11.0.3 ; python_version >= "3.10" and python_version < "4.0" 59 | yarl==1.9.2 ; python_version >= "3.10" and python_version < "4.0" 60 | -------------------------------------------------------------------------------- /src/.env-example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | MODEL_NAME="gpt-3.5-turbo" 3 | -------------------------------------------------------------------------------- /src/app.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | 4 | from langchain import LLMChain 5 | from langchain.chat_models import ChatOpenAI 6 | from litestar import Litestar, get, post 7 | from litestar.contrib.jinja import JinjaTemplateEngine 8 | from litestar.enums import RequestEncodingType 9 | from litestar.params import Body 10 | from litestar.response_containers import Template 11 | from litestar.static_files.config import StaticFilesConfig 12 | from litestar.template.config import TemplateConfig 13 | from typing_extensions import Annotated 14 | 15 | from config import OpenAI 16 | from utils import get_prompt 17 | 18 | chain_create = LLMChain( 19 | llm=ChatOpenAI( 20 | temperature=0, 21 | model_name=OpenAI.model_name, 22 | openai_api_key=OpenAI.secret_key, 23 | ), 24 | prompt=get_prompt(), 25 | ) 26 | 27 | 28 | @get(path="/", name="index") 29 | async def index() -> Template: 30 | return Template(name="index.html") 31 | 32 | 33 | @dataclass 34 | class Query: 35 | query: str 36 | df_info: str 37 | 38 | 39 | @post(path="/ask", name="ask", sync_to_thread=True) 40 | def ask( 41 | data: Annotated[Query, Body(media_type=RequestEncodingType.MULTI_PART)], 42 | ) -> str: 43 | query = data.query 44 | 
df_info = data.df_info 45 | 46 | chain_result = chain_create.run( 47 | { 48 | "df_info": df_info, 49 | "query": query, 50 | } 51 | ) 52 | result = chain_result.split("```python")[-1][:-3].strip() 53 | 54 | return result 55 | 56 | 57 | app = Litestar( 58 | route_handlers=[index, ask], 59 | static_files_config=[ 60 | StaticFilesConfig(directories=["static"], path="/static", name="static"), 61 | ], 62 | template_config=TemplateConfig( 63 | engine=JinjaTemplateEngine, directory=Path("templates") 64 | ), 65 | ) 66 | -------------------------------------------------------------------------------- /src/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | 4 | from dotenv import load_dotenv 5 | import os 6 | 7 | load_dotenv() 8 | 9 | 10 | @dataclass 11 | class OpenAI: 12 | secret_key: str = os.getenv("OPENAI_SECRET_KEY", "") 13 | model_name: str = os.getenv("MODEL_CREATE", "gpt-3.5-turbo") 14 | 15 | 16 | @dataclass 17 | class Paths: 18 | root: Path = Path(__file__).parent 19 | prompts: Path = root / "prompts" 20 | -------------------------------------------------------------------------------- /src/prompts/system.prompt: -------------------------------------------------------------------------------- 1 | You are an expert Python Data Analyst. You help users write syntactically correct Python code to answer questions about data and to generate beautiful data visualizations. 2 | -------------------------------------------------------------------------------- /src/prompts/user.prompt: -------------------------------------------------------------------------------- 1 | Given an user query, create a syntactically correct Python script that will process the data (aggregations, filtering, etc) to answer the user's request. 
2 | 3 | You must use the following format: 4 | 5 | Query: 6 | Dataset (): 7 | Steps: 8 | Code: ```python ``` 9 | 10 | Your output must start and end with ```. Don't include any comments, or explanations at the end. 11 | 12 | You must save the answer in a string variable called `result`, and the last line of code must be `result\n`. You must never give an answer using `print` such as `print("your answer here")`. 13 | 14 | You can only import standard libraries from Python, and these external libraries: pandas, and numpy. You must always include the required imports. 15 | 16 | ### EXAMPLE ### 17 | Query: What's the maximum height of males? 18 | Dataset ('analysis.csv'): 19 | 0 1 2 20 | Gender Male Female Male 21 | Height 181 172 178 22 | Weight 89 68 79 23 | Steps: 24 | 1. Filter data to include only males 25 | 2. Find maximum height of resulting rows 26 | 3. Return result to user 27 | Code: ```python 28 | import pandas as pd 29 | import numpy as np 30 | 31 | df = pd.read_csv("analysis.csv") # replace with the actual file name and path 32 | 33 | filtered_df = df[df["Gender"] == "Male"] 34 | highest_male_height = filtered_df.Height.max() 35 | result = "Height of the highest male: " + str(highest_male_height) 36 | result 37 | 38 | ``` 39 | 40 | ### RESULT ### 41 | Query: {query} 42 | Dataset ('all_stocks.csv'): 43 | {df_info} 44 | Steps: 45 | -------------------------------------------------------------------------------- /src/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Code Runner Chatbot 9 | 10 | 11 | 12 | 13 | 14 |
15 |

Code Runner Chatbot

16 | 17 | 18 |
19 |
20 |
# /src/utils.py
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)

from config import Paths


def _read_template(filename: str) -> str:
    """Return the whitespace-stripped text of a file under ``Paths.prompts``."""
    # Decode explicitly as UTF-8 so the prompt text does not depend on the
    # platform's default locale encoding.
    with open(Paths.prompts / filename, encoding="utf-8") as f:
        return f.read().strip()


def get_prompt() -> ChatPromptTemplate:
    """Build the chat prompt from the system and user prompt files.

    Reads ``system.prompt`` and ``user.prompt`` from ``Paths.prompts`` and
    wraps them as the system and human messages of a chat prompt.

    Returns:
        A ``ChatPromptTemplate`` whose input variables are ``query`` and
        ``df_info``.

    Raises:
        OSError: If either prompt file cannot be opened.
    """
    system_template = _read_template("system.prompt")
    human_template = _read_template("user.prompt")

    return ChatPromptTemplate(
        messages=[
            SystemMessagePromptTemplate.from_template(system_template),
            HumanMessagePromptTemplate.from_template(human_template),
        ],
        input_variables=["query", "df_info"],
    )