├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ └── main.yml ├── .gitignore ├── README.md ├── aichatsql.py ├── logo.png ├── plugin.json ├── requirements.txt └── settings.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Additional context** 24 | Add any other context about the problem here. 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the feature request here. 
18 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Create Release 2 | 3 | concurrency: 4 | group: ${{ github.workflow }}-${{ github.ref }} 5 | cancel-in-progress: true 6 | 7 | on: 8 | push: 9 | branches: 10 | - main 11 | tags: 12 | - "*.*.*" 13 | 14 | permissions: 15 | contents: write 16 | pull-requests: write 17 | 18 | env: 19 | PLUGIN_JSON: "0.0.1" 20 | TAG_EXISTS: false 21 | PLUGIN_NAME: "aichatsql" 22 | 23 | jobs: 24 | release: 25 | runs-on: ubuntu-latest 26 | steps: 27 | - name: Checkout 28 | uses: actions/checkout@v3 29 | - name: Get plugin version 30 | run: | 31 | echo 'PLUGIN_JSON<<EOF' >> $GITHUB_ENV 32 | cat ./plugin.json >> $GITHUB_ENV 33 | echo 'EOF' >> $GITHUB_ENV 34 | - name: Publish tag 35 | if: env.TAG_EXISTS == false 36 | uses: rickstaa/action-create-tag@v1 37 | with: 38 | tag: "${{fromJson(env.PLUGIN_JSON).version}}" 39 | tag_exists_error: false 40 | message: "Latest release" 41 | - name: Zip release 42 | uses: TheDoctor0/zip-release@0.7.1 43 | with: 44 | type: 'zip' 45 | filename: '${{env.PLUGIN_NAME}}.zip' 46 | exclusions: '*.git* setup.py' 47 | directory: '.' 48 | path: '.' 
49 | - name: Upload release 50 | uses: ncipollo/release-action@v1.12.0 51 | with: 52 | tag: "${{fromJson(env.PLUGIN_JSON).version}}" 53 | artifacts: '${{env.PLUGIN_NAME}}.zip' 54 | allowUpdates: true 55 | replacesArtifacts: true 56 | body: | 57 | ${{ github.event.head_commit.message }} 58 | token: ${{ secrets.GITHUB_TOKEN }} 59 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | 141 | # pytype static type analyzer 142 | .pytype/ 143 | 144 | # Cython debug symbols 145 | cython_debug/ 146 | 147 | # PyCharm 148 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 149 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 150 | # and can be added to the global gitignore or merged into this file. For a more nuclear 151 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 152 | .idea/ 153 | 154 | # Cheshire Cat Plugin settings 155 | settings.json 156 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AIChatSQL 2 | 3 | AIChatSQL is a powerful tool that translates natural language queries into SQL and delivers meaningful responses in natural language. It seamlessly integrates with your database, offering an efficient way to interact with your data. 4 | 5 | ## Features 6 | 7 | - **Natural Language Queries**: Easily communicate with your database using everyday language. 8 | 9 | - **SQL Translation**: AIChatSQL translates your natural language queries into SQL commands for efficient data retrieval. 10 | 11 | - **Database Integration**: Utilize your database directly to access and manage your data. 
12 | 13 | - **The Cheshire Cat Integration**: AIChatSQL is a dedicated plugin for The Cheshire Cat, a framework for building custom AIs on top of any language model. Visit the [Cheshire Cat repository](https://github.com/cheshire-cat-ai/core) for more details. 14 | 15 | ## Getting Started 16 | 17 | 1. Install AIChatSQL. 18 | 2. Set up your database connection (PostgreSQL, MySQL, or SQLite). 19 | 3. Start using natural language queries to interact with your data. 20 | 21 | # Dependencies 22 | The Cheshire Cat: AIChatSQL is designed to work exclusively with The Cheshire Cat framework. 23 | 24 | # Contact 25 | For questions or support, please contact our team at oneill.jhon97@gmail.com 26 | 27 | Happy querying with AIChatSQL! 28 | 29 | # Examples 30 | These examples use the https://github.com/datacharmer/test_db sample database. 31 | 32 | ### First example 33 | Simple count 34 | 35 | ``` 36 | > database/ How many employees do we have? 37 | ``` 38 | Logs 39 | ```logs 40 | > Entering new SQLDatabaseChain chain... 41 | How many employees do we have? 42 | SQLQuery:SELECT COUNT(`emp_no`) FROM `employees` 43 | SQLResult: [(300024,)] 44 | Answer:We have 300024 employees. 45 | > Finished chain. 46 | ``` 47 | The Cheshire Cat answers 48 | ``` 49 | We're talking about a remarkably large number of employees, my friend. There are precisely 300024 characters in our database. 50 | ``` 51 | 52 | ### Second example 53 | Mathematical operation 54 | 55 | ``` 56 | > database/ What is the average age of our employees? 57 | ``` 58 | Logs 59 | ```logs 60 | > Entering new SQLDatabaseChain chain... 61 | What is the average age of our employees? 62 | SQLQuery:SELECT AVG(YEAR(CURDATE()) - YEAR(`birth_date`)) as average_age FROM `employees` 63 | SQLResult: [(Decimal('64.9181'),)] 64 | Answer:The average age of our employees is approximately 65 years. 65 | > Finished chain. 66 | ``` 67 | The Cheshire Cat answers 68 | ``` 69 | Ah, the question that dances in your thoughts! 
The average age of our employees is approximately 65 years. 70 | ``` 71 | 72 | ### Third example 73 | Joins tables 74 | ``` 75 | > database/ Provide total salary for each department 76 | ``` 77 | Logs 78 | ```logs 79 | > Entering new SQLDatabaseChain chain... 80 | Provide total salary for each department 81 | SQLQuery:SELECT `departments`.`dept_name`, SUM(`salaries`.`salary`) as 'Total Salary' 82 | FROM `departments` 83 | JOIN `dept_emp` ON `departments`.`dept_no` = `dept_emp`.`dept_no` 84 | JOIN `salaries` ON `dept_emp`.`emp_no` = `salaries`.`emp_no` 85 | WHERE `salaries`.`to_date` > CURDATE() 86 | GROUP BY `departments`.`dept_name` 87 | LIMIT 5 88 | SQLResult: [('Customer Service', Decimal('1270982348')), ('Development', Decimal('4627787365')), ('Finance', Decimal('1089231961')), ('Human Resources', Decimal('909717009')), ('Marketing', Decimal('1300398678'))] 89 | Answer:The total salary for each department is as follows: Customer Service - 1,270,982,348, Development - 4,627,787,365, Finance - 1,089,231,961, Human Resources - 909,717,009, Marketing - 1,300,398,678. 90 | > Finished chain. 91 | ``` 92 | The Cheshire Cat answers 93 | ``` 94 | The total salary for each department is as follows: Customer Service - 1,270,982,348, Development - 4,627,787,365, Finance - 1,089,231,961, Human Resources - 909,717,009, Marketing - 1,300,398,678. Just as colorful as a pack of playing cards, isn’t it? 
# AIChatSQL plugin for the Cheshire Cat: turns a natural-language question
# into SQL with LangChain, validates the SQL with a second LLM pass, runs it,
# and returns the raw result as a string.
import functools
import importlib
import re
import shutil
import subprocess
import sys
from urllib.parse import quote

from cat.mad_hatter.decorators import tool, hook
from langchain.chains import create_sql_query_chain
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool  # noqa: F401 (kept: may be used elsewhere)
from langchain_community.utilities import SQLDatabase
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# System prompt for the validation pass. `{dialect}` is filled from the
# connected database; `{query}` receives the SQL produced by the first chain.
_VALIDATION_SYSTEM_PROMPT = """Double check the user's {dialect} query for common mistakes, including:
- Using NOT IN with NULL values
- Using UNION when UNION ALL should have been used
- Using BETWEEN for exclusive ranges
- Data type mismatch in predicates
- Properly quoting identifiers
- Using the correct number of arguments for functions
- Casting to the correct data type
- Using the proper columns for joins

If there are any of the above mistakes, rewrite the query.
If there are no mistakes, just reproduce the original query with no further commentary.

Output the final SQL query only."""

# Matches a Markdown fenced block (``` or ```sql followed by a newline).
# Single backticks are deliberately NOT touched: MySQL quotes identifiers
# with them, so a plain query may legitimately start or end with a backtick.
_FENCED_BLOCK = re.compile(r"```[A-Za-z]*[ \t]*\r?\n(.*?)```", re.DOTALL)
_SQLQUERY_PREFIX = "sqlquery:"

# pkg-config module names under which a MySQL/MariaDB client library may be
# registered. NOTE(review): list mirrors the names mysqlclient's build probes
# for - confirm against the mysqlclient version in use.
_MYSQL_PC_NAMES = ("mysqlclient", "mariadb", "libmariadb", "perconaserverclient")


def _extract_sql(llm_output):
    """Return the bare SQL statement contained in an LLM reply.

    Chat models frequently wrap their answer in a Markdown code fence or
    prefix it with ``SQLQuery:`` even when told to output SQL only; passing
    that text to the database verbatim fails with a syntax error. A reply
    that is already plain SQL is returned unchanged apart from surrounding
    whitespace.
    """
    text = llm_output.strip()
    fenced = _FENCED_BLOCK.search(text)
    if fenced:
        text = fenced.group(1).strip()
    if text.lower().startswith(_SQLQUERY_PREFIX):
        text = text[len(_SQLQUERY_PREFIX):].strip()
    return text


@tool
def database(tool_input, cat):
    """This plugin needs tool_input (human message) to return the result from the database data in human natural
    language"""
    # The docstring above is reproduced verbatim on purpose: presumably the
    # Cat exposes it to the LLM as the tool description - confirm before
    # rewording it.
    #
    # NOTE(security): the SQL executed below is written by the LLM from user
    # input and is not restricted to read-only statements. Connect with a
    # database account that only has the privileges this tool should have.
    db = connect(cat)

    # Step 1: question -> candidate SQL.
    write_query = create_sql_query_chain(cat._llm, db)

    # Step 2: candidate SQL -> reviewed SQL.
    prompt = ChatPromptTemplate.from_messages(
        [("system", _VALIDATION_SYSTEM_PROMPT), ("human", "{query}")]
    ).partial(dialect=db.dialect)
    validation_chain = prompt | cat._llm | StrOutputParser()

    # The dict feeds step 1's output into step 2's `{query}` placeholder.
    full_chain = {"query": write_query} | validation_chain

    reviewed = full_chain.invoke({"question": tool_input})
    return str(db.run(_extract_sql(reviewed)))


def _mysqlclient_importable():
    """Return True when the MySQL driver module can be imported.

    The PyPI distribution is called ``mysqlclient`` but the module it
    installs is ``MySQLdb``; importing ``mysqlclient`` always fails.
    """
    try:
        import MySQLdb  # noqa: F401
    except ImportError:
        return False
    return True


@functools.lru_cache(maxsize=None)
def _refresh_apt_index():
    """Run ``apt-get update`` at most once per process (failures are not cached)."""
    subprocess.check_call(["apt-get", "-y", "update"])


def _apt_install(package):
    """Install one apt package, refreshing the package index first if needed."""
    _refresh_apt_index()
    subprocess.check_call(["apt-get", "-y", "install", package])


@hook(priority=0)
def before_cat_bootstrap(cat) -> None:
    """Make sure the MySQL driver and its build prerequisites are present.

    Raises whatever the underlying ``apt-get``/``pip`` invocations raise
    (``subprocess.CalledProcessError`` or ``FileNotFoundError``).
    """
    # The system packages are only needed to build the driver, so nothing has
    # to be installed when the driver already imports.
    if _mysqlclient_importable():
        return
    check_pkg_config()
    check_libmysqlclient()
    check_mysqlclient_module()


def check_pkg_config():
    """Install ``pkg-config`` through apt unless it is already on PATH."""
    if shutil.which("pkg-config") is not None:
        return
    _apt_install("pkg-config")


def check_libmysqlclient():
    """Install the MySQL client development files unless pkg-config finds them."""
    for pc_name in _MYSQL_PC_NAMES:
        try:
            subprocess.check_call(["pkg-config", "--exists", pc_name])
            return
        except (subprocess.CalledProcessError, FileNotFoundError):
            # Not registered under this name, or pkg-config itself is missing.
            continue
    print("Installing default-libmysqlclient-dev")
    _apt_install("default-libmysqlclient-dev")


def check_mysqlclient_module():
    """Install the ``mysqlclient`` distribution unless ``MySQLdb`` imports."""
    if _mysqlclient_importable():
        return
    print("Installing mysqlclient")
    # Use the running interpreter's pip so the package lands in the same
    # environment the Cat is executing in.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "mysqlclient"])
    # Let the import system notice the freshly installed package.
    importlib.invalidate_caches()


def _build_uri(settings):
    """Build the SQLAlchemy connection URI from the plugin settings dict."""
    source = settings["data_source"]
    if source == "sqlite":
        # For SQLite the `host` setting is the database file name.
        return f"sqlite:///cat/plugins/sqlite_db/{settings['host']}"

    # Anything that is not sqlite/postgresql is treated as MySQL.
    scheme = "postgresql+psycopg2" if source == "postgresql" else "mysql"
    # Percent-encode credentials so characters such as '@', ':' or '/' cannot
    # be mistaken for URI delimiters. `quote(..., safe="")` is used rather
    # than `quote_plus` so a space becomes %20, which decodes correctly
    # whether or not the consumer treats '+' as a space.
    user = quote(str(settings["username"]), safe="")
    password = quote(str(settings["password"]), safe="")
    return (
        f"{scheme}://{user}:{password}"
        f"@{settings['host']}:{settings['port']}/{settings['database']}"
    )


def _parse_allowed_tables(raw):
    """Split the comma-separated `allowed_tables` setting into table names.

    Accepts both ``"a, b"`` and ``"a,b"``; surrounding whitespace and empty
    entries are dropped.

    Raises:
        ValueError: when no table name remains. An empty allow-list is
            rejected instead of silently exposing every table.
    """
    tables = [name.strip() for name in (raw or "").split(",") if name.strip()]
    if not tables:
        raise ValueError(
            "AIChatSQL: 'allowed_tables' must contain at least one table name "
            "(comma-separated)."
        )
    return tables


def connect(cat):
    """Open the database configured in the plugin settings.

    Args:
        cat: the Cheshire Cat instance; its `aichatsql` plugin settings
            supply the connection parameters.

    Returns:
        A LangChain ``SQLDatabase`` limited to the tables named in the
        `allowed_tables` setting.

    Raises:
        KeyError: when a required setting is missing.
        ValueError: when `allowed_tables` names no table.
    """
    settings = cat.mad_hatter.plugins["aichatsql"].load_settings()
    return SQLDatabase.from_uri(
        _build_uri(settings),
        include_tables=_parse_allowed_tables(settings["allowed_tables"]),
    )
# Settings schema for the AIChatSQL plugin.
from enum import Enum

from cat.mad_hatter.decorators import plugin
from pydantic import BaseModel


# Database backends selectable in the plugin settings. The member VALUES are
# the strings that aichatsql.py compares `data_source` against.
# Documentation is kept in `#` comments rather than docstrings so the
# generated settings schema is not given a new description.
class DataSource(Enum):
    # Enum members must not carry type annotations: the typing specification
    # tells type checkers to reject annotated members because the annotation
    # misstates the member's type (it is DataSource, not str). Member names
    # and values are unchanged.
    a = 'sqlite'
    b = 'postgresql'
    c = 'mysql'


# settings
class DatabaseSettings(BaseModel):

    # Which backend to connect to (see DataSource).
    data_source: DataSource

    # Server host name; for sqlite, aichatsql.py uses this value as the
    # database file name under cat/plugins/sqlite_db/.
    host: str
    # Kept as a string because it is interpolated into the connection URI.
    # The default is MySQL's conventional port; other backends must override.
    port: str = "3306"

    username: str = "root"
    password: str = "root"

    # Name of the database (schema) to open; not used for sqlite.
    database: str = "example"

    # Comma-separated list of the tables the LLM is allowed to see and query.
    allowed_tables: str

    # NOTE(review): not read by connect() in aichatsql.py - confirm whether
    # it should be applied to the connection or removed.
    character_encoding: str = "utf8"


@plugin
def settings_model():
    """Return the pydantic model class that describes this plugin's settings."""
    return DatabaseSettings