├── LICENSE ├── README.md ├── app.ipynb ├── bigquery-anthropic-chromadb.ipynb ├── bigquery-anthropic-marqo.ipynb ├── bigquery-anthropic-other-vectordb.ipynb ├── bigquery-anthropic-qdrant.ipynb ├── bigquery-anthropic-vannadb.ipynb ├── bigquery-gemini-chromadb.ipynb ├── bigquery-gemini-marqo.ipynb ├── bigquery-gemini-other-vectordb.ipynb ├── bigquery-gemini-qdrant.ipynb ├── bigquery-gemini-vannadb.ipynb ├── bigquery-mistral-chromadb.ipynb ├── bigquery-mistral-marqo.ipynb ├── bigquery-mistral-other-vectordb.ipynb ├── bigquery-mistral-qdrant.ipynb ├── bigquery-mistral-vannadb.ipynb ├── bigquery-ollama-chromadb.ipynb ├── bigquery-ollama-marqo.ipynb ├── bigquery-ollama-other-vectordb.ipynb ├── bigquery-ollama-qdrant.ipynb ├── bigquery-ollama-vannadb.ipynb ├── bigquery-openai-azure-chromadb.ipynb ├── bigquery-openai-azure-marqo.ipynb ├── bigquery-openai-azure-other-vectordb.ipynb ├── bigquery-openai-azure-qdrant.ipynb ├── bigquery-openai-azure-vannadb.ipynb ├── bigquery-openai-standard-chromadb.ipynb ├── bigquery-openai-standard-marqo.ipynb ├── bigquery-openai-standard-other-vectordb.ipynb ├── bigquery-openai-standard-qdrant.ipynb ├── bigquery-openai-standard-vannadb.ipynb ├── bigquery-openai-vanna-chromadb.ipynb ├── bigquery-openai-vanna-marqo.ipynb ├── bigquery-openai-vanna-other-vectordb.ipynb ├── bigquery-openai-vanna-qdrant.ipynb ├── bigquery-openai-vanna-vannadb.ipynb ├── bigquery-other-llm-chromadb.ipynb ├── bigquery-other-llm-marqo.ipynb ├── bigquery-other-llm-other-vectordb.ipynb ├── bigquery-other-llm-qdrant.ipynb ├── bigquery-other-llm-vannadb.ipynb ├── duckdb-anthropic-chromadb.ipynb ├── duckdb-anthropic-marqo.ipynb ├── duckdb-anthropic-other-vectordb.ipynb ├── duckdb-anthropic-qdrant.ipynb ├── duckdb-anthropic-vannadb.ipynb ├── duckdb-gemini-chromadb.ipynb ├── duckdb-gemini-marqo.ipynb ├── duckdb-gemini-other-vectordb.ipynb ├── duckdb-gemini-qdrant.ipynb ├── duckdb-gemini-vannadb.ipynb ├── duckdb-mistral-chromadb.ipynb ├── duckdb-mistral-marqo.ipynb ├── 
duckdb-mistral-other-vectordb.ipynb ├── duckdb-mistral-qdrant.ipynb ├── duckdb-mistral-vannadb.ipynb ├── duckdb-ollama-chromadb.ipynb ├── duckdb-ollama-marqo.ipynb ├── duckdb-ollama-other-vectordb.ipynb ├── duckdb-ollama-qdrant.ipynb ├── duckdb-ollama-vannadb.ipynb ├── duckdb-openai-azure-chromadb.ipynb ├── duckdb-openai-azure-marqo.ipynb ├── duckdb-openai-azure-other-vectordb.ipynb ├── duckdb-openai-azure-qdrant.ipynb ├── duckdb-openai-azure-vannadb.ipynb ├── duckdb-openai-standard-chromadb.ipynb ├── duckdb-openai-standard-marqo.ipynb ├── duckdb-openai-standard-other-vectordb.ipynb ├── duckdb-openai-standard-qdrant.ipynb ├── duckdb-openai-standard-vannadb.ipynb ├── duckdb-openai-vanna-chromadb.ipynb ├── duckdb-openai-vanna-marqo.ipynb ├── duckdb-openai-vanna-other-vectordb.ipynb ├── duckdb-openai-vanna-qdrant.ipynb ├── duckdb-openai-vanna-vannadb.ipynb ├── duckdb-other-llm-chromadb.ipynb ├── duckdb-other-llm-marqo.ipynb ├── duckdb-other-llm-other-vectordb.ipynb ├── duckdb-other-llm-qdrant.ipynb ├── duckdb-other-llm-vannadb.ipynb ├── mssql-anthropic-chromadb.ipynb ├── mssql-anthropic-marqo.ipynb ├── mssql-anthropic-other-vectordb.ipynb ├── mssql-anthropic-qdrant.ipynb ├── mssql-anthropic-vannadb.ipynb ├── mssql-gemini-chromadb.ipynb ├── mssql-gemini-marqo.ipynb ├── mssql-gemini-other-vectordb.ipynb ├── mssql-gemini-qdrant.ipynb ├── mssql-gemini-vannadb.ipynb ├── mssql-mistral-chromadb.ipynb ├── mssql-mistral-marqo.ipynb ├── mssql-mistral-other-vectordb.ipynb ├── mssql-mistral-qdrant.ipynb ├── mssql-mistral-vannadb.ipynb ├── mssql-ollama-chromadb.ipynb ├── mssql-ollama-marqo.ipynb ├── mssql-ollama-other-vectordb.ipynb ├── mssql-ollama-qdrant.ipynb ├── mssql-ollama-vannadb.ipynb ├── mssql-openai-azure-chromadb.ipynb ├── mssql-openai-azure-marqo.ipynb ├── mssql-openai-azure-other-vectordb.ipynb ├── mssql-openai-azure-qdrant.ipynb ├── mssql-openai-azure-vannadb.ipynb ├── mssql-openai-standard-chromadb.ipynb ├── mssql-openai-standard-marqo.ipynb ├── 
mssql-openai-standard-other-vectordb.ipynb ├── mssql-openai-standard-qdrant.ipynb ├── mssql-openai-standard-vannadb.ipynb ├── mssql-openai-vanna-chromadb.ipynb ├── mssql-openai-vanna-marqo.ipynb ├── mssql-openai-vanna-other-vectordb.ipynb ├── mssql-openai-vanna-qdrant.ipynb ├── mssql-openai-vanna-vannadb.ipynb ├── mssql-other-llm-chromadb.ipynb ├── mssql-other-llm-marqo.ipynb ├── mssql-other-llm-other-vectordb.ipynb ├── mssql-other-llm-qdrant.ipynb ├── mssql-other-llm-vannadb.ipynb ├── mysql-anthropic-chromadb.ipynb ├── mysql-anthropic-marqo.ipynb ├── mysql-anthropic-other-vectordb.ipynb ├── mysql-anthropic-qdrant.ipynb ├── mysql-anthropic-vannadb.ipynb ├── mysql-gemini-chromadb.ipynb ├── mysql-gemini-marqo.ipynb ├── mysql-gemini-other-vectordb.ipynb ├── mysql-gemini-qdrant.ipynb ├── mysql-gemini-vannadb.ipynb ├── mysql-mistral-chromadb.ipynb ├── mysql-mistral-marqo.ipynb ├── mysql-mistral-other-vectordb.ipynb ├── mysql-mistral-qdrant.ipynb ├── mysql-mistral-vannadb.ipynb ├── mysql-ollama-chromadb.ipynb ├── mysql-ollama-marqo.ipynb ├── mysql-ollama-other-vectordb.ipynb ├── mysql-ollama-qdrant.ipynb ├── mysql-ollama-vannadb.ipynb ├── mysql-openai-azure-chromadb.ipynb ├── mysql-openai-azure-marqo.ipynb ├── mysql-openai-azure-other-vectordb.ipynb ├── mysql-openai-azure-qdrant.ipynb ├── mysql-openai-azure-vannadb.ipynb ├── mysql-openai-standard-chromadb.ipynb ├── mysql-openai-standard-marqo.ipynb ├── mysql-openai-standard-other-vectordb.ipynb ├── mysql-openai-standard-qdrant.ipynb ├── mysql-openai-standard-vannadb.ipynb ├── mysql-openai-vanna-chromadb.ipynb ├── mysql-openai-vanna-marqo.ipynb ├── mysql-openai-vanna-other-vectordb.ipynb ├── mysql-openai-vanna-qdrant.ipynb ├── mysql-openai-vanna-vannadb.ipynb ├── mysql-other-llm-chromadb.ipynb ├── mysql-other-llm-marqo.ipynb ├── mysql-other-llm-other-vectordb.ipynb ├── mysql-other-llm-qdrant.ipynb ├── mysql-other-llm-vannadb.ipynb ├── oracle-anthropic-chromadb.ipynb ├── oracle-anthropic-marqo.ipynb ├── 
oracle-anthropic-other-vectordb.ipynb ├── oracle-anthropic-qdrant.ipynb ├── oracle-anthropic-vannadb.ipynb ├── oracle-gemini-chromadb.ipynb ├── oracle-gemini-marqo.ipynb ├── oracle-gemini-other-vectordb.ipynb ├── oracle-gemini-qdrant.ipynb ├── oracle-gemini-vannadb.ipynb ├── oracle-mistral-chromadb.ipynb ├── oracle-mistral-marqo.ipynb ├── oracle-mistral-other-vectordb.ipynb ├── oracle-mistral-qdrant.ipynb ├── oracle-mistral-vannadb.ipynb ├── oracle-ollama-chromadb.ipynb ├── oracle-ollama-marqo.ipynb ├── oracle-ollama-other-vectordb.ipynb ├── oracle-ollama-qdrant.ipynb ├── oracle-ollama-vannadb.ipynb ├── oracle-openai-azure-chromadb.ipynb ├── oracle-openai-azure-marqo.ipynb ├── oracle-openai-azure-other-vectordb.ipynb ├── oracle-openai-azure-qdrant.ipynb ├── oracle-openai-azure-vannadb.ipynb ├── oracle-openai-standard-chromadb.ipynb ├── oracle-openai-standard-marqo.ipynb ├── oracle-openai-standard-other-vectordb.ipynb ├── oracle-openai-standard-qdrant.ipynb ├── oracle-openai-standard-vannadb.ipynb ├── oracle-openai-vanna-chromadb.ipynb ├── oracle-openai-vanna-marqo.ipynb ├── oracle-openai-vanna-other-vectordb.ipynb ├── oracle-openai-vanna-qdrant.ipynb ├── oracle-openai-vanna-vannadb.ipynb ├── oracle-other-llm-chromadb.ipynb ├── oracle-other-llm-marqo.ipynb ├── oracle-other-llm-other-vectordb.ipynb ├── oracle-other-llm-qdrant.ipynb ├── oracle-other-llm-vannadb.ipynb ├── other-database-anthropic-chromadb.ipynb ├── other-database-anthropic-marqo.ipynb ├── other-database-anthropic-other-vectordb.ipynb ├── other-database-anthropic-qdrant.ipynb ├── other-database-anthropic-vannadb.ipynb ├── other-database-gemini-chromadb.ipynb ├── other-database-gemini-marqo.ipynb ├── other-database-gemini-other-vectordb.ipynb ├── other-database-gemini-qdrant.ipynb ├── other-database-gemini-vannadb.ipynb ├── other-database-mistral-chromadb.ipynb ├── other-database-mistral-marqo.ipynb ├── other-database-mistral-other-vectordb.ipynb ├── other-database-mistral-qdrant.ipynb ├── 
other-database-mistral-vannadb.ipynb ├── other-database-ollama-chromadb.ipynb ├── other-database-ollama-marqo.ipynb ├── other-database-ollama-other-vectordb.ipynb ├── other-database-ollama-qdrant.ipynb ├── other-database-ollama-vannadb.ipynb ├── other-database-openai-azure-chromadb.ipynb ├── other-database-openai-azure-marqo.ipynb ├── other-database-openai-azure-other-vectordb.ipynb ├── other-database-openai-azure-qdrant.ipynb ├── other-database-openai-azure-vannadb.ipynb ├── other-database-openai-standard-chromadb.ipynb ├── other-database-openai-standard-marqo.ipynb ├── other-database-openai-standard-other-vectordb.ipynb ├── other-database-openai-standard-qdrant.ipynb ├── other-database-openai-standard-vannadb.ipynb ├── other-database-openai-vanna-chromadb.ipynb ├── other-database-openai-vanna-marqo.ipynb ├── other-database-openai-vanna-other-vectordb.ipynb ├── other-database-openai-vanna-qdrant.ipynb ├── other-database-openai-vanna-vannadb.ipynb ├── other-database-other-llm-chromadb.ipynb ├── other-database-other-llm-marqo.ipynb ├── other-database-other-llm-other-vectordb.ipynb ├── other-database-other-llm-qdrant.ipynb ├── other-database-other-llm-vannadb.ipynb ├── postgres-anthropic-chromadb.ipynb ├── postgres-anthropic-marqo.ipynb ├── postgres-anthropic-other-vectordb.ipynb ├── postgres-anthropic-qdrant.ipynb ├── postgres-anthropic-vannadb.ipynb ├── postgres-gemini-chromadb.ipynb ├── postgres-gemini-marqo.ipynb ├── postgres-gemini-other-vectordb.ipynb ├── postgres-gemini-qdrant.ipynb ├── postgres-gemini-vannadb.ipynb ├── postgres-mistral-chromadb.ipynb ├── postgres-mistral-marqo.ipynb ├── postgres-mistral-other-vectordb.ipynb ├── postgres-mistral-qdrant.ipynb ├── postgres-mistral-vannadb.ipynb ├── postgres-ollama-chromadb.ipynb ├── postgres-ollama-marqo.ipynb ├── postgres-ollama-other-vectordb.ipynb ├── postgres-ollama-qdrant.ipynb ├── postgres-ollama-vannadb.ipynb ├── postgres-openai-azure-chromadb.ipynb ├── postgres-openai-azure-marqo.ipynb ├── 
postgres-openai-azure-other-vectordb.ipynb ├── postgres-openai-azure-qdrant.ipynb ├── postgres-openai-azure-vannadb.ipynb ├── postgres-openai-standard-chromadb.ipynb ├── postgres-openai-standard-marqo.ipynb ├── postgres-openai-standard-other-vectordb.ipynb ├── postgres-openai-standard-qdrant.ipynb ├── postgres-openai-standard-vannadb.ipynb ├── postgres-openai-vanna-chromadb.ipynb ├── postgres-openai-vanna-marqo.ipynb ├── postgres-openai-vanna-other-vectordb.ipynb ├── postgres-openai-vanna-qdrant.ipynb ├── postgres-openai-vanna-vannadb.ipynb ├── postgres-other-llm-chromadb.ipynb ├── postgres-other-llm-marqo.ipynb ├── postgres-other-llm-other-vectordb.ipynb ├── postgres-other-llm-qdrant.ipynb ├── postgres-other-llm-vannadb.ipynb ├── snowflake-anthropic-chromadb.ipynb ├── snowflake-anthropic-marqo.ipynb ├── snowflake-anthropic-other-vectordb.ipynb ├── snowflake-anthropic-qdrant.ipynb ├── snowflake-anthropic-vannadb.ipynb ├── snowflake-gemini-chromadb.ipynb ├── snowflake-gemini-marqo.ipynb ├── snowflake-gemini-other-vectordb.ipynb ├── snowflake-gemini-qdrant.ipynb ├── snowflake-gemini-vannadb.ipynb ├── snowflake-mistral-chromadb.ipynb ├── snowflake-mistral-marqo.ipynb ├── snowflake-mistral-other-vectordb.ipynb ├── snowflake-mistral-qdrant.ipynb ├── snowflake-mistral-vannadb.ipynb ├── snowflake-ollama-chromadb.ipynb ├── snowflake-ollama-marqo.ipynb ├── snowflake-ollama-other-vectordb.ipynb ├── snowflake-ollama-qdrant.ipynb ├── snowflake-ollama-vannadb.ipynb ├── snowflake-openai-azure-chromadb.ipynb ├── snowflake-openai-azure-marqo.ipynb ├── snowflake-openai-azure-other-vectordb.ipynb ├── snowflake-openai-azure-qdrant.ipynb ├── snowflake-openai-azure-vannadb.ipynb ├── snowflake-openai-standard-chromadb.ipynb ├── snowflake-openai-standard-marqo.ipynb ├── snowflake-openai-standard-other-vectordb.ipynb ├── snowflake-openai-standard-qdrant.ipynb ├── snowflake-openai-standard-vannadb.ipynb ├── snowflake-openai-vanna-chromadb.ipynb ├── snowflake-openai-vanna-marqo.ipynb ├── 
snowflake-openai-vanna-other-vectordb.ipynb ├── snowflake-openai-vanna-qdrant.ipynb ├── snowflake-openai-vanna-vannadb.ipynb ├── snowflake-other-llm-chromadb.ipynb ├── snowflake-other-llm-marqo.ipynb ├── snowflake-other-llm-other-vectordb.ipynb ├── snowflake-other-llm-qdrant.ipynb ├── snowflake-other-llm-vannadb.ipynb ├── sqlite-anthropic-chromadb.ipynb ├── sqlite-anthropic-marqo.ipynb ├── sqlite-anthropic-other-vectordb.ipynb ├── sqlite-anthropic-qdrant.ipynb ├── sqlite-anthropic-vannadb.ipynb ├── sqlite-gemini-chromadb.ipynb ├── sqlite-gemini-marqo.ipynb ├── sqlite-gemini-other-vectordb.ipynb ├── sqlite-gemini-qdrant.ipynb ├── sqlite-gemini-vannadb.ipynb ├── sqlite-mistral-chromadb.ipynb ├── sqlite-mistral-marqo.ipynb ├── sqlite-mistral-other-vectordb.ipynb ├── sqlite-mistral-qdrant.ipynb ├── sqlite-mistral-vannadb.ipynb ├── sqlite-ollama-chromadb.ipynb ├── sqlite-ollama-marqo.ipynb ├── sqlite-ollama-other-vectordb.ipynb ├── sqlite-ollama-qdrant.ipynb ├── sqlite-ollama-vannadb.ipynb ├── sqlite-openai-azure-chromadb.ipynb ├── sqlite-openai-azure-marqo.ipynb ├── sqlite-openai-azure-other-vectordb.ipynb ├── sqlite-openai-azure-qdrant.ipynb ├── sqlite-openai-azure-vannadb.ipynb ├── sqlite-openai-standard-chromadb.ipynb ├── sqlite-openai-standard-marqo.ipynb ├── sqlite-openai-standard-other-vectordb.ipynb ├── sqlite-openai-standard-qdrant.ipynb ├── sqlite-openai-standard-vannadb.ipynb ├── sqlite-openai-vanna-chromadb.ipynb ├── sqlite-openai-vanna-marqo.ipynb ├── sqlite-openai-vanna-other-vectordb.ipynb ├── sqlite-openai-vanna-qdrant.ipynb ├── sqlite-openai-vanna-vannadb.ipynb ├── sqlite-other-llm-chromadb.ipynb ├── sqlite-other-llm-marqo.ipynb ├── sqlite-other-llm-other-vectordb.ipynb ├── sqlite-other-llm-qdrant.ipynb └── sqlite-other-llm-vannadb.ipynb /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Vanna.AI 4 | 5 | Permission is hereby granted, free of charge, to any person 
obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # notebooks -------------------------------------------------------------------------------- /app.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "!pip install vanna\n", 10 | "import vanna\n", 11 | "from vanna.remote import VannaDefault\n", 12 | "vn = VannaDefault(model='chinook', api_key=vanna.get_api_key('my-email@example.com'))\n", 13 | "vn.connect_to_sqlite('https://vanna.ai/Chinook.sqlite')\n", 14 | "vn.ask(\"What are the top 10 albums by sales?\")" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "from vanna.flask import VannaFlaskApp\n", 24 | "VannaFlaskApp(vn).run()" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "## Here's what you'll get\n", 32 | "![vanna-flask](https://vanna.ai/blog/img/vanna-flask.gif)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [] 39 | } 40 | ], 41 | "metadata": { 42 | "language_info": { 43 | "name": "python" 44 | } 45 | }, 46 | "nbformat": 4, 47 | "nbformat_minor": 2 48 | } 49 | -------------------------------------------------------------------------------- /sqlite-anthropic-chromadb.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"id": "7965028e-037a-503b-962e-d88f2c51d5a0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Anthropic, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including 
connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](app.md)."}, {"id": "bc8ce0a4-f71a-5063-a374-decce9850029", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "130a90da-ab78-5001-a078-26f435e675ea", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1820a46b-bce8-5ff9-81fc-172bb2da632f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,anthropic]'"}, {"id": "1047eab7-6b6b-57ae-99a0-f55bf953b45b", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb import ChromaDB_VectorStore\nfrom vanna.anthropic.anthropic_chat import Anthropic_Chat\n"}, {"id": "8fdd7120-2c6b-5d23-a422-158e9bdf86da", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Anthropic_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        Anthropic_Chat.__init__(self, config={'api_key': ANTHROPIC_API_KEY, 'model': ANTHROPIC_MODEL})\n\nvn = MyVanna()\n"}, {"id": "69039360-2b84-5330-91ea-3066abffee32", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "068a891d-bbab-5462-9767-ebf7211fe423", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\ndf_ddl = vn.run_sql(\"SELECT type, sql FROM sqlite_master WHERE sql is not null\")\n\nfor ddl in df_ddl['sql'].to_list():\n vn.train(ddl=ddl)\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "6cf17ab9-dc48-58af-8d75-4e5590a01c88", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')\n"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "8c49dd68-3bc6-5098-93f1-2d4d8617badb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Launch the User Interface\n![vanna-flask](https://vanna.ai/blog/img/vanna-flask.gif)"}, {"id": "b87d140b-ef56-5795-b489-46bb11d01459", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.flask import VannaFlaskApp\napp = VannaFlaskApp(vn)\napp.run()"}, {"id": "29793859-c3c8-50da-994a-c8f6348d6730", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional customizable interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- 
[Slackbot](https://github.com/vanna-ai/vanna-slack)\n"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} -------------------------------------------------------------------------------- /sqlite-anthropic-marqo.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"id": "9c3eaf58-a99f-579c-9e11-99922ac13220", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Anthropic, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](app.md)."}, {"id": "dd78135b-fcff-5eff-ac3b-f09433c025bc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "4bce17e9-f1aa-5f1a-ad9f-a2fd64e7b9bd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "a5502eb4-fcc8-52a3-a133-db8153d85427", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,anthropic]'"}, {"id": "7be2a7a7-62dd-59a9-9b9e-cb78979f3b78", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.marqo import Marqo\nfrom vanna.anthropic.anthropic_chat import Anthropic_Chat\n"}, {"id": "f8efb933-4f42-576a-b0b1-4f2b352b60bc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, Anthropic_Chat):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        Anthropic_Chat.__init__(self, config={'api_key': ANTHROPIC_API_KEY, 'model': ANTHROPIC_MODEL})\n\nvn = MyVanna()\n"}, {"id": "c80d9678-95e7-5e98-b2b2-86690faee30d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "068a891d-bbab-5462-9767-ebf7211fe423", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\ndf_ddl = vn.run_sql(\"SELECT type, sql FROM sqlite_master WHERE sql is not null\")\n\nfor ddl in df_ddl['sql'].to_list():\n vn.train(ddl=ddl)\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "6cf17ab9-dc48-58af-8d75-4e5590a01c88", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')\n"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "8c49dd68-3bc6-5098-93f1-2d4d8617badb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Launch the User Interface\n![vanna-flask](https://vanna.ai/blog/img/vanna-flask.gif)"}, {"id": "b87d140b-ef56-5795-b489-46bb11d01459", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.flask import VannaFlaskApp\napp = VannaFlaskApp(vn)\napp.run()"}, {"id": "29793859-c3c8-50da-994a-c8f6348d6730", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional customizable interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- 
[Slackbot](https://github.com/vanna-ai/vanna-slack)\n"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} -------------------------------------------------------------------------------- /sqlite-gemini-chromadb.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"id": "db6d2acc-65d0-51e5-9987-82f362181568", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Google Gemini, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](app.md)."}, {"id": "da236fc8-1989-5e48-859c-190651504118", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "677344d4-c42f-54ff-b121-5a55656d9f32", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "56f969cb-ded5-5b99-8475-7477b1b2e8d8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,gemini]'"}, {"id": "3f18d5eb-4c8b-579c-aaaf-60319e8332e2", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb import ChromaDB_VectorStore\nfrom vanna.google import GoogleGeminiChat\n"}, {"id": "866ca80a-4786-5111-b0fd-8678ffd35fb2", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, GoogleGeminiChat):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n GoogleGeminiChat.__init__(self, config={'api_key': GEMINI_API_KEY, 'model': GEMINI_MODEL})\n\nvn = MyVanna()\n"}, {"id": "442a242a-53e3-58af-9414-d20258240eb4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "068a891d-bbab-5462-9767-ebf7211fe423", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\ndf_ddl = vn.run_sql(\"SELECT type, sql FROM sqlite_master WHERE sql is not null\")\n\nfor ddl in df_ddl['sql'].to_list():\n vn.train(ddl=ddl)\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "6cf17ab9-dc48-58af-8d75-4e5590a01c88", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')\n"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "8c49dd68-3bc6-5098-93f1-2d4d8617badb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Launch the User Interface\n![vanna-flask](https://vanna.ai/blog/img/vanna-flask.gif)"}, {"id": "b87d140b-ef56-5795-b489-46bb11d01459", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.flask import VannaFlaskApp\napp = VannaFlaskApp(vn)\napp.run()"}, {"id": "29793859-c3c8-50da-994a-c8f6348d6730", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional customizable interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- 
[Slackbot](https://github.com/vanna-ai/vanna-slack)\n"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} -------------------------------------------------------------------------------- /sqlite-gemini-marqo.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"id": "67d72caf-3d0f-56ae-962b-fdf0a7a873d6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Google Gemini, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](app.md)."}, {"id": "7dd26847-ea46-5638-943c-b20d0e4357b2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "a9b3883a-f577-542c-b6db-0f0b31d077fa", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1edb39e2-f07f-52ee-b345-33122d3b0a2f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,gemini]'"}, {"id": "3b9bf7a5-4f0b-5c29-b861-364307d1e021", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.marqo import Marqo\nfrom vanna.google import GoogleGeminiChat\n"}, {"id": "c3a0e53e-c694-59e8-b309-a0102645baa5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, GoogleGeminiChat):\n def __init__(self, config=None):\n Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n GoogleGeminiChat.__init__(self, config={'api_key': GEMINI_API_KEY, 'model': GEMINI_MODEL})\n\nvn = MyVanna()\n"}, {"id": "cb0d9d15-e9ca-5ae0-b464-753a998aec04", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "068a891d-bbab-5462-9767-ebf7211fe423", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\ndf_ddl = vn.run_sql(\"SELECT type, sql FROM sqlite_master WHERE sql is not null\")\n\nfor ddl in df_ddl['sql'].to_list():\n vn.train(ddl=ddl)\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "6cf17ab9-dc48-58af-8d75-4e5590a01c88", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')\n"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "8c49dd68-3bc6-5098-93f1-2d4d8617badb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Launch the User Interface\n![vanna-flask](https://vanna.ai/blog/img/vanna-flask.gif)"}, {"id": "b87d140b-ef56-5795-b489-46bb11d01459", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.flask import VannaFlaskApp\napp = VannaFlaskApp(vn)\napp.run()"}, {"id": "29793859-c3c8-50da-994a-c8f6348d6730", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional customizable interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- 
[Slackbot](https://github.com/vanna-ai/vanna-slack)\n"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} -------------------------------------------------------------------------------- /sqlite-mistral-marqo.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"id": "27d71ffc-bde1-5c70-9215-96d24d029e73", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Mistral via Mistral API, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](app.md)."}, {"id": "2f3c1df8-036a-5306-b321-544813c69b1d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "2adcebc6-6a1f-5fa9-ba8e-69d95fba0206", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "ec51e5b5-fc27-5c00-b1a8-6c9dd188e2f5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,mistralai]'"}, {"id": "b49e1367-47e6-514f-92a9-d695d45a546c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.marqo import Marqo\nfrom vanna.mistral import Mistral\n"}, {"id": "de83d46a-6116-5978-8b9a-f0f586a583bd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, Mistral):\n def __init__(self, config=None):\n Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "2fa44d9b-742c-5eed-85c6-9fee93b8428d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "068a891d-bbab-5462-9767-ebf7211fe423", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\ndf_ddl = vn.run_sql(\"SELECT type, sql FROM sqlite_master WHERE sql is not null\")\n\nfor ddl in df_ddl['sql'].to_list():\n vn.train(ddl=ddl)\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "6cf17ab9-dc48-58af-8d75-4e5590a01c88", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')\n"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "8c49dd68-3bc6-5098-93f1-2d4d8617badb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Launch the User Interface\n![vanna-flask](https://vanna.ai/blog/img/vanna-flask.gif)"}, {"id": "b87d140b-ef56-5795-b489-46bb11d01459", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.flask import VannaFlaskApp\napp = VannaFlaskApp(vn)\napp.run()"}, {"id": "29793859-c3c8-50da-994a-c8f6348d6730", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional customizable interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- 
[Slackbot](https://github.com/vanna-ai/vanna-slack)\n"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} -------------------------------------------------------------------------------- /sqlite-ollama-chromadb.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"id": "7b37d147-f805-5c69-b0b6-e9d5a004ad9b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Ollama, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](app.md)."}, {"id": "7ed80812-ebf1-55a9-aa96-9d3ba04738dc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "c5169b9c-6c68-592c-a4f1-e906ae4cc006", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "80739260-c798-54dd-b7aa-6cf6b14eb977", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,ollama]'"}, {"id": "bc0bbf73-fc26-5523-8c11-1cc8811ea1c5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.ollama import Ollama\nfrom vanna.chromadb import ChromaDB_VectorStore\n"}, {"id": "2c96f1ef-23ea-58a3-b86b-5ff4414f2102", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Ollama):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n Ollama.__init__(self, config=config)\n\nvn = MyVanna(config={'model': 'mistral'})\n"}, {"id": "31cdb7be-ff49-50bd-8e64-b7d55849f010", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "068a891d-bbab-5462-9767-ebf7211fe423", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\ndf_ddl = vn.run_sql(\"SELECT type, sql FROM sqlite_master WHERE sql is not null\")\n\nfor ddl in df_ddl['sql'].to_list():\n vn.train(ddl=ddl)\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "6cf17ab9-dc48-58af-8d75-4e5590a01c88", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')\n"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "8c49dd68-3bc6-5098-93f1-2d4d8617badb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Launch the User Interface\n![vanna-flask](https://vanna.ai/blog/img/vanna-flask.gif)"}, {"id": "b87d140b-ef56-5795-b489-46bb11d01459", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.flask import VannaFlaskApp\napp = VannaFlaskApp(vn)\napp.run()"}, {"id": "29793859-c3c8-50da-994a-c8f6348d6730", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional customizable interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- 
[Slackbot](https://github.com/vanna-ai/vanna-slack)\n"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} -------------------------------------------------------------------------------- /sqlite-ollama-marqo.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"id": "5b1fa908-747c-50fc-89b5-49deefde4403", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Ollama, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](app.md)."}, {"id": "1069f35e-91bb-50be-91e3-08c2baf4110f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "b27e0e38-bbda-5115-8c9d-2c8b882d0519", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "f461ca3d-0967-5c16-a8b1-03a7553d585f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,ollama]'"}, {"id": "7c54f96a-c044-541d-9b92-48c530c89eeb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.ollama import Ollama\nfrom vanna.marqo import Marqo\n"}, {"id": "571121dc-ad81-5558-b757-2c67541965e5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, Ollama):\n def __init__(self, config=None):\n Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n Ollama.__init__(self, config=config)\n\nvn = MyVanna(config={'model': 'mistral'})\n"}, {"id": "ba10cc3e-9f55-5a57-b4eb-c309749a552f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "068a891d-bbab-5462-9767-ebf7211fe423", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\ndf_ddl = vn.run_sql(\"SELECT type, sql FROM sqlite_master WHERE sql is not null\")\n\nfor ddl in df_ddl['sql'].to_list():\n vn.train(ddl=ddl)\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "6cf17ab9-dc48-58af-8d75-4e5590a01c88", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')\n"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "8c49dd68-3bc6-5098-93f1-2d4d8617badb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Launch the User Interface\n![vanna-flask](https://vanna.ai/blog/img/vanna-flask.gif)"}, {"id": "b87d140b-ef56-5795-b489-46bb11d01459", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.flask import VannaFlaskApp\napp = VannaFlaskApp(vn)\napp.run()"}, {"id": "29793859-c3c8-50da-994a-c8f6348d6730", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional customizable interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- 
[Slackbot](https://github.com/vanna-ai/vanna-slack)\n"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} -------------------------------------------------------------------------------- /sqlite-ollama-qdrant.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"id": "8cfa9a73-1575-5501-ad77-d85861765ab8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Ollama, Qdrant\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](app.md)."}, {"id": "3e0a4f28-6d02-5140-9007-52127bed8fb8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "450048d5-f2a8-538e-879e-002501c9a443", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "06db2c89-5e61-54b9-a812-374289417681", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[qdrant,ollama]'"}, {"id": "e426a31f-e842-57d4-bd2e-6b795eb9bfda", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.ollama import Ollama\nfrom vanna.qdrant import Qdrant_VectorStore\nfrom qdrant_client import QdrantClient\n"}, {"id": "d7f897a6-cd6c-5dd0-9f88-999151fd5eb6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Qdrant_VectorStore, Ollama):\n def __init__(self, config=None):\n Qdrant_VectorStore.__init__(self, config=config)\n Ollama.__init__(self, config=config)\n\nvn = MyVanna(config={'client': 'QdrantClient(...)', 'model': 'mistral'})\n"}, {"id": "6994d380-5eff-5f49-92fe-37c688f7814a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "068a891d-bbab-5462-9767-ebf7211fe423", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\ndf_ddl = vn.run_sql(\"SELECT type, sql FROM sqlite_master WHERE sql is not null\")\n\nfor ddl in df_ddl['sql'].to_list():\n vn.train(ddl=ddl)\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "6cf17ab9-dc48-58af-8d75-4e5590a01c88", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')\n"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "8c49dd68-3bc6-5098-93f1-2d4d8617badb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Launch the User Interface\n![vanna-flask](https://vanna.ai/blog/img/vanna-flask.gif)"}, {"id": "b87d140b-ef56-5795-b489-46bb11d01459", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.flask import VannaFlaskApp\napp = VannaFlaskApp(vn)\napp.run()"}, {"id": "29793859-c3c8-50da-994a-c8f6348d6730", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional customizable interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- 
[Slackbot](https://github.com/vanna-ai/vanna-slack)\n"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} -------------------------------------------------------------------------------- /sqlite-openai-vanna-chromadb.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"id": "dd7222ea-95c3-588b-af47-bf31d521efa2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](app.md)."}, {"id": "bdcb321a-57a4-5d9f-aa3a-415acb78c275", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "85226899-c057-5af0-a12f-b2a985b60aa5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "1047eab7-6b6b-57ae-99a0-f55bf953b45b", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb import ChromaDB_VectorStore\n"}, {"id": "3225927e-ae19-5159-a112-8dac5a3cda22", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "723a1576-b163-5c79-85cf-9ced7fc0eb40", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "068a891d-bbab-5462-9767-ebf7211fe423", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\ndf_ddl = vn.run_sql(\"SELECT type, sql FROM sqlite_master WHERE sql is not null\")\n\nfor ddl in df_ddl['sql'].to_list():\n vn.train(ddl=ddl)\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "6cf17ab9-dc48-58af-8d75-4e5590a01c88", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')\n"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "8c49dd68-3bc6-5098-93f1-2d4d8617badb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Launch the User Interface\n![vanna-flask](https://vanna.ai/blog/img/vanna-flask.gif)"}, {"id": "b87d140b-ef56-5795-b489-46bb11d01459", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.flask import VannaFlaskApp\napp = VannaFlaskApp(vn)\napp.run()"}, {"id": "29793859-c3c8-50da-994a-c8f6348d6730", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional customizable interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- 
[Slackbot](https://github.com/vanna-ai/vanna-slack)\n"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} -------------------------------------------------------------------------------- /sqlite-openai-vanna-marqo.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"id": "1dd3b928-587c-58a3-b41f-baf23c0cc083", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](app.md)."}, {"id": "bc98c82f-5100-5091-9f72-73a1afd3718e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "1870259a-bfe9-55da-a343-68a4e1208cfe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30f0eccb-7684-5642-98ae-31bee8662133", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo]'"}, {"id": "7be2a7a7-62dd-59a9-9b9e-cb78979f3b78", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.marqo import Marqo\n"}, {"id": "b0e1d67a-58fd-5bcd-b2a2-a13317fda343", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo):\n def __init__(self, config=None):\n Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n\nvn = MyVanna()\n"}, {"id": "3b95eced-6d7d-54ba-8739-8d5d3cc02855", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "068a891d-bbab-5462-9767-ebf7211fe423", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\ndf_ddl = vn.run_sql(\"SELECT type, sql FROM sqlite_master WHERE sql is not null\")\n\nfor ddl in df_ddl['sql'].to_list():\n vn.train(ddl=ddl)\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "6cf17ab9-dc48-58af-8d75-4e5590a01c88", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')\n"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "8c49dd68-3bc6-5098-93f1-2d4d8617badb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Launch the User Interface\n![vanna-flask](https://vanna.ai/blog/img/vanna-flask.gif)"}, {"id": "b87d140b-ef56-5795-b489-46bb11d01459", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.flask import VannaFlaskApp\napp = VannaFlaskApp(vn)\napp.run()"}, {"id": "29793859-c3c8-50da-994a-c8f6348d6730", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional customizable interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- 
[Slackbot](https://github.com/vanna-ai/vanna-slack)\n"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} -------------------------------------------------------------------------------- /sqlite-openai-vanna-qdrant.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"id": "d384da22-eeaf-5e82-89a9-02abe72f4904", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), Qdrant\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](app.md)."}, {"id": "a1e055de-5858-58e8-a4cf-dc5ac5fd9022", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "921e0597-f3da-54dd-a5b0-237e999da7b7", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "6c5a554b-2cec-5632-a8a9-6dac37349306", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[qdrant]'"}, {"id": "a714f4e5-4ed2-5bac-b62f-cea2237edeb4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.qdrant import Qdrant_VectorStore\nfrom qdrant_client import QdrantClient\n"}, {"id": "d83f58a5-45d2-5045-a1ff-58fc80772043", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Qdrant_VectorStore):\n def __init__(self, config=None):\n Qdrant_VectorStore.__init__(self, config=config)\n\nvn = MyVanna(config={'client': 'QdrantClient(...)'})\n"}, {"id": "1e96da49-de51-5145-9a37-d532872d98d9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "068a891d-bbab-5462-9767-ebf7211fe423", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\ndf_ddl = vn.run_sql(\"SELECT type, sql FROM sqlite_master WHERE sql is not null\")\n\nfor ddl in df_ddl['sql'].to_list():\n vn.train(ddl=ddl)\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "6cf17ab9-dc48-58af-8d75-4e5590a01c88", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')\n"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "8c49dd68-3bc6-5098-93f1-2d4d8617badb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Launch the User Interface\n![vanna-flask](https://vanna.ai/blog/img/vanna-flask.gif)"}, {"id": "b87d140b-ef56-5795-b489-46bb11d01459", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.flask import VannaFlaskApp\napp = VannaFlaskApp(vn)\napp.run()"}, {"id": "29793859-c3c8-50da-994a-c8f6348d6730", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional customizable interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- 
[Slackbot](https://github.com/vanna-ai/vanna-slack)\n"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} -------------------------------------------------------------------------------- /sqlite-openai-vanna-vannadb.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"id": "48e2bc4d-2b8b-5e1c-beb3-194929d0419e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](app.md)."}, {"id": "663691e0-2c1a-58b2-9d05-1e9b3350c314", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "bea8d3be-bfee-556e-81f9-20bca420a602", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "6160c274-caf4-537e-9a02-f6a1d7022a2c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna\nfrom vanna.remote import VannaDefault"}, {"id": "7cd78528-b0b0-5428-901c-6b5dc2158ef9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = # Your API key from https://vanna.ai/account/profile \n\nvanna_model_name = # Your model name from https://vanna.ai/account/profile \nvn = VannaDefault(model=vanna_model_name, api_key=api_key)\n"}, {"id": "2803a9e1-8a66-50b6-91e9-14c6c0ae58c1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "068a891d-bbab-5462-9767-ebf7211fe423", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\ndf_ddl = vn.run_sql(\"SELECT type, sql FROM sqlite_master WHERE sql is not null\")\n\nfor ddl in df_ddl['sql'].to_list():\n vn.train(ddl=ddl)\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "6cf17ab9-dc48-58af-8d75-4e5590a01c88", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')\n"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "8c49dd68-3bc6-5098-93f1-2d4d8617badb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Launch the User Interface\n![vanna-flask](https://vanna.ai/blog/img/vanna-flask.gif)"}, {"id": "b87d140b-ef56-5795-b489-46bb11d01459", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.flask import VannaFlaskApp\napp = VannaFlaskApp(vn)\napp.run()"}, {"id": "29793859-c3c8-50da-994a-c8f6348d6730", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional customizable interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- 
[Slackbot](https://github.com/vanna-ai/vanna-slack)\n"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} --------------------------------------------------------------------------------