├── .editorconfig
├── .github
    ├── FUNDING.yml
    └── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   ├── feature_request.md
    │   └── other.md
├── .gitignore
├── README.md
├── app.py
├── charts
    ├── __init__.py
    ├── bar_chart.py
    ├── base_chart.py
    ├── heatmap_chart.py
    ├── line_chart.py
    ├── map_chart.py
    └── pie_chart.py
├── data
    ├── config
    │   ├── databases.json
    │   └── models.json
    ├── models
    │   └── .gitignore
    ├── sample
    │   └── sample.db
    └── schemas
    │   └── sample.txt
├── db
    ├── __init__.py
    ├── base_database.py
    └── sql_database.py
├── docs
    └── CLOUD_MODELS.md
├── extras
    └── images
    │   ├── logo.png
    │   ├── screenshot-2.png
    │   └── screenshot.png
├── helpers
    ├── __init__.py
    ├── chart.py
    ├── db.py
    ├── file.py
    ├── model.py
    ├── prompt.py
    ├── response.py
    ├── schema.py
    └── string.py
├── models
    ├── __init__.py
    ├── anthropic_model.py
    ├── base_model.py
    ├── deep_seek_model.py
    ├── gemini_model.py
    ├── grok_model.py
    ├── local_gguf_model.py
    ├── local_model.py
    └── openai_model.py
├── prompts.txt
├── requirements-gguf.txt
└── requirements.txt


/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | end_of_line = lf
 5 | charset = utf-8
 6 | insert_final_newline = true
 7 | indent_style = space
 8 | indent_size = 4
 9 | trim_trailing_whitespace = true
10 | 
11 | [*.html]
12 | indent_size = 4
13 | 
14 | [*.json]
15 | indent_size = 2
16 | insert_final_newline = false
17 | 
18 | [{*[Mm]akefile*,*.mak,*.mk,depend}]
19 | indent_style = tab
20 | 
 21 | [*.{bat,cmd,cmd.*}]
22 | end_of_line = crlf
23 | 
 24 | [*.{yml,yaml}]
25 | indent_size = 2
26 | 
27 | [*.gemspec]
28 | indent_size = 2
29 | 
 30 | [{*.rb,Fastfile,Gemfile,Brewfile,Podfile}]
31 | indent_size = 2
32 | 
33 | [*.{kt,kts}]
34 | ktlint_standard_argument-list-wrapping = disabled
35 | ktlint_standard_trailing-comma-on-call-site = disabled
36 | ktlint_standard_trailing-comma-on-declaration-site = disabled
37 | 
38 | [*.dart]
39 | indent_size = 2
40 | indent_brace_style = K&R
41 | 


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: paulocoutinhox
2 | ko_fi: paulocoutinho
3 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 
16 | 1. Go to '...'
17 | 2. Execute on terminal '....'
18 | 3. See error
19 | 
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 | 
26 | **System (please complete the following information):**
27 |  - OS: [e.g. Ubuntu]
 28 |  - Browser: [e.g. Chrome, Safari, Firefox]
29 | 
30 | **Additional context**
31 | Add any other context about the problem here.
32 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/other.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Other type
 3 | about: Other issue type not related to bug or feature.
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | 
11 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | .DS_Store
 3 | node_modules/
 4 | build/
 5 | .idea/
 6 | venv
 7 | .env
 8 | *.iml
 9 | Thumbs.db
10 | .vscode
11 | __pycache__
12 | *.lock
13 | .gradio/
14 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <p align="center">
  2 |     <a href="https://github.com/paulocoutinhox/db-talk-ai" target="_blank" rel="noopener noreferrer">
  3 |         <img width="180" src="extras/images/logo.png" alt="Logo">
  4 |     </a>
  5 | </p>
  6 | 
  7 | # DB TALK - AI 🧠
  8 | 
  9 | **DB TALK - AI** is an interactive application built with **Python** and **Streamlit**, allowing users to query databases using **AI-generated SQL**. It supports **local AI models** or **cloud-based models** (such as **OpenAI GPT**) and provides results as **tables** and **charts**.
 10 | 
 11 | ## 🚀 Features
 12 | 
 13 | - Supports **SQLite, PostgreSQL, and MySQL**
 14 | - **AI-powered SQL generation** using **cloud-based models** or **local AI models**
 15 | - **Automatic database schema extraction** with overwrite confirmation
 16 | - **Schema files saved with unique UUIDs or predefined names**
 17 | - **Chart visualization** (Bar, Pie, Line, Heatmap, and Map)
 18 | - **Streamlit web interface** for easy interaction
 19 | - **Secure database connection management** using a configuration file
 20 | - **Modular database support** for SQL databases
 21 | - **Dynamic model discovery** for both:
 22 |   - Local GGUF models (auto-detected from the `data/models/` directory)
 23 |   - Local Transformer models (loaded from `data/config/models.json`)
 24 | - Supports various AI providers:
 25 |   - **OpenAI GPT**
 26 |   - **DeepSeek AI**
 27 |   - **Gemini AI**
 28 |   - **Grok AI**
 29 |   - **Anthropic AI**
 30 |   - **Local Transformer models** (using Hugging Face)
 31 |   - **Local GGUF models** (using GPT4All)
 32 | - Supports a custom root directory using the **`DB_TALK_AI_ROOT`** environment variable
 33 | 
 34 | ## 📞 Installation
 35 | 
 36 | ### **1. Clone the Repository**
 37 | ```sh
 38 | git clone https://github.com/paulocoutinhox/db-talk-ai.git
 39 | cd db-talk-ai
 40 | ```
 41 | 
 42 | ### **2. Create a Virtual Environment**
 43 | ```sh
 44 | python3 -m venv .venv
 45 | source .venv/bin/activate  # macOS/Linux
 46 | .venv\Scripts\activate     # Windows
 47 | ```
 48 | 
 49 | ### **3. Install Dependencies**
 50 | ```sh
 51 | pip install -r requirements.txt
 52 | ```
 53 | 
 54 | If you want to use local GGUF models, install their dependencies too:
 55 | ```sh
 56 | pip install -r requirements-gguf.txt
 57 | ```
 58 | 
 59 | ### **4. Download Local AI Models (Optional)**
 60 | If you prefer to use a **local AI model** instead of a **cloud-based model** (e.g., OpenAI GPT), follow these steps:
 61 | 
 62 | #### **Step 1: Download a Local AI Model**
 63 | You can download **GGUF format** models from Hugging Face:
 64 | - [Hugging Face - GGUF Models](https://huggingface.co/models)
 65 | 
 66 | #### **Step 2: Place the Model in the `data/models/` Directory**
 67 | 
 68 | #### **Step 3: Select the Model in the Streamlit App**
 69 | The model selector will automatically list `.gguf` files in `data/models/`. Choose one in the UI.
 70 | 
 71 | #### **Step 4: Run the Application**
 72 | ```sh
 73 | streamlit run app.py
 74 | ```
 75 | 
 76 | ## ⚙️ Configuration
 77 | 
 78 | ### **1. Set API Keys (For Cloud Models)**
 79 | 
 80 | For setting up cloud-based models like **OpenAI GPT**, **DeepSeek AI**, **Gemini AI**, **Grok AI** and **Anthropic**, please refer to the separate guide:
 81 | 
 82 | 📖 [Cloud Models Configuration](docs/CLOUD_MODELS.md)
 83 | 
 84 | ### **2. Set Custom Root Directory (Optional)**
 85 | You can specify a custom root directory by setting the **`DB_TALK_AI_ROOT`** environment variable. If set, the app will look for the `data/` directory under this path instead of the default local `data/`.
 86 | 
 87 | #### **Linux/macOS**
 88 | ```sh
 89 | export DB_TALK_AI_ROOT="/path/to/custom/root"
 90 | ```
 91 | 
 92 | #### **Windows (Command Prompt)**
 93 | ```sh
 94 | set "DB_TALK_AI_ROOT=C:\path\to\custom\root"
 95 | ```
 96 | 
 97 | #### **Windows (PowerShell)**
 98 | ```powershell
 99 | $env:DB_TALK_AI_ROOT="C:\path\to\custom\root"
100 | ```
101 | 
102 | ### **3. Configure Database Connections**
103 | Configure the `data/config/databases.json` file (or `{DB_TALK_AI_ROOT}/data/config/databases.json` if using the custom root):
104 | 
105 | ```json
106 | [
107 |   {
108 |     "name": "Sample SQLite DB",
109 |     "type": "sql",
110 |     "connection_string": "sqlite:///data/sample/sample.db",
111 |     "schema": "sample.txt"
112 |   },
113 |   {
114 |     "name": "Production PostgreSQL",
115 |     "type": "sql",
116 |     "connection_string": "postgresql://user:password@localhost:5432/production_db"
117 |   },
118 |   {
119 |     "name": "MySQL Backup",
120 |     "type": "sql",
121 |     "connection_string": "mysql://user:password@host:3306/dbname"
122 |   }
123 | ]
124 | ```
125 | 
126 | ### **4. Generate Database Schema**
127 | 1. Click **"Generate Database Schema"** in the UI.
128 | 2. A confirmation dialog will appear if a schema already exists.
129 | 3. The schema will be saved in the `data/schemas/` directory under the root directory.
130 | 4. The app automatically updates the `data/config/databases.json` with the new schema filename.
131 | 
132 | ## 🛠️ Usage
133 | 
134 | 1. **Run the Application**
135 |    ```sh
136 |    streamlit run app.py
137 |    ```
138 | 
139 | 2. **Steps in the Web UI**
140 |    - **Select a database** from the configured options
141 |    - **Generate the database schema** if needed
142 |    - **Choose an AI model** (cloud or local)
143 |    - **Ask questions in natural language** to generate queries
144 |    - **View results** in tables or **charts** (Bar, Pie, Line, Heatmap, Map)
145 | 
146 | ## 📂 Project Structure
147 | 
148 | ```
149 | db-talk-ai/
150 | │
151 | ├── README.md               # Project documentation
152 | ├── app.py                  # Main entry point (Streamlit interface)
153 | ├── requirements.txt        # List of core dependencies
154 | ├── requirements-gguf.txt   # Dependencies for GGUF models
155 | │
156 | ├── data/                   # Data storage and configurations
157 | │   ├── config/             # Configuration files
158 | │   │   ├── databases.json  # Database connection configurations
159 | │   │   └── models.json     # Local AI model configurations (path, dtype, etc.)
160 | │   ├── models/             # Local GGUF AI model files
161 | │   │   └── Llama-3.2-3B-Instruct-Q5_K_M.gguf  # Example local GGUF model
162 | │   ├── sample/             # Sample databases for testing
163 | │   │   └── sample.db       # Example SQLite database
164 | │   └── schemas/            # Generated database schema files
165 | │       └── sample.txt      # Example schema output file
166 | │
167 | ├── models/                 # AI model implementations
168 | │   ├── anthropic_model.py  # Integration with Anthropic API
169 | │   ├── base_model.py       # Abstract base class for AI models
170 | │   ├── local_model.py      # Local model implementation using Hugging Face Transformers
171 | │   ├── local_gguf_model.py # Local GGUF model implementation using GPT4All
172 | │   ├── openai_model.py     # Integration with OpenAI API
173 | │   ├── deep_seek_model.py  # Integration with DeepSeek models
174 | │   ├── grok_model.py       # Integration with Grok AI models
175 | │   └── gemini_model.py     # Integration with Gemini AI models
176 | │
177 | ├── extras/                 # Additional resources (images, icons, etc.)
178 | │
179 | ├── helpers/                # Utility functions
180 | │   ├── chart.py            # Functions for generating charts
181 | │   ├── db.py               # Database connection handling
182 | │   ├── file.py             # File system helpers (e.g., loading models)
183 | │   ├── model.py            # AI model management functions
184 | │   ├── prompt.py           # Functions to create SQL prompts
185 | │   ├── response.py         # Functions to process and clean AI responses
186 | │   ├── schema.py           # Functions to generate database schemas
187 | │   └── string.py           # String manipulation utilities
188 | │
189 | ├── db/                     # Database connection implementations
190 | │   ├── base_database.py    # Base class for database connections
191 | │   └── sql_database.py     # SQL database connection implementation
192 | │
193 | ├── charts/                 # Chart implementations for data visualization
194 | │   ├── base_chart.py       # Base class for chart types
195 | │   ├── bar_chart.py        # Bar chart visualization
196 | │   ├── line_chart.py       # Line chart visualization
197 | │   ├── pie_chart.py        # Pie chart visualization
198 | │   ├── heatmap_chart.py    # Heatmap visualization
199 | │   └── map_chart.py        # Geographical map chart visualization
200 | │
201 | ├── prompts.txt             # Example prompts for generating SQL queries
202 | ```
203 | 
204 | ## 🤝 Contributing
205 | 
206 | 1. Fork the repository
207 | 2. Create a new branch (`git checkout -b feature-xyz`)
208 | 3. Commit changes (`git commit -m "Added new feature"`)
209 | 4. Push to the branch (`git push origin feature-xyz`)
210 | 5. Open a **pull request**
211 | 
212 | ## 📞 Contact
213 | 
214 | For issues or contributions, open a **GitHub issue** or contact:
215 | 💎 **paulocoutinhox@gmail.com**
216 | 🔗 **[GitHub](https://github.com/paulocoutinhox)**
217 | 
218 | ## 🖼️ Screenshots
219 | 
220 | <img width="280" src="https://github.com/paulocoutinhox/db-talk-ai/blob/main/extras/images/screenshot.png?raw=true">
221 | 
222 | <img width="280" src="https://github.com/paulocoutinhox/db-talk-ai/blob/main/extras/images/screenshot-2.png?raw=true">
223 | 
224 | ## 📜 License
225 | 
226 | [MIT](http://opensource.org/licenses/MIT)
227 | 
228 | Copyright (c) 2025, Paulo Coutinho
229 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | 
  3 | from helpers import chart, db, model, prompt, response, schema
  4 | 
  5 | # Sidebar - Application title
  6 | st.sidebar.title("DB TALK - AI 🧠")
  7 | 
  8 | 
  9 | # Sidebar - Database Selection
 10 | st.sidebar.subheader("🛢️ Database")
 11 | 
 12 | 
 13 | # Load databases
 14 | databases = db.load_databases()
 15 | db_names = [db["name"] for db in databases]
 16 | selected_db_name = st.sidebar.selectbox("Select Database:", db_names)
 17 | 
 18 | 
 19 | # Retrieve the full configuration for the selected database
 20 | selected_db = next((db for db in databases if db["name"] == selected_db_name), None)
 21 | 
 22 | # Initialize database connection
 23 | if selected_db:
 24 |     db_type = selected_db["type"]
 25 |     db_path = selected_db["connection_string"]
 26 | 
 27 |     try:
 28 |         db_conn = db.create_database(db_type, db_path)
 29 |     except ValueError as e:
 30 |         st.sidebar.error(str(e))
 31 |         st.stop()
 32 | else:
 33 |     st.sidebar.error("❌ Database configuration not found.")
 34 |     st.stop()
 35 | 
 36 | 
 37 | # Generate schema file
 38 | schema_file = selected_db.get("schema")
 39 | 
 40 | 
 41 | # Dialogs
 42 | @st.dialog("⚠️ Confirm Schema Overwrite", width="small")
 43 | def confirm_overwrite():
 44 |     st.write(
 45 |         "This will overwrite the existing schema if it exists. Do you want to continue?"
 46 |     )
 47 |     if st.button("Yes, overwrite"):
 48 |         # Generate schema
 49 |         success, message = db.generate_schema(selected_db, databases, db_conn)
 50 | 
 51 |         if success:
 52 |             st.success(message)
 53 |             st.rerun()
 54 |         else:
 55 |             st.error(message)
 56 | 
 57 | 
 58 | # Sidebar - Schema Management
 59 | st.sidebar.subheader("📝 Database Schema")
 60 | 
 61 | if schema_file:
 62 |     st.sidebar.success(f"✅ Schema was generated: {schema_file}")
 63 | 
 64 |     if st.sidebar.button("Regenerate Database Schema"):
 65 |         confirm_overwrite()
 66 | else:
 67 |     if st.sidebar.button("Generate Database Schema"):
 68 |         confirm_overwrite()
 69 | 
 70 | 
 71 | # Load database schema
 72 | schema_info, schema_warning = schema.load_schema(schema_file)
 73 | 
 74 | if schema_warning:
 75 |     st.sidebar.warning(schema_warning)
 76 | 
 77 | 
 78 | # Sidebar - AI Model Selection
 79 | st.sidebar.subheader("🤖 AI Model")
 80 | 
 81 | models = model.load_models()
 82 | 
 83 | if not models:
 84 |     st.sidebar.error("❌ No models available, please add models.")
 85 |     st.stop()
 86 | 
 87 | model_names = [model.name() for model in models]
 88 | selected_model_name = st.sidebar.selectbox("Select AI Model:", model_names)
 89 | selected_model = next((m for m in models if m.name() == selected_model_name), None)
 90 | 
 91 | model_variants = selected_model.get_variants()
 92 | 
 93 | if model_variants:
 94 |     variant_keys = list(model_variants.keys())
 95 |     default_variant = selected_model.get_default_variant()
 96 | 
 97 |     # Pre-select the default variant if available
 98 |     selected_model_variant = st.sidebar.selectbox(
 99 |         "Select Model Variant:",
100 |         variant_keys,
101 |         format_func=lambda key: model_variants[key],
102 |         index=(
103 |             variant_keys.index(default_variant)
104 |             if default_variant in variant_keys
105 |             else 0
106 |         ),
107 |     )
108 | else:
109 |     selected_model_variant = selected_model.get_default_variant()
110 | 
111 | 
# Sidebar - Chart Options
st.sidebar.subheader("📊 Chart Options")
generate_chart = st.sidebar.checkbox("Enable Chart Generation", value=False)


# Collect the available chart classes and their display names
chart_classes = chart.load_charts()
chart_names = [cls.name for cls in chart_classes]

# The chart type selector is only drawn when chart generation is enabled
if generate_chart:
    selected_chart = st.sidebar.selectbox("Select Chart Type:", chart_names)
else:
    selected_chart = None


# Sidebar - Options
st.sidebar.subheader("⚙️ Options")
show_query = st.sidebar.checkbox("Show Query After Execution", value=False)
129 | 
130 | 
# Main Area - question input and the generate action
st.header("💬 Ask Something About the Database")
user_prompt = st.text_input("Enter your question below:")

if st.button("🚀 Generate"):
    if user_prompt:
        # Resolve the chart class only when charting is on and a type is chosen
        chart_class = None

        if generate_chart and selected_chart:
            chart_class = next(
                filter(lambda cls: cls.name == selected_chart, chart_classes),
                None,
            )

        # A chart-specific prompt is passed along only when a class was found
        chart_prompt = chart_class.prompt if chart_class else None

        messages = prompt.build(
            db_conn.get_driver_name(),
            schema_info,
            user_prompt,
            chart_prompt,
        )

        try:
            # Ask the selected model for a query, then clean its raw response
            query = response.clean(
                selected_model.run(
                    messages,
                    variant=selected_model_variant,
                )
            )

            # A response starting with "error:" is shown instead of executed
            if query.lower().startswith("error:"):
                st.error(query)
            else:
                if show_query:
                    st.divider()

                    with st.expander("🔍 Generated Query"):
                        st.code(
                            query,
                            language=db_conn.get_code_language(),
                        )

                # Execute query
                df = db_conn.run_query(query)
                has_rows = df is not None and not df.empty

                if not has_rows:
                    st.warning("⚠️ No results found.")
                else:
                    st.divider()
                    st.subheader("📊 Query Results")
                    st.dataframe(df)

                    # Draw the chart when enabled and a chart class was resolved
                    if generate_chart and chart_class:
                        st.divider()
                        st.subheader("📈 Generated Chart")

                        # The chart instance receives the DataFrame directly
                        chart_instance = chart_class()
                        chart_instance.render(df)

        except ValueError as e:
            st.error(f"❌ Error: {str(e)}")
    else:
        st.warning("⚠️ Please enter a prompt.")
203 | 


--------------------------------------------------------------------------------
/charts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paulocoutinhox/db-talk-ai/21e7f6380340461a81cb8dd1cbae88652b31f6a5/charts/__init__.py


--------------------------------------------------------------------------------
/charts/bar_chart.py:
--------------------------------------------------------------------------------
 1 | import plotly.express as px
 2 | from pandas import DataFrame
 3 | 
 4 | from .base_chart import BaseChart
 5 | 
 6 | 
 7 | class BarChart(BaseChart):
 8 |     name = "Bar Chart"
 9 |     prompt = """
10 |     Generate a bar chart using two columns:
11 |     - One categorical column for the x-axis.
12 |     - One numerical column for the y-axis.
13 | 
14 |     The output should clearly show the numerical values distributed across different categories.
15 |     Ensure appropriate labels for clarity.
16 |     """
17 | 
18 |     def generate(self, df: DataFrame):
19 |         if len(df.columns) >= 2:
20 |             x, y = df.columns[0], df.columns[1]
21 |             return px.bar(df, x=x, y=y, title=f"{y} by {x}")
22 |         else:
23 |             raise ValueError(
24 |                 "The DataFrame must have at least two columns for a bar chart."
25 |             )
26 | 


--------------------------------------------------------------------------------
/charts/base_chart.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | from pandas import DataFrame
 3 | 
 4 | 
 5 | class BaseChart:
 6 |     name = "Base Chart"
 7 |     prompt = "Generate a simple chart."
 8 | 
 9 |     def generate(self, df: DataFrame):
10 |         raise NotImplementedError("You must implement the generate method.")
11 | 
12 |     def render(self, df: DataFrame):
13 |         chart = self.generate(df)
14 | 
15 |         if chart:
16 |             st.plotly_chart(chart)
17 | 


--------------------------------------------------------------------------------
/charts/heatmap_chart.py:
--------------------------------------------------------------------------------
 1 | import plotly.express as px
 2 | from pandas import DataFrame
 3 | 
 4 | from .base_chart import BaseChart
 5 | 
 6 | 
 7 | class HeatmapChart(BaseChart):
 8 |     name = "Heatmap Chart"
 9 |     prompt = """
10 |     Generate a heatmap using geographical coordinates:
11 |     - The second column should contain latitude values.
12 |     - The third column should contain longitude values.
13 |     - The fourth column (optional) should contain intensity values.
14 | 
15 |     The map should visualize data density or intensity using a heatmap overlay.
16 |     """
17 | 
18 |     def generate(self, df: DataFrame):
19 |         if len(df.columns) >= 3:
20 |             label, lat, lon = df.columns[0], df.columns[1], df.columns[2]
21 |             intensity = df.columns[3] if len(df.columns) > 3 else None
22 | 
23 |             heatmap = px.density_map(
24 |                 df,
25 |                 lat=lat,
26 |                 lon=lon,
27 |                 z=intensity,
28 |                 radius=10,
29 |                 hover_name=label,
30 |                 hover_data=(
31 |                     {lat: True, lon: True, intensity: True}
32 |                     if intensity
33 |                     else {lat: True, lon: True}
34 |                 ),
35 |                 title="Geographical Heatmap",
36 |                 height=600,
37 |                 width=800,
38 |                 center=dict(lat=df[lat].mean(), lon=df[lon].mean()),
39 |                 color_continuous_scale="Viridis",
40 |             )
41 | 
42 |             return heatmap
43 |         else:
44 |             raise ValueError(
45 |                 "The DataFrame must have at least three columns: label, latitude, and longitude."
46 |             )
47 | 


--------------------------------------------------------------------------------
/charts/line_chart.py:
--------------------------------------------------------------------------------
 1 | import plotly.express as px
 2 | from pandas import DataFrame
 3 | 
 4 | from .base_chart import BaseChart
 5 | 
 6 | 
 7 | class LineChart(BaseChart):
 8 |     name = "Line Chart"
 9 |     prompt = """
10 |     Generate a line chart using time-series or ordered numerical data:
11 |     - The x-axis should contain time-related or sequential data.
12 |     - The y-axis should contain numerical values.
13 | 
14 |     The line should smoothly connect the points in order, representing the progression over time or sequence.
15 |     """
16 | 
17 |     def generate(self, df: DataFrame):
18 |         if len(df.columns) >= 2:
19 |             x, y = df.columns[0], df.columns[1]
20 |             return px.line(df, x=x, y=y, title=f"{y} over {x}")
21 |         else:
22 |             raise ValueError(
23 |                 "The DataFrame must have at least two columns for a line chart."
24 |             )
25 | 


--------------------------------------------------------------------------------
/charts/map_chart.py:
--------------------------------------------------------------------------------
 1 | import plotly.express as px
 2 | from pandas import DataFrame
 3 | 
 4 | from .base_chart import BaseChart
 5 | 
 6 | 
 7 | class MapChart(BaseChart):
 8 |     name = "Map Chart"
 9 |     prompt = """
10 |     Generate a scatter map using geographical coordinates:
11 |     - The second column should contain latitude values.
12 |     - The third column should contain longitude values.
13 | 
14 |     The map should plot points based on the provided geographical coordinates.
15 |     Additional columns can be used for hover information.
16 |     """
17 | 
18 |     def generate(self, df: DataFrame):
19 |         if len(df.columns) >= 3:
20 |             label, lat, lon = df.columns[0], df.columns[1], df.columns[2]
21 | 
22 |             # Using scatter_mapbox with default layout
23 |             map = px.scatter_map(
24 |                 df,
25 |                 lat=lat,
26 |                 lon=lon,
27 |                 text=label,
28 |                 hover_name=label,
29 |                 hover_data={lat: True, lon: True},
30 |                 title="Geographical Scatter Map",
31 |                 height=600,
32 |                 width=800,
33 |             )
34 | 
35 |             # Customize marker appearance
36 |             map.update_traces(
37 |                 marker=dict(
38 |                     size=14,
39 |                     symbol="marker",
40 |                     color="red",
41 |                     opacity=0.9,
42 |                 )
43 |             )
44 | 
45 |             return map
46 |         else:
47 |             raise ValueError(
48 |                 "The DataFrame must have at least three columns: label, latitude, and longitude."
49 |             )
50 | 


--------------------------------------------------------------------------------
/charts/pie_chart.py:
--------------------------------------------------------------------------------
 1 | import plotly.express as px
 2 | from pandas import DataFrame
 3 | 
 4 | from .base_chart import BaseChart
 5 | 
 6 | 
 7 | class PieChart(BaseChart):
 8 |     name = "Pie Chart"
 9 |     prompt = """
10 |     Generate a pie chart representing the distribution of categories:
11 |     - The first column should contain category names (labels).
12 |     - The second column should contain numerical values (sizes).
13 | 
14 |     The chart should clearly show the proportion of each category relative to the total.
15 |     """
16 | 
17 |     def generate(self, df: DataFrame):
18 |         if len(df.columns) >= 2:
19 |             x, y = df.columns[0], df.columns[1]
20 |             return px.pie(df, names=x, values=y, title=f"{y} Distribution by {x}")
21 |         else:
22 |             raise ValueError(
23 |                 "The DataFrame must have at least two columns for a pie chart."
24 |             )
25 | 


--------------------------------------------------------------------------------
/data/config/databases.json:
--------------------------------------------------------------------------------
1 | [
2 |     {
3 |         "name": "Sample SQLite DB",
4 |         "type": "sql",
5 |         "connection_string": "sqlite:///data/sample/sample.db",
6 |         "schema": "sample.txt"
7 |     }
8 | ]


--------------------------------------------------------------------------------
/data/config/models.json:
--------------------------------------------------------------------------------
1 | [
2 |   {
3 |     "name": "Qwen 2.5 3B Instruct",
4 |     "path": "Qwen/Qwen2.5-3B-Instruct",
5 |     "torch_dtype": "auto"
6 |   }
7 | ]


--------------------------------------------------------------------------------
/data/models/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 | 


--------------------------------------------------------------------------------
/data/sample/sample.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paulocoutinhox/db-talk-ai/21e7f6380340461a81cb8dd1cbae88652b31f6a5/data/sample/sample.db


--------------------------------------------------------------------------------
/data/schemas/sample.txt:
--------------------------------------------------------------------------------
  1 | TABLE: US_CITIES
  2 | Columns:
  3 | - ID (INTEGER) PRIMARY KEY  NULLABLE
  4 | - ID_STATE (INTEGER)   NOT NULL
  5 | - CITY (TEXT(50))   NOT NULL
  6 | - COUNTY (TEXT(50))   NOT NULL
  7 | - LATITUDE (DOUBLE)   NOT NULL
  8 | - LONGITUDE (DOUBLE)   NOT NULL
  9 | 
 10 | Foreign Keys:
 11 | - ID_STATE → US_STATES.ID ON DELETE NO ACTION ON UPDATE NO ACTION
 12 | 
 13 | TABLE: US_STATES
 14 | Columns:
 15 | - ID (INTEGER) PRIMARY KEY  NULLABLE
 16 | - STATE_CODE (TEXT(2))   NOT NULL
 17 | - STATE_NAME (TEXT(50))   NOT NULL
 18 | 
 19 | TABLE: albums
 20 | Columns:
 21 | - AlbumId (INTEGER) PRIMARY KEY  NOT NULL
 22 | - Title (NVARCHAR(160))   NOT NULL
 23 | - ArtistId (INTEGER)   NOT NULL
 24 | 
 25 | Foreign Keys:
 26 | - ArtistId → artists.ArtistId ON DELETE NO ACTION ON UPDATE NO ACTION
 27 | 
 28 | Indexes:
 29 | - IFK_AlbumArtistId (NON-UNIQUE): ArtistId
 30 | 
 31 | TABLE: artists
 32 | Columns:
 33 | - ArtistId (INTEGER) PRIMARY KEY  NOT NULL
 34 | - Name (NVARCHAR(120))   NULLABLE
 35 | 
 36 | TABLE: customers
 37 | Columns:
 38 | - CustomerId (INTEGER) PRIMARY KEY  NOT NULL
 39 | - FirstName (NVARCHAR(40))   NOT NULL
 40 | - LastName (NVARCHAR(20))   NOT NULL
 41 | - Company (NVARCHAR(80))   NULLABLE
 42 | - Address (NVARCHAR(70))   NULLABLE
 43 | - City (NVARCHAR(40))   NULLABLE
 44 | - State (NVARCHAR(40))   NULLABLE
 45 | - Country (NVARCHAR(40))   NULLABLE
 46 | - PostalCode (NVARCHAR(10))   NULLABLE
 47 | - Phone (NVARCHAR(24))   NULLABLE
 48 | - Fax (NVARCHAR(24))   NULLABLE
 49 | - Email (NVARCHAR(60))   NOT NULL
 50 | - SupportRepId (INTEGER)   NULLABLE
 51 | 
 52 | Foreign Keys:
 53 | - SupportRepId → employees.EmployeeId ON DELETE NO ACTION ON UPDATE NO ACTION
 54 | 
 55 | Indexes:
 56 | - IFK_CustomerSupportRepId (NON-UNIQUE): SupportRepId
 57 | 
 58 | TABLE: employees
 59 | Columns:
 60 | - EmployeeId (INTEGER) PRIMARY KEY  NOT NULL
 61 | - LastName (NVARCHAR(20))   NOT NULL
 62 | - FirstName (NVARCHAR(20))   NOT NULL
 63 | - Title (NVARCHAR(30))   NULLABLE
 64 | - ReportsTo (INTEGER)   NULLABLE
 65 | - BirthDate (DATETIME)   NULLABLE
 66 | - HireDate (DATETIME)   NULLABLE
 67 | - Address (NVARCHAR(70))   NULLABLE
 68 | - City (NVARCHAR(40))   NULLABLE
 69 | - State (NVARCHAR(40))   NULLABLE
 70 | - Country (NVARCHAR(40))   NULLABLE
 71 | - PostalCode (NVARCHAR(10))   NULLABLE
 72 | - Phone (NVARCHAR(24))   NULLABLE
 73 | - Fax (NVARCHAR(24))   NULLABLE
 74 | - Email (NVARCHAR(60))   NULLABLE
 75 | 
 76 | Foreign Keys:
 77 | - ReportsTo → employees.EmployeeId ON DELETE NO ACTION ON UPDATE NO ACTION
 78 | 
 79 | Indexes:
 80 | - IFK_EmployeeReportsTo (NON-UNIQUE): ReportsTo
 81 | 
 82 | TABLE: genres
 83 | Columns:
 84 | - GenreId (INTEGER) PRIMARY KEY  NOT NULL
 85 | - Name (NVARCHAR(120))   NULLABLE
 86 | 
 87 | TABLE: invoice_items
 88 | Columns:
 89 | - InvoiceLineId (INTEGER) PRIMARY KEY  NOT NULL
 90 | - InvoiceId (INTEGER)   NOT NULL
 91 | - TrackId (INTEGER)   NOT NULL
 92 | - UnitPrice (NUMERIC(10, 2))   NOT NULL
 93 | - Quantity (INTEGER)   NOT NULL
 94 | 
 95 | Foreign Keys:
 96 | - TrackId → tracks.TrackId ON DELETE NO ACTION ON UPDATE NO ACTION
 97 | - InvoiceId → invoices.InvoiceId ON DELETE NO ACTION ON UPDATE NO ACTION
 98 | 
 99 | Indexes:
100 | - IFK_InvoiceLineInvoiceId (NON-UNIQUE): InvoiceId
101 | - IFK_InvoiceLineTrackId (NON-UNIQUE): TrackId
102 | 
103 | TABLE: invoices
104 | Columns:
105 | - InvoiceId (INTEGER) PRIMARY KEY  NOT NULL
106 | - CustomerId (INTEGER)   NOT NULL
107 | - InvoiceDate (DATETIME)   NOT NULL
108 | - BillingAddress (NVARCHAR(70))   NULLABLE
109 | - BillingCity (NVARCHAR(40))   NULLABLE
110 | - BillingState (NVARCHAR(40))   NULLABLE
111 | - BillingCountry (NVARCHAR(40))   NULLABLE
112 | - BillingPostalCode (NVARCHAR(10))   NULLABLE
113 | - Total (NUMERIC(10, 2))   NOT NULL
114 | 
115 | Foreign Keys:
116 | - CustomerId → customers.CustomerId ON DELETE NO ACTION ON UPDATE NO ACTION
117 | 
118 | Indexes:
119 | - IFK_InvoiceCustomerId (NON-UNIQUE): CustomerId
120 | 
121 | TABLE: media_types
122 | Columns:
123 | - MediaTypeId (INTEGER) PRIMARY KEY  NOT NULL
124 | - Name (NVARCHAR(120))   NULLABLE
125 | 
126 | TABLE: playlist_track
127 | Columns:
128 | - PlaylistId (INTEGER) PRIMARY KEY  NOT NULL
129 | - TrackId (INTEGER) PRIMARY KEY  NOT NULL
130 | 
131 | Foreign Keys:
132 | - TrackId → tracks.TrackId ON DELETE NO ACTION ON UPDATE NO ACTION
133 | - PlaylistId → playlists.PlaylistId ON DELETE NO ACTION ON UPDATE NO ACTION
134 | 
135 | Indexes:
136 | - IFK_PlaylistTrackTrackId (NON-UNIQUE): TrackId
137 | 
138 | TABLE: playlists
139 | Columns:
140 | - PlaylistId (INTEGER) PRIMARY KEY  NOT NULL
141 | - Name (NVARCHAR(120))   NULLABLE
142 | 
143 | TABLE: tracks
144 | Columns:
145 | - TrackId (INTEGER) PRIMARY KEY  NOT NULL
146 | - Name (NVARCHAR(200))   NOT NULL
147 | - AlbumId (INTEGER)   NULLABLE
148 | - MediaTypeId (INTEGER)   NOT NULL
149 | - GenreId (INTEGER)   NULLABLE
150 | - Composer (NVARCHAR(220))   NULLABLE
151 | - Milliseconds (INTEGER)   NOT NULL
152 | - Bytes (INTEGER)   NULLABLE
153 | - UnitPrice (NUMERIC(10, 2))   NOT NULL
154 | 
155 | Foreign Keys:
156 | - MediaTypeId → media_types.MediaTypeId ON DELETE NO ACTION ON UPDATE NO ACTION
157 | - GenreId → genres.GenreId ON DELETE NO ACTION ON UPDATE NO ACTION
158 | - AlbumId → albums.AlbumId ON DELETE NO ACTION ON UPDATE NO ACTION
159 | 
160 | Indexes:
161 | - IFK_TrackAlbumId (NON-UNIQUE): AlbumId
162 | - IFK_TrackGenreId (NON-UNIQUE): GenreId
163 | - IFK_TrackMediaTypeId (NON-UNIQUE): MediaTypeId
164 | 


--------------------------------------------------------------------------------
/db/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paulocoutinhox/db-talk-ai/21e7f6380340461a81cb8dd1cbae88652b31f6a5/db/__init__.py


--------------------------------------------------------------------------------
/db/base_database.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | 
 3 | 
 4 | class BaseDatabase(ABC):
 5 |     """Base class for all database connections."""
 6 | 
 7 |     def __init__(self, db_url):
 8 |         self.db_url = db_url
 9 | 
10 |     @abstractmethod
11 |     def connect(self):
12 |         """Establish connection with the database."""
13 |         pass
14 | 
15 |     @abstractmethod
16 |     def run_query(self, query):
17 |         """Run a query and return the result."""
18 |         pass
19 | 
20 |     @abstractmethod
21 |     def generate_schema(self, schema_file):
22 |         """Generate the database schema and save it to a file."""
23 |         pass
24 | 
25 |     @abstractmethod
26 |     def get_code_language(self):
27 |         """Get code language."""
28 |         pass
29 | 
30 |     @abstractmethod
31 |     def get_driver_name(self):
32 |         """Get database driver name."""
33 |         pass
34 | 


--------------------------------------------------------------------------------
/db/sql_database.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | from sqlalchemy import create_engine, inspect, text
  3 | from streamlit import error
  4 | 
  5 | from .base_database import BaseDatabase
  6 | 
  7 | """
  8 | - SQLite:
  9 | sqlite:///data/sample.db (relative)
 10 | sqlite:///absolute/path/sample.db (absolute)
 11 | 
 12 | - PostgreSQL:
 13 | postgresql://user:password@host:port/dbname
 14 | 
 15 | - MySQL:
 16 | mysql://user:password@host:port/dbname
 17 | """
 18 | 
 19 | 
 20 | class SQLDatabase(BaseDatabase):
 21 |     """SQL database implementation."""
 22 | 
 23 |     def __init__(self, db_url):
 24 |         super().__init__(db_url)
 25 |         self.engine = create_engine(self.db_url)
 26 | 
 27 |     def connect(self):
 28 |         """Connect to the SQL database."""
 29 |         try:
 30 |             return self.engine.connect()
 31 |         except Exception as e:
 32 |             error(f"❌ Error connecting to the database: {e}")
 33 |             return None
 34 | 
 35 |     def run_query(self, query):
 36 |         """Execute a SQL query and return the result."""
 37 |         try:
 38 |             with self.engine.connect() as conn:
 39 |                 return pd.read_sql(text(query), conn)
 40 |         except Exception as e:
 41 |             error(f"❌ Error executing the query: {e}")
 42 |             return None
 43 | 
 44 |     def get_code_language(self):
 45 |         """Get code language."""
 46 |         return "sql"
 47 | 
 48 |     def get_driver_name(self):
 49 |         """Get database driver name."""
 50 |         cs_data = self.db_url.split(":")
 51 |         return cs_data[0]
 52 | 
 53 |     def generate_schema(self, schema_file="schema.txt"):
 54 |         """Generates a schema file for the connected database with enhanced details."""
 55 |         try:
 56 |             inspector = inspect(self.engine)
 57 |             tables = inspector.get_table_names()
 58 | 
 59 |             if not tables:
 60 |                 return "⚠️ No tables found in the database."
 61 | 
 62 |             schema_details = []
 63 | 
 64 |             for table in tables:
 65 |                 schema_details.append(f"TABLE: {table}\nColumns:")
 66 | 
 67 |                 # Get column information
 68 |                 columns = inspector.get_columns(table)
 69 |                 for col in columns:
 70 |                     col_name = col["name"]
 71 |                     col_type = col["type"]
 72 |                     col_nullable = "NULLABLE" if col["nullable"] else "NOT NULL"
 73 |                     col_default = f"DEFAULT {col['default']}" if col["default"] else ""
 74 |                     col_pk = "PRIMARY KEY" if col.get("primary_key") else ""
 75 |                     col_unique = "UNIQUE" if col.get("unique") else ""
 76 | 
 77 |                     # Format column details
 78 |                     col_details = f"- {col_name} ({col_type}) {col_pk} {col_unique} {col_nullable} {col_default}".strip()
 79 |                     schema_details.append(col_details)
 80 | 
 81 |                 # Get foreign keys
 82 |                 foreign_keys = inspector.get_foreign_keys(table)
 83 |                 if foreign_keys:
 84 |                     schema_details.append("\nForeign Keys:")
 85 |                     for fk in foreign_keys:
 86 |                         fk_column = fk["constrained_columns"][0]
 87 |                         ref_table = fk["referred_table"]
 88 |                         ref_column = fk["referred_columns"][0]
 89 |                         on_delete = f"ON DELETE {fk.get('options', {}).get('ondelete', 'NO ACTION')}".upper()
 90 |                         on_update = f"ON UPDATE {fk.get('options', {}).get('onupdate', 'NO ACTION')}".upper()
 91 |                         schema_details.append(
 92 |                             f"- {fk_column} → {ref_table}.{ref_column} {on_delete} {on_update}"
 93 |                         )
 94 | 
 95 |                 # Get indexes
 96 |                 indexes = inspector.get_indexes(table)
 97 |                 if indexes:
 98 |                     schema_details.append("\nIndexes:")
 99 |                     for idx in indexes:
100 |                         index_name = idx["name"]
101 |                         columns_str = ", ".join(idx["column_names"])
102 |                         is_unique = "UNIQUE" if idx["unique"] else "NON-UNIQUE"
103 |                         schema_details.append(
104 |                             f"- {index_name} ({is_unique}): {columns_str}"
105 |                         )
106 | 
107 |                 # Get triggers (if supported)
108 |                 try:
109 |                     with self.engine.connect() as conn:
110 |                         triggers = conn.execute(
111 |                             text(
112 |                                 f"SELECT name FROM sqlite_master WHERE type='trigger' AND tbl_name='{table}';"
113 |                             )
114 |                         )
115 |                         triggers = [row[0] for row in triggers]
116 |                         if triggers:
117 |                             schema_details.append("\nTriggers:")
118 |                             for trigger in triggers:
119 |                                 schema_details.append(f"- {trigger}")
120 |                 except Exception:
121 |                     pass  # Ignore if triggers are not supported
122 | 
123 |                 # Get views (if applicable)
124 |                 views = inspector.get_view_names()
125 |                 if table in views:
126 |                     schema_details.append("\nVIEW (Readonly Table)")
127 | 
128 |                 schema_details.append("")  # Blank line to separate tables
129 | 
130 |             # Write the schema to the file
131 |             with open(schema_file, "w", encoding="utf-8") as f:
132 |                 f.write("\n".join(schema_details))
133 | 
134 |             return f"✅ Schema file '{schema_file}' generated successfully."
135 | 
136 |         except Exception as e:
137 |             return f"❌ Error generating schema: {e}"
138 | 


--------------------------------------------------------------------------------
/docs/CLOUD_MODELS.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # CLOUD MODELS
  3 | 
  4 | This document explains how to configure API keys for cloud-based models like **OpenAI GPT**, **DeepSeek AI**, **Gemini AI**, **Grok AI** and **Anthropic AI**.
  5 | 
  6 | ## 🔑 **Supported API Providers**
  7 | 
  8 | - **OpenAI GPT**
  9 | - **DeepSeek AI**
 10 | - **Gemini AI**
 11 | - **Grok AI**
 12 | - **Anthropic AI**
 13 | 
 14 | ## ⚙️ **How to Set API Keys**
 15 | 
 16 | Set the appropriate API keys for each provider as environment variables.
 17 | 
 18 | ### 🔸 **OpenAI API Key**
 19 | 
 20 | #### Linux/macOS:
 21 | ```bash
 22 | export OPENAI_API_KEY="your-openai-api-key"
 23 | ```
 24 | 
 25 | #### Windows (Command Prompt):
 26 | ```cmd
 27 | set OPENAI_API_KEY=your-openai-api-key
 28 | ```
 29 | 
 30 | #### Windows (PowerShell):
 31 | ```powershell
 32 | $env:OPENAI_API_KEY="your-openai-api-key"
 33 | ```
 34 | 
 35 | ### 🔸 **DeepSeek API Key**
 36 | 
 37 | #### Linux/macOS:
 38 | ```bash
 39 | export DEEPSEEK_API_KEY="your-deepseek-api-key"
 40 | ```
 41 | 
 42 | #### Windows (Command Prompt):
 43 | ```cmd
 44 | set DEEPSEEK_API_KEY=your-deepseek-api-key
 45 | ```
 46 | 
 47 | #### Windows (PowerShell):
 48 | ```powershell
 49 | $env:DEEPSEEK_API_KEY="your-deepseek-api-key"
 50 | ```
 51 | 
 52 | ### 🔸 **Gemini API Key**
 53 | 
 54 | #### Linux/macOS:
 55 | ```bash
 56 | export GEMINI_API_KEY="your-gemini-api-key"
 57 | ```
 58 | 
 59 | #### Windows (Command Prompt):
 60 | ```cmd
 61 | set GEMINI_API_KEY=your-gemini-api-key
 62 | ```
 63 | 
 64 | #### Windows (PowerShell):
 65 | ```powershell
 66 | $env:GEMINI_API_KEY="your-gemini-api-key"
 67 | ```
 68 | 
 69 | ### 🔸 **Grok AI API Key**
 70 | 
 71 | #### Linux/macOS:
 72 | ```bash
 73 | export XAI_API_KEY="your-xai-api-key"
 74 | ```
 75 | 
 76 | #### Windows (Command Prompt):
 77 | ```cmd
 78 | set XAI_API_KEY=your-xai-api-key
 79 | ```
 80 | 
 81 | #### Windows (PowerShell):
 82 | ```powershell
 83 | $env:XAI_API_KEY="your-xai-api-key"
 84 | ```
 85 | 
 86 | ### 🔸 **Anthropic AI API Key**
 87 | 
 88 | #### Linux/macOS:
 89 | ```bash
 90 | export ANTHROPIC_API_KEY="your-anthropic-api-key"
 91 | ```
 92 | 
 93 | #### Windows (Command Prompt):
 94 | ```cmd
 95 | set ANTHROPIC_API_KEY=your-anthropic-api-key
 96 | ```
 97 | 
 98 | #### Windows (PowerShell):
 99 | ```powershell
100 | $env:ANTHROPIC_API_KEY="your-anthropic-api-key"
101 | ```
102 | 
103 | ## 📌 **Persistent Configuration**
104 | 
105 | To make these environment variables permanent:
106 | 
107 | - **Linux/macOS:** Add the export commands to your `~/.bashrc` or `~/.zshrc` file.
108 | - **Windows:** Set environment variables via **System Properties** > **Advanced** > **Environment Variables**.
109 | 
110 | ## ❓ **Troubleshooting**
111 | 
112 | - Ensure your API key is valid and active.
113 | - Restart your terminal or development environment after setting the environment variables.
114 | - Double-check the exact spelling of the environment variable names.
115 | 


--------------------------------------------------------------------------------
/extras/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paulocoutinhox/db-talk-ai/21e7f6380340461a81cb8dd1cbae88652b31f6a5/extras/images/logo.png


--------------------------------------------------------------------------------
/extras/images/screenshot-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paulocoutinhox/db-talk-ai/21e7f6380340461a81cb8dd1cbae88652b31f6a5/extras/images/screenshot-2.png


--------------------------------------------------------------------------------
/extras/images/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paulocoutinhox/db-talk-ai/21e7f6380340461a81cb8dd1cbae88652b31f6a5/extras/images/screenshot.png


--------------------------------------------------------------------------------
/helpers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paulocoutinhox/db-talk-ai/21e7f6380340461a81cb8dd1cbae88652b31f6a5/helpers/__init__.py


--------------------------------------------------------------------------------
/helpers/chart.py:
--------------------------------------------------------------------------------
 1 | from charts.bar_chart import BarChart
 2 | from charts.heatmap_chart import HeatmapChart
 3 | from charts.line_chart import LineChart
 4 | from charts.map_chart import MapChart
 5 | from charts.pie_chart import PieChart
 6 | 
 7 | 
 8 | def load_charts():
 9 |     # List of charts
10 |     chart_classes = [
11 |         BarChart,
12 |         LineChart,
13 |         PieChart,
14 |         MapChart,
15 |         HeatmapChart,
16 |     ]
17 | 
18 |     # Sort charts alphabetically by name
19 |     sorted_chart_classes = sorted(chart_classes, key=lambda c: c.name)
20 | 
21 |     return sorted_chart_classes
22 | 


--------------------------------------------------------------------------------
/helpers/db.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | import uuid
 4 | 
 5 | from db.sql_database import SQLDatabase
 6 | from helpers import file
 7 | 
 8 | 
 9 | def load_databases():
10 |     """Load database configurations from a JSON file."""
11 |     file_path = file.get_databases_file()
12 | 
13 |     with open(file_path, "r", encoding="utf-8") as f:
14 |         return json.load(f)
15 | 
16 | 
def save_databases(databases):
    """Save the database configurations to a JSON file.

    Args:
        databases: JSON-serializable configuration data.

    Raises:
        TypeError / ValueError: If ``databases`` cannot be serialized. The
            existing file is left untouched in that case.
    """
    # Serialize BEFORE opening the file: opening with "w" truncates it, so a
    # serialization failure would otherwise wipe the stored configuration.
    serialized = json.dumps(databases, indent=4)

    file_path = file.get_databases_file()

    with open(file_path, "w", encoding="utf-8") as f:
        f.write(serialized)
23 | 
24 | 
def create_database(db_type, db_url):
    """Instantiate the database backend matching ``db_type``.

    Only the "sql" type is handled; any other value raises ``ValueError``.
    """
    if db_type != "sql":
        raise ValueError(f"❌ Unsupported database type: {db_type}")

    return SQLDatabase(db_url)
31 | 
32 | 
def generate_schema(selected_db, databases, db_conn):
    """Generate and save the database schema, updating the configuration if successful.

    Args:
        selected_db (dict): Configuration entry of the database; its "schema"
            key is read and, on success, written.
        databases: The full configuration passed to ``save_databases``.
        db_conn: Object exposing ``generate_schema(path)`` that returns a
            status message.

    Returns:
        tuple: ``(success, message)`` where ``success`` is True when the
        message contains "✅".
    """
    schema_dir = file.get_schemas_folder()
    os.makedirs(schema_dir, exist_ok=True)

    # `or` (not a .get default) so that an existing but empty value, such as
    # "schema": null or "", also falls back to a fresh file name.
    schema_file = selected_db.get("schema") or f"{uuid.uuid4()}.txt"
    schema_path = os.path.join(schema_dir, schema_file)

    # Generate the schema using the database connection
    schema_message = db_conn.generate_schema(schema_path)

    if "✅" not in schema_message:
        return False, schema_message

    # Persist the schema file name only after a successful generation
    selected_db["schema"] = schema_file
    save_databases(databases)

    return True, schema_message
52 | 


--------------------------------------------------------------------------------
/helpers/file.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | 
 4 | def get_root_folder():
 5 |     """Get the root folder path."""
 6 | 
 7 |     return os.getenv(
 8 |         "DB_TALK_AI_ROOT",
 9 |         os.path.dirname(
10 |             os.path.abspath(os.path.join(__file__, "..")),
11 |         ),
12 |     )
13 | 
14 | 
def get_data_folder():
    """Return the path of the ``data`` directory under the root folder."""
    root = get_root_folder()
    return os.path.join(root, "data")
18 | 
19 | 
def get_schemas_folder():
    """Return the path of the ``schemas`` directory under the data folder."""
    data_dir = get_data_folder()
    return os.path.join(data_dir, "schemas")
23 | 
24 | 
def get_models_folder():
    """Return the path of the ``models`` directory under the data folder."""
    data_dir = get_data_folder()
    return os.path.join(data_dir, "models")
28 | 
29 | 
def get_config_folder():
    """Return the path of the ``config`` directory under the data folder."""
    data_dir = get_data_folder()
    return os.path.join(data_dir, "config")
33 | 
34 | 
def get_schema_file(name):
    """Return the path of the schema file ``name`` inside the schemas folder."""
    schemas_dir = get_schemas_folder()
    return os.path.join(schemas_dir, name)
38 | 
39 | 
def get_databases_file():
    """Return the path of ``databases.json`` inside the config folder."""
    config_dir = get_config_folder()
    return os.path.join(config_dir, "databases.json")
43 | 
44 | 
def get_models_file():
    """Return the path of ``models.json`` inside the config folder."""
    config_dir = get_config_folder()
    return os.path.join(config_dir, "models.json")
48 | 


--------------------------------------------------------------------------------
/helpers/model.py:
--------------------------------------------------------------------------------
 1 | from models.anthropic_model import AnthropicModel
 2 | from models.deep_seek_model import DeepSeekModel
 3 | from models.gemini_model import GeminiModel
 4 | from models.grok_model import GrokModel
 5 | from models.local_gguf_model import LocalGGUFModel
 6 | from models.local_model import LocalModel
 7 | from models.openai_model import OpenAIModel
 8 | 
 9 | 
def load_models():
    """Build the list of model providers offered by the application.

    The cloud providers are always included, in a fixed order. Each local
    provider is appended only when its ``get_variants()`` result is truthy.
    """
    # Fixed providers
    models = [
        OpenAIModel(),
        DeepSeekModel(),
        GrokModel(),
        GeminiModel(),
        AnthropicModel(),
    ]

    # Local providers (regular first, then GGUF), kept only if they have variants
    for local_cls in (LocalModel, LocalGGUFModel):
        candidate = local_cls()

        if candidate.get_variants():
            models.append(candidate)

    return models
32 | 


--------------------------------------------------------------------------------
/helpers/prompt.py:
--------------------------------------------------------------------------------
 1 | from helpers import string
 2 | 
 3 | 
 4 | def build(db_driver, schema_info, user_prompt, chart_prompt):
 5 |     system_prompt = f"""
 6 |     Given the following {db_driver} database schema:
 7 | 
 8 |     {schema_info}
 9 | 
10 |     ### Query Rules:
11 |     - Use **only** tables and columns that exist in the schema.
12 |     - **Do not invent** table names, column names, or values.
13 |     - If the user refers to a table or column that **does not exist**, find the most relevant column based on its **meaning**.
14 |     - If no relevant column exists, respond with: "Error: The requested table or column does not exist."
15 |     - The output must contain **only** the SQL query, with no explanations, formatting, or additional text.
16 |     - The query **must** start directly with 'SELECT'.
17 |     - Dates should be formatted as 'YYYY-MM-DD'.
18 |     - **Absolutely do not use markdown (` ```sql `) or any code blocks.**
19 |     - **Do not wrap the query with triple backticks (` ``` `) or any other formatting characters.**
20 |     - The output must be **plain text** containing only the SQL query.
21 | 
22 |     ### Optimization Rules:
23 |     - Ensure queries are optimized for performance, using indexes where applicable.
24 |     - Use `JOIN` only if necessary, avoiding unnecessary joins.
25 |     - If filtering by date, ensure an indexed column is used whenever possible.
26 |     """
27 | 
28 |     # Add chart-specific rules if provided
29 |     if chart_prompt:
30 |         system_prompt += f"""
31 |         ### Chart-Specific Instructions:
32 |         {chart_prompt}
33 |         """
34 | 
35 |     system_prompt += """
36 |     Generate the SQL query for the following request **without any extra formatting**:
37 |     """
38 | 
39 |     # Clean formatting
40 |     clean_prompt = string.clean_multiline(system_prompt)
41 | 
42 |     # Final prompt structure
43 |     prompt = [
44 |         {
45 |             "role": "system",
46 |             "content": f"You are an advanced SQL assistant specialized in {db_driver} databases.",
47 |         },
48 |         {
49 |             "role": "user",
50 |             "content": f"{clean_prompt}\n\n'{user_prompt}'",
51 |         },
52 |     ]
53 | 
54 |     return prompt
55 | 


--------------------------------------------------------------------------------
/helpers/response.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | 
 4 | def clean(text: str) -> str:
 5 |     """
 6 |     Removes markdown-style code formatting, keeping the sql content.
 7 |     """
 8 |     # remove code block delimiters (```sql ... ``` or ``` ... ```)
 9 |     text = re.sub(r"```(?:\w+)?\n([\s\S]*?)\n```", r"\1", text, flags=re.MULTILINE)
10 | 
11 |     # remove inline code formatting (`...`)
12 |     text = re.sub(r"`([^`]+)`", r"\1", text)
13 | 
14 |     return text.strip()
15 | 


--------------------------------------------------------------------------------
/helpers/schema.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from helpers import file
 4 | 
 5 | 
 6 | def load_schema(schema_file):
 7 |     if schema_file:
 8 |         schema_path = file.get_schema_file(schema_file)
 9 | 
10 |         if os.path.exists(schema_path):
11 |             with open(schema_path, "r", encoding="utf-8") as f:
12 |                 return f.read().strip(), None
13 |         else:
14 |             return "", "⚠️ Schema file not found. Generate it first."
15 |     else:
16 |         return "", "⚠️ No schema generated yet."
17 | 


--------------------------------------------------------------------------------
/helpers/string.py:
--------------------------------------------------------------------------------
1 | import textwrap
2 | 
3 | 
def clean_multiline(text):
    """Remove the common leading indentation of ``text`` and strip its ends."""
    dedented = textwrap.dedent(text)
    return dedented.strip()
6 | 


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/paulocoutinhox/db-talk-ai/21e7f6380340461a81cb8dd1cbae88652b31f6a5/models/__init__.py


--------------------------------------------------------------------------------
/models/anthropic_model.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from .base_model import BaseAIModel
 4 | 
 5 | 
 6 | class AnthropicModel(BaseAIModel):
 7 |     def __init__(self):
 8 |         super().__init__()
 9 |         self.client = None
10 |         self.api_key = os.getenv("ANTHROPIC_API_KEY")
11 |         self.default_variant = "claude-3-7-sonnet-latest"
12 | 
13 |     def load(self):
14 |         if not self.api_key:
15 |             raise ValueError("The environment variable ANTHROPIC_API_KEY is missing.")
16 | 
17 |         try:
18 |             import anthropic
19 |         except ImportError:
20 |             raise ImportError(
21 |                 "The 'anthropic' library is not installed. Please install it using 'pip install anthropic'."
22 |             )
23 | 
24 |         self.client = anthropic.Anthropic(api_key=self.api_key)
25 | 
26 |     def run(self, messages, variant=None):
27 |         if self.client is None:
28 |             self.load()
29 | 
30 |         model_variant = variant or self.default_variant
31 |         prepared_input = self.prepare_messages(messages, model_variant)
32 | 
33 |         # Separate system prompt from user/assistant messages
34 |         system_prompt = ""
35 |         filtered_messages = []
36 | 
37 |         for message in prepared_input:
38 |             if message.get("role") == "system":
39 |                 system_prompt = message.get("content")
40 |             else:
41 |                 filtered_messages.append(message)
42 | 
43 |         # Make the request
44 |         response = self.client.messages.create(
45 |             model=model_variant,
46 |             max_tokens=4096,
47 |             system=system_prompt or None,
48 |             messages=filtered_messages,
49 |         )
50 | 
51 |         return response.content[0].text.strip()
52 | 
53 |     def name(self):
54 |         return "Anthropic"
55 | 
56 |     def get_variants(self):
57 |         return {
58 |             "claude-3-7-sonnet-latest": "Claude 3.7 Sonnet (200k context)",
59 |             "claude-3-5-sonnet-latest": "Claude 3.5 Sonnet (200k context)",
60 |         }
61 | 


--------------------------------------------------------------------------------
/models/base_model.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | 
 3 | 
 4 | class BaseAIModel(ABC):
 5 |     def __init__(self):
 6 |         self.default_variant = None
 7 |         self.unsupported_role_models = []
 8 |         self.force_string_prompt = False
 9 | 
10 |     @abstractmethod
11 |     def load(self):
12 |         """
13 |         Load the AI model and any necessary resources.
14 | 
15 |         Raises:
16 |             Exception: If the model cannot be loaded due to missing dependencies
17 |                        or invalid configurations.
18 |         """
19 |         pass
20 | 
21 |     @abstractmethod
22 |     def run(self, messages, variant=None):
23 |         """
24 |         Run the AI model with the provided input messages and return the response.
25 | 
26 |         Args:
27 |             messages (list): A list of message dictionaries with at least two keys:
28 |                              - "role": The role of the sender (e.g., "user", "assistant").
29 |                              - "content": The actual text message to process.
30 |             variant (str, optional): An internal model identifier specifying the variant to use.
31 |                                      Defaults to None.
32 | 
33 |         Returns:
34 |             str: The response generated by the AI model, as a string.
35 | 
36 |         Raises:
37 |             Exception: If there is an issue during model execution or inference.
38 |         """
39 |         pass
40 | 
41 |     @abstractmethod
42 |     def name(self):
43 |         """
44 |         Get the name of the AI model for display purposes.
45 | 
46 |         Returns:
47 |             str: A user-friendly name of the AI model, e.g., "OpenAI GPT-4".
48 |         """
49 |         pass
50 | 
51 |     def get_variants(self):
52 |         """
53 |         Returns a dictionary of model variants.
54 | 
55 |         Returns:
56 |             dict or None: A dictionary where:
57 |                 - Key (str): Internal model identifier used in API calls.
58 |                 - Value (str): User-friendly name for display in the UI.
59 | 
60 |             If no variants are available, returns None.
61 |         """
62 |         return None
63 | 
64 |     def get_default_variant(self):
65 |         """
66 |         Returns the default variant for this model.
67 | 
68 |         Returns:
69 |             str: The internal identifier of the default variant.
70 |         """
71 |         return self.default_variant
72 | 
73 |     def prepare_messages(self, messages, variant):
74 |         """
75 |         Prepares messages for models that don't support role-based inputs
76 |         or for models that always use a string prompt.
77 | 
78 |         Args:
79 |             messages (list): List of message dictionaries with 'role' and 'content' keys.
80 |             variant (str): The model variant to check against unsupported roles.
81 | 
82 |         Returns:
83 |             str or list: A plain-text prompt (str) if roles are unsupported or if force_string_prompt is True.
84 |                          Otherwise, returns the original list of messages.
85 |         """
86 |         if self.force_string_prompt or variant in self.unsupported_role_models:
87 |             return "\n".join(
88 |                 f"{msg.get('role', 'user').capitalize()}: {msg['content']}"
89 |                 for msg in messages
90 |             )
91 | 
92 |         return messages
93 | 


--------------------------------------------------------------------------------
/models/deep_seek_model.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from .base_model import BaseAIModel
 4 | 
 5 | 
 6 | class DeepSeekModel(BaseAIModel):
 7 |     def __init__(self):
 8 |         super().__init__()
 9 |         self.client = None
10 |         self.api_key = os.getenv("DEEPSEEK_API_KEY")
11 |         self.default_variant = "deepseek-chat"
12 | 
13 |     def load(self):
14 |         if not self.api_key:
15 |             raise ValueError("The environment variable DEEPSEEK_API_KEY is missing.")
16 | 
17 |         try:
18 |             from openai import OpenAI
19 |         except ImportError:
20 |             raise ImportError(
21 |                 "The 'openai' library is not installed. Please install it using 'pip install openai'."
22 |             )
23 | 
24 |         self.client = OpenAI(api_key=self.api_key, base_url="https://api.deepseek.com")
25 | 
26 |     def run(self, messages, variant=None):
27 |         if self.client is None:
28 |             self.load()
29 | 
30 |         model_variant = variant or self.default_variant
31 |         prepared_input = self.prepare_messages(messages, model_variant)
32 | 
33 |         response = self.client.chat.completions.create(
34 |             model=model_variant,
35 |             messages=prepared_input,
36 |             temperature=0.2,
37 |         )
38 | 
39 |         return response.choices[0].message.content.strip()
40 | 
41 |     def name(self):
42 |         return "DeepSeek"
43 | 
44 |     def get_variants(self):
45 |         return {
46 |             "deepseek-chat": "DeepSeek Chat (64k context)",
47 |             "deepseek-reasoner": "DeepSeek Reasoner (64k context)",
48 |         }
49 | 


--------------------------------------------------------------------------------
/models/gemini_model.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from .base_model import BaseAIModel
 4 | 
 5 | 
 6 | class GeminiModel(BaseAIModel):
 7 |     def __init__(self):
 8 |         super().__init__()
 9 |         self.client = None
10 |         self.api_key = os.getenv("GEMINI_API_KEY")
11 |         self.default_variant = "gemini-2.0-flash"
12 | 
13 |     def load(self):
14 |         if not self.api_key:
15 |             raise ValueError("The environment variable GEMINI_API_KEY is missing.")
16 | 
17 |         try:
18 |             from openai import OpenAI
19 |         except ImportError:
20 |             raise ImportError(
21 |                 "The 'openai' library is not installed. Please install it using 'pip install openai'."
22 |             )
23 | 
24 |         self.client = OpenAI(
25 |             api_key=self.api_key,
26 |             base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
27 |         )
28 | 
29 |     def run(self, messages, variant=None):
30 |         if self.client is None:
31 |             self.load()
32 | 
33 |         model_variant = variant or self.default_variant
34 |         prepared_input = self.prepare_messages(messages, model_variant)
35 | 
36 |         response = self.client.chat.completions.create(
37 |             model=model_variant,
38 |             messages=prepared_input,
39 |             temperature=0.2,
40 |         )
41 | 
42 |         return response.choices[0].message.content.strip()
43 | 
44 |     def name(self):
45 |         return "Gemini"
46 | 
47 |     def get_variants(self):
48 |         return {
49 |             "gemini-2.0-flash": "Gemini 2 Flash (1m context)",
50 |         }
51 | 


--------------------------------------------------------------------------------
/models/grok_model.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from .base_model import BaseAIModel
 4 | 
 5 | 
 6 | class GrokModel(BaseAIModel):
 7 |     def __init__(self):
 8 |         super().__init__()
 9 |         self.client = None
10 |         self.api_key = os.getenv("XAI_API_KEY")
11 |         self.default_variant = "grok-2-latest"
12 | 
13 |     def load(self):
14 |         if not self.api_key:
15 |             raise ValueError("The environment variable XAI_API_KEY is missing.")
16 | 
17 |         try:
18 |             from openai import OpenAI
19 |         except ImportError:
20 |             raise ImportError(
21 |                 "The 'openai' library is not installed. Please install it using 'pip install openai'."
22 |             )
23 | 
24 |         self.client = OpenAI(api_key=self.api_key, base_url="https://api.x.ai/v1")
25 | 
26 |     def run(self, messages, variant=None):
27 |         if self.client is None:
28 |             self.load()
29 | 
30 |         model_variant = variant or self.default_variant
31 |         prepared_input = self.prepare_messages(messages, model_variant)
32 | 
33 |         response = self.client.chat.completions.create(
34 |             model=model_variant,
35 |             messages=prepared_input,
36 |             temperature=0.2,
37 |         )
38 | 
39 |         return response.choices[0].message.content.strip()
40 | 
41 |     def name(self):
42 |         return "Grok"
43 | 
44 |     def get_variants(self):
45 |         return {
46 |             "grok-2-latest": "Grok 2 (128k context)",
47 |         }
48 | 


--------------------------------------------------------------------------------
/models/local_gguf_model.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from helpers import file
 4 | 
 5 | from .base_model import BaseAIModel
 6 | 
 7 | 
 8 | class LocalGGUFModel(BaseAIModel):
 9 |     available_models = {}
10 | 
11 |     def __init__(self):
12 |         super().__init__()
13 |         self.llm = None
14 |         self.force_string_prompt = True
15 | 
16 |     def load(self, model_config):
17 |         """
18 |         Load a GGUF model using the specified configuration.
19 | 
20 |         Args:
21 |             model_config (dict): Model configuration including the path.
22 |         """
23 |         try:
24 |             from gpt4all import GPT4All
25 |         except ImportError:
26 |             raise ImportError(
27 |                 "The 'gpt4all' library is not installed. Please install it using 'pip install gpt4all' before running this feature."
28 |             )
29 | 
30 |         self.llm = GPT4All(model_name=model_config["path"])
31 | 
32 |     def run(self, messages, variant=None):
33 |         if variant is None:
34 |             raise ValueError("A valid model variant (ID) must be provided.")
35 | 
36 |         model_config = self.available_models.get(variant)
37 |         if not model_config:
38 |             raise ValueError(f"Model variant '{variant}' not found.")
39 | 
40 |         if self.llm is None:
41 |             self.load(model_config)
42 | 
43 |         model_variant = variant or self.default_variant
44 |         prepared_input = self.prepare_messages(messages, model_variant)
45 | 
46 |         response = self.llm.generate(prepared_input)
47 | 
48 |         return response.strip()
49 | 
50 |     def name(self):
51 |         return "Local GGUF"
52 | 
53 |     def get_variants(self):
54 |         if not self.available_models:
55 |             self.load_models_from_directory()
56 | 
57 |         return {
58 |             model_path: config["name"]
59 |             for model_path, config in self.available_models.items()
60 |         }
61 | 
62 |     @classmethod
63 |     def load_models_from_directory(cls):
64 |         """
65 |         Scans the models directory for GGUF files and loads their configurations.
66 | 
67 |         Returns:
68 |             dict: A dictionary of available GGUF models.
69 |         """
70 |         models_dir = file.get_models_folder()
71 | 
72 |         if not os.path.exists(models_dir):
73 |             return {}
74 | 
75 |         cls.available_models = {}
76 | 
77 |         for f in os.listdir(models_dir):
78 |             if f.endswith(".gguf"):
79 |                 model_path = os.path.join(models_dir, f)
80 |                 model_name = os.path.splitext(f)[0]
81 | 
82 |                 cls.available_models[model_path] = {
83 |                     "name": model_name,
84 |                     "path": model_path,
85 |                 }
86 | 
87 |         return cls.available_models
88 | 


--------------------------------------------------------------------------------
/models/local_model.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | 
  4 | from helpers import file
  5 | 
  6 | from .base_model import BaseAIModel
  7 | 
  8 | 
  9 | class LocalModel(BaseAIModel):
 10 |     available_models = {}
 11 | 
 12 |     def __init__(self):
 13 |         super().__init__()
 14 |         self.client = None
 15 | 
 16 |     def load(self, model_config):
 17 |         """
 18 |         Load the model using its configuration.
 19 | 
 20 |         Args:
 21 |             model_config (dict): The configuration of the model including path, torch_dtype, and max_new_tokens.
 22 |         """
 23 |         try:
 24 |             from transformers import pipeline
 25 |         except ImportError:
 26 |             raise ImportError(
 27 |                 "The 'transformers' and 'torch' libraries are not installed. Please install them using 'pip install transformers torch' before running this feature."
 28 |             )
 29 | 
 30 |         self.client = pipeline(
 31 |             "text-generation",
 32 |             model=model_config["path"],
 33 |             torch_dtype=model_config.get("torch_dtype", "auto"),
 34 |             device_map="auto",
 35 |         )
 36 | 
 37 |     def run(self, messages, variant=None):
 38 |         if variant is None:
 39 |             raise ValueError("A valid model variant (ID) must be provided.")
 40 | 
 41 |         model_config = self.available_models.get(variant)
 42 |         if not model_config:
 43 |             raise ValueError(f"Model variant '{variant}' not found.")
 44 | 
 45 |         if self.client is None:
 46 |             self.load(model_config)
 47 | 
 48 |         outputs = self.client(
 49 |             messages,
 50 |             max_new_tokens=model_config.get("max_new_tokens", 2048),
 51 |             return_full_text=False,
 52 |         )
 53 | 
 54 |         if isinstance(outputs, list) and "generated_text" in outputs[0]:
 55 |             return str(outputs[0]["generated_text"])
 56 |         else:
 57 |             raise ValueError("Unexpected output format from the model pipeline.")
 58 | 
 59 |     def name(self):
 60 |         return "Local"
 61 | 
 62 |     def get_variants(self):
 63 |         if not self.available_models:
 64 |             self.load_models_from_config()
 65 | 
 66 |         return {
 67 |             model_id: config["name"]
 68 |             for model_id, config in self.available_models.items()
 69 |         }
 70 | 
 71 |     @classmethod
 72 |     def load_models_from_config(self):
 73 |         """
 74 |         Loads models from a JSON configuration file and stores them in available_models.
 75 | 
 76 |         Returns:
 77 |             dict: A dictionary of available model configurations.
 78 |         """
 79 |         models_file = file.get_models_file()
 80 | 
 81 |         if not os.path.exists(models_file):
 82 |             return {}
 83 | 
 84 |         with open(models_file, "r", encoding="utf-8") as f:
 85 |             model_configs = json.load(f)
 86 | 
 87 |         self.available_models = {}
 88 | 
 89 |         for model_config in model_configs:
 90 |             model_name = model_config.get("name")
 91 |             model_path = model_config.get("path")
 92 |             torch_dtype = model_config.get("torch_dtype", "auto")
 93 |             max_new_tokens = model_config.get("max_new_tokens", 4096)
 94 | 
 95 |             if not model_name or not model_path:
 96 |                 raise ValueError("Each model entry must have 'name' and 'path' fields.")
 97 | 
 98 |             self.available_models[model_path] = {
 99 |                 "name": model_name,
100 |                 "path": model_path,
101 |                 "torch_dtype": torch_dtype,
102 |                 "max_new_tokens": max_new_tokens,
103 |             }
104 | 
105 |         return self.available_models
106 | 


--------------------------------------------------------------------------------
/models/openai_model.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from .base_model import BaseAIModel
 4 | 
 5 | 
 6 | class OpenAIModel(BaseAIModel):
 7 |     def __init__(self):
 8 |         super().__init__()
 9 |         self.client = None
10 |         self.api_key = os.getenv("OPENAI_API_KEY")
11 |         self.default_variant = "gpt-4o-mini"
12 | 
13 |     def load(self):
14 |         if not self.api_key:
15 |             raise ValueError("The environment variable OPENAI_API_KEY is missing.")
16 | 
17 |         try:
18 |             import openai
19 |         except ImportError:
20 |             raise ImportError(
21 |                 "The 'openai' library is not installed. Please install it using 'pip install openai'."
22 |             )
23 | 
24 |         self.client = openai.OpenAI(api_key=self.api_key)
25 | 
26 |     def run(self, messages, variant=None):
27 |         if self.client is None:
28 |             self.load()
29 | 
30 |         model_variant = variant or self.default_variant
31 | 
32 |         # Prepare messages according to the model's role support
33 |         prepared_input = self.prepare_messages(messages, model_variant)
34 | 
35 |         if isinstance(prepared_input, str):
36 |             # Handle plain-text input for models without role support
37 |             response = self.client.completions.create(
38 |                 model=model_variant,
39 |                 prompt=prepared_input,
40 |                 temperature=0.2,
41 |             )
42 | 
43 |             return response.choices[0].text.strip()
44 |         elif isinstance(prepared_input, list):
45 |             # Standard Chat Completions API with role-based messages
46 |             response = self.client.chat.completions.create(
47 |                 model=model_variant,
48 |                 messages=prepared_input,
49 |                 temperature=0.2,
50 |             )
51 | 
52 |             return response.choices[0].message.content.strip()
53 | 
54 |         else:
55 |             raise ValueError(
56 |                 f"Invalid input format for model variant '{model_variant}'."
57 |             )
58 | 
59 |     def name(self):
60 |         return "OpenAI"
61 | 
62 |     def get_variants(self):
63 |         return {
64 |             "gpt-4o": "GPT-4o (128k context)",
65 |             "chatgpt-4o-latest": "ChatGPT-4o (128k context)",
66 |             "gpt-4o-mini": "GPT-4o Mini (128k context)",
67 |             "gpt-4-turbo": "GPT-4 Turbo (128k context)",
68 |             "gpt-4": "GPT-4 (8k context)",
69 |         }
70 | 


--------------------------------------------------------------------------------
/prompts.txt:
--------------------------------------------------------------------------------
1 | - Extract the domain name from a customer's email address, specifically the part after the '@' symbol.
2 | - What is the sum and the name of each of the track genres, ordered from biggest to smallest?
3 | - What was the total purchased in 2012?
4 | - What is the sum and the name of each country that has sales, ordered by biggest and limited to the first five?
5 | - What was the total purchased separated by year?
6 | - What are the first 100 cities within 150 km of "new york" city, using latitude and longitude?
7 | 


--------------------------------------------------------------------------------
/requirements-gguf.txt:
--------------------------------------------------------------------------------
1 | gpt4all
2 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | streamlit
 2 | plotly
 3 | pandas
 4 | sqlalchemy
 5 | openai
 6 | transformers>=4.45.0
 7 | torch
 8 | accelerate>=0.26.0
 9 | anthropic
10 | 


--------------------------------------------------------------------------------