├── .gitattributes
├── .gitignore
├── README.md
├── detective_test.py
├── main.py
├── modules
│   ├── chatbot.py
│   ├── database
│   │   ├── db.py
│   │   └── sql-murder-mystery.db
│   ├── model.py
│   ├── settings.py
│   └── tools.py
├── requirements.txt
└── translator_test.py

/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ipynb linguist-documentation
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110 | .pdm.toml
111 | .pdm-python
112 | .pdm-build/
113 |
114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115 | __pypackages__/
116 |
117 | # Celery stuff
118 | celerybeat-schedule
119 | celerybeat.pid
120 |
121 | # SageMath parsed files
122 | *.sage.py
123 |
124 | # Environments
125 | .env
126 | .venv
127 | env/
128 | venv/
129 | ENV/
130 | env.bak/
131 | venv.bak/
132 |
133 | # Spyder project settings
134 | .spyderproject
135 | .spyproject
136 |
137 | # Rope project settings
138 | .ropeproject
139 |
140 | # mkdocs documentation
141 | /site
142 |
143 | # mypy
144 | .mypy_cache/
145 | .dmypy.json
146 | dmypy.json
147 |
148 | # Pyre type checker
149 | .pyre/
150 |
151 | # pytype static type analyzer
152 | .pytype/
153 |
154 | # Cython debug symbols
155 | cython_debug/
156 |
157 | # PyCharm
158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160 | # and can be added to the global gitignore or merged into this file. For a more nuclear
161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162 | #.idea/
163 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SQL Detective - A Murder Mystery RAG Game
2 |
3 | SQL Detective is an interactive investigation game where you solve a murder mystery by querying a police database using natural language. This application showcases Retrieval-Augmented Generation (RAG) techniques with local small language models and database integration.
4 |
5 | ## 🔎 Overview
6 |
7 | As a detective in SQL City, you're tasked with solving a murder that occurred on January 15, 2018. What sets this application apart is its approach:
8 |
9 | 1. **Natural Language to SQL Translation**: Ask questions in plain English
10 | 2. **Database Query Execution**: Retrieve specific information from the police database
11 | 3. **Context-Aware Analysis**: Get intelligent insights based on the query results
12 | 4. **Progressive Investigation**: Follow leads from clue to clue to solve the case
13 |
14 | ## 🚀 Features
15 |
16 | - **Conversational Interface**: Interact with the investigation through a chat interface
17 | - **Automatic SQL Translation**: No SQL knowledge required - just ask questions naturally
18 | - **Intelligent Analysis**: The detective AI analyzes query results and suggests next steps
19 | - **Investigation Notes**: Keep track of your findings in the detective's notebook
20 | - **Local Processing**: All processing happens locally with no data sent to external APIs
21 |
22 | ## 🛠️ Technical Stack
23 |
24 | - **Backend**: `Python`
25 | - **Frontend**: `Streamlit`
26 | - **Database**: `SQLite`
27 | - **LLM Platform**: `Ollama`
28 | - **Language Model**: `DeepSeek-R1:1.5b`
29 | - **RAG Implementation**: `LangChain` for orchestration and context management
30 |
31 | ## 📂 Project Structure
32 | ```
33 | Ollama-SQLite-RAG/
34 | │
35 | ├── main.py                    # Application entry point
36 | ├── modules/
37 | │   ├── chatbot.py             # Main conversation handler
38 | │   ├── model.py               # Base classes for language model interactions
39 | │   ├── settings.py            # Configuration settings
40 | │   ├── tools.py               # Utility functions
41 | │   └── database/
42 | │       ├── db.py              # Database connection and query handling
43 | │       └── sql-murder-mystery.db  # SQLite database file
44 | │
45 | ├── detective_test.py          # Test script for detective model
46 | └── translator_test.py         # Test script for translator model
47 | ```
48 | ## 🧠 How It Works
49 |
50 | 1. **User Input**: The user submits a natural language question about the case
51 | 2. **Translation**: The Translator model converts the question to a valid SQL query
52 | 3. **Database Query**: The system executes the SQL query against the police database
53 | 4. **Analysis**: The Detective model analyzes the query results in the context of the question
54 | 5. **Response**: The system presents findings and suggests next investigative steps
55 |
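For illustration, the same five steps can be driven outside Streamlit. A minimal sketch, assuming Ollama is running locally with the model already pulled:

```python
from modules.database.db import execute_sql_query
from modules.model import Detective, Translator

question = "find the murder reported in SQL City on january 15, 2018"
translation = Translator(question)               # steps 1-2: question -> SQL
rows = execute_sql_query(translation.sql_query)  # step 3: run the query
detective = Detective(question, context=rows)    # step 4: analyze the results
print(detective.answer)                          # step 5: findings
print(detective.next_step)
```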
56 | ## 📖 RAG Architecture
57 |
58 | This application implements a complete RAG pipeline:
59 |
60 | 1. **Retrieval**: SQL queries retrieve relevant information from the database
61 | 2. **Augmentation**: The retrieved data augments the context for the language model
62 | 3. **Generation**: The language model generates insights based on the augmented context
63 |
64 | ## 🚦 Getting Started
65 |
66 | ### Prerequisites
67 | - Python 3.8+
68 | - [Ollama](https://ollama.ai/) installed locally
69 | - `DeepSeek-R1:1.5b` model downloaded to Ollama
70 |
71 | ### Installation
72 |
73 | ```bash
74 | # Clone the repository
75 | git clone https://github.com/Ne0bliviscaris/Ollama-SQLite-RAG.git
76 | cd Ollama-SQLite-RAG
77 |
78 | # Create a virtual environment (optional but recommended)
79 | python -m venv venv
80 | source venv/bin/activate  # On Windows: venv\Scripts\activate
81 |
82 | # Install dependencies
83 | pip install -r requirements.txt
84 |
85 | # Download the model (the Ollama service must be running)
86 | ollama pull deepseek-r1:1.5b
87 | # Run the application
88 | python main.py
89 | ```
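Before launching, you can optionally sanity-check that the bundled database is readable, using nothing but the standard library:

```python
import sqlite3

# List the tables shipped in the mystery database
with sqlite3.connect("modules/database/sql-murder-mystery.db") as conn:
    tables = conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
print([name for (name,) in tables])
```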
90 |
91 | ## 🙏 Acknowledgements
92 |
93 | - [SQL Murder Mystery](https://github.com/NUKnightLab/sql-mysteries) for the original game concept
94 | - [LangChain](https://langchain.com/) for the RAG framework
95 | - [Ollama](https://ollama.ai/) for local language model support
96 | - [DeepSeek](https://www.deepseek.com) for their R1:1.5b model
97 | - [Streamlit](https://streamlit.io/) for the interactive web interface
98 |
--------------------------------------------------------------------------------
/detective_test.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from modules.model import Detective
4 |
5 | # question = "find last house on Franklin Ave"
6 | # question = "find people living in the house with the largest address number on street named 'Franklin Ave'"
7 |
8 | question = "there was a 'murder' in 'SQL City' on 'january 15, 2018'. Find the report, i want to find any clues"
9 | context = r"""
10 | {"user_inputs":["there was a 'murder' in 'SQL City' on 'january 15, 2018'. Find the report, i want to find any clues"],"sql_queries":["SELECT * FROM crime_scene_report WHERE date = '20180115' AND type = 'murder'"],"query_results":[[{"date":20180115,"type":"murder","description":"Life? Dont talk to me about life.","city":"Albany"},{"date":20180115,"type":"murder","description":"Mama, I killed a man, put a gun against his head...","city":"Reno"},{"date":20180115,"type":"murder","description":"Security footage shows that there were 2 witnesses. The first witness lives at the last house on \"Northwestern Dr\". The second witness, named Annabel, lives somewhere on \"Franklin Ave\".","city":"SQL City"}]]}
11 | """
12 |
13 | context_dict = json.loads(context.strip())
14 | query_results = context_dict["query_results"][0]
15 | response = Detective(question, query_results)
16 |
17 |
18 | print(response.full_response)
19 | print("\nAnswer:\n", response.answer)
20 | print("\nNext step:\n", response.next_step)
21 | print("\nThinking:\n", response.thinking)
22 | print("\nRules:\n", response.rules)
23 | print("\nPrompt:\n", response.prompt())
24 | # print("\nContext:\n", response.context)
25 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 |
4 | import streamlit as st
5 |
6 | from modules.chatbot import chatbot
7 |
8 | st.set_page_config(
9 |     page_title="Local SQL RAG",
10 |     page_icon="🤖",
11 |     menu_items={"About": "https://github.com/Ne0bliviscaris/Ollama-SQLite-RAG"},
12 | )
13 |
14 |
15 | def title_screen():
16 |     st.title("SQL RAG - local Ollama - Langchain")
17 |     st.markdown(
18 |         """
19 |         ### 🔍 Welcome, Detective
20 |
21 |         A murder has been committed in SQL City, and you've been called to solve the case.
22 |
23 |         Your only lead is that the crime occurred on **January 15, 2018**, but the crime scene report has gone missing.
24 |         Using your detective skills and SQL knowledge, you must:
25 |
26 |         1. Query the police database to find relevant information
27 |         2. Follow leads by asking the right questions
28 |         3. Connect the dots to identify the killer
29 |
30 |         Type your investigation queries in natural language, and the system will:
31 |         - Translate your questions to SQL
32 |         - Search the database
33 |         - Help you analyze the results
34 |
35 |         Can you solve the mystery before the trail goes cold?
36 | """ 37 | ) 38 | 39 | 40 | def main(): 41 | if not st.session_state: 42 | title_screen() 43 | chatbot() 44 | 45 | 46 | if __name__ == "__main__": # Poprawiono cudzysłowy 47 | # Launch streamlit and check if it's not already running 48 | if not os.environ.get("RUNNING_IN_STREAMLIT"): 49 | # Mark streamlit as running 50 | os.environ["RUNNING_IN_STREAMLIT"] = "1" 51 | file_path = os.path.abspath(__file__) 52 | # Run streamlit with correct command list 53 | subprocess.run(["streamlit", "run", file_path], check=True) 54 | else: 55 | main() 56 | -------------------------------------------------------------------------------- /modules/chatbot.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import streamlit as st 3 | 4 | from modules.database.db import execute_sql_query 5 | from modules.model import Detective, Translator 6 | 7 | 8 | def chatbot(): 9 | """Main chatbot function.""" 10 | game_rules_sidebar() 11 | initialize_chat_session() 12 | display_chat_input() 13 | with st.container(): 14 | show_chat_history() 15 | rag_pipeline() 16 | 17 | 18 | def initialize_chat_session(): 19 | """Initialize chat session state variables.""" 20 | if "messages" not in st.session_state: 21 | st.session_state.messages = [] 22 | if "index" not in st.session_state: 23 | st.session_state.index = 0 24 | if "context" not in st.session_state: 25 | st.session_state.context = { 26 | "user_inputs": [], 27 | "sql_queries": [], 28 | "query_results": [], 29 | "detective_answers": [], 30 | "detective_thinking": [], 31 | } 32 | if "current_state" not in st.session_state: 33 | st.session_state.current_state = None 34 | 35 | 36 | def display_chat_input(): 37 | if prompt := st.chat_input("Ask about the case..."): 38 | index = st.session_state.index 39 | st.session_state.current_state = "translator" 40 | prefill_context() 41 | update_context("user_inputs", prompt, index) 42 | update_messages("user", prompt, index) 43 | st.session_state.index += 1 44 | st.rerun() 45 | 46 | 47 | def prefill_context(): 48 | """Prefill context with empty messages.""" 49 | for key in st.session_state.context: 50 | st.session_state.context[key].append(None) 51 | 52 | 53 | def show_chat_history(): 54 | """Display chat history with synchronized context data.""" 55 | for index, message_group in enumerate(st.session_state.messages): 56 | if not message_group: 57 | continue 58 | 59 | for message in message_group: 60 | with st.chat_message(message["role"]): 61 | st.markdown(message["content"]) 62 | 63 | if message["role"] == "database": 64 | show_query_results(index) 65 | if message["role"] == "assistant": 66 | show_thinking_process(index) 67 | 68 | 69 | def rag_pipeline(): 70 | """Main RAG pipeline.""" 71 | if st.session_state.current_state == "translator": 72 | translate_question() 73 | if st.session_state.current_state == "database": 74 | execute_query() 75 | if st.session_state.current_state == "detective": 76 | detective_conclusion() 77 | 78 | 79 | def translate_question(): 80 | """Translate the natural question to SQL query""" 81 | with st.spinner("Translating to SQL..."): 82 | current_index = st.session_state.index - 1 83 | 84 | user_input = st.session_state.context["user_inputs"][current_index] 85 | translation = Translator(user_input) 86 | 87 | update_context("sql_queries", translation.sql_query, current_index) 88 | update_messages("database", translation.sql_query, current_index) 89 | 90 | st.session_state.current_state = "database" 91 | st.rerun() 92 | 93 | 94 | def execute_query(): 
95 | """Execute SQL query and return results.""" 96 | with st.spinner("Executing SQL query..."): 97 | current_index = st.session_state.index - 1 98 | query = st.session_state.context["sql_queries"][current_index] 99 | try: 100 | query_results = execute_sql_query(query) 101 | update_context("query_results", query_results, current_index) 102 | st.session_state.current_state = "detective" 103 | except Exception as e: 104 | print(f"Error executing translated query.\nRephrase the question and try again.\nError message: {e}") 105 | st.session_state.current_state = None 106 | 107 | st.rerun() 108 | 109 | 110 | def detective_conclusion(): 111 | """Detective's conclusion.""" 112 | with st.spinner("Detective is analyzing the results..."): 113 | current_index = st.session_state.index - 1 114 | 115 | user_input = st.session_state.context["user_inputs"][current_index] 116 | query_results = st.session_state.context["query_results"][current_index] 117 | 118 | detective = Detective(user_input=user_input, context=query_results) 119 | 120 | update_context("detective_answers", detective.answer, current_index) 121 | update_context("detective_thinking", detective.thinking, current_index) 122 | update_messages("assistant", detective.answer, current_index) 123 | 124 | st.session_state.current_state = None 125 | st.rerun() 126 | 127 | 128 | def update_context(key, value, index): 129 | """ "Update context value at specific index.""" 130 | context = st.session_state.context[key] 131 | while len(context) <= index: 132 | context.append(None) 133 | context[index] = value 134 | 135 | 136 | def update_messages(role, content, index): 137 | messages = st.session_state.messages 138 | while len(messages) <= index: 139 | messages.append([]) 140 | 141 | messages[index].append({"role": role, "content": content}) 142 | 143 | 144 | def show_query_results(index): 145 | """Display query results for given message index.""" 146 | query_results = st.session_state.context["query_results"][index] 147 | 148 | # Validate index 149 | if not query_results: 150 | st.warning("Error executing translated query. Rephrase the question and try again.") 151 | return 152 | 153 | with st.expander("Query Results"): 154 | df = convert_results_to_dataframe(query_results) 155 | st.dataframe(df) 156 | 157 | 158 | def convert_results_to_dataframe(results): 159 | """Convert query results to DataFrame.""" 160 | return pd.DataFrame(results) 161 | 162 | 163 | def show_thinking_process(index): 164 | """Display detective's thinking process in expandable section.""" 165 | with st.expander("Detective's Thinking Process"): 166 | st.write(st.session_state.context["detective_thinking"][index]) 167 | 168 | 169 | def game_rules_sidebar(): 170 | """Displays game rules, hints and tools in the sidebar.""" 171 | with st.sidebar: 172 | st.title("Detective's Handbook") 173 | 174 | with st.expander("Case Brief", expanded=True): 175 | st.markdown( 176 | """ 177 | **THE CASE** 178 | 179 | - Crime: Murder 180 | - Date: January 15, 2018 181 | - Location: SQL City 182 | - Status: Unsolved 183 | 184 | Begin by finding the crime scene report. 
185 | """ 186 | ) 187 | 188 | with st.expander("Investigation Tips"): 189 | st.markdown( 190 | """ 191 | - Ask specific questions about people, places, or evidence 192 | - Follow leads from one piece of evidence to another 193 | - Look for connections between witnesses and suspects 194 | - Pay attention to alibis and timelines 195 | """ 196 | ) 197 | 198 | # Save notes in session state for persistence 199 | if "notes" not in st.session_state: 200 | st.session_state.notes = "" 201 | 202 | st.session_state.notes = st.text_area( 203 | label="Detective's Notes", 204 | value=st.session_state.notes, 205 | height=300, 206 | placeholder="Record your clues, suspects and theories here...", 207 | ) 208 | -------------------------------------------------------------------------------- /modules/database/db.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | from langchain_community.utilities import SQLDatabase 4 | 5 | from modules.settings import DB_FILE 6 | 7 | 8 | def execute_sql_query(extracted_query): 9 | """Connect to database and execute SQL query""" 10 | with sqlite3.connect(DB_FILE) as db_connection: 11 | cursor = db_connection.cursor() 12 | cursor.execute(extracted_query) 13 | results = cursor.fetchall() 14 | 15 | column_names = [description[0] for description in cursor.description] 16 | 17 | return convert_results_to_dict(results, column_names) 18 | 19 | 20 | def convert_results_to_dict(records, column_names): 21 | """Convert list of tuples to a list of dictionaries.""" 22 | return [dict(zip(column_names, row)) for row in records] 23 | 24 | 25 | def get_db_schema(): 26 | db = db_without_solution() 27 | return db.get_table_info() 28 | 29 | 30 | def database_connect(): 31 | """Establishes a connection to the SQL database using the provided URI.""" 32 | db = SQLDatabase.from_uri(f"sqlite:///{DB_FILE}") 33 | return db 34 | 35 | 36 | def db_without_solution(): 37 | """Connects to SQL database. 
36 | def db_without_solution():
37 |     """Connects to SQL database. Solution table is excluded."""
38 |     return SQLDatabase.from_uri(f"sqlite:///{DB_FILE}", ignore_tables=["solution"])
39 |
--------------------------------------------------------------------------------
/modules/database/sql-murder-mystery.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ne0bliviscaris/SQL-Detective-RAG-Game/bd36e67fe4d5ef8cbc816ed94f8ac8f78fcf8596/modules/database/sql-murder-mystery.db
--------------------------------------------------------------------------------
/modules/model.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from langchain.chains.sql_database.query import create_sql_query_chain
4 | from langchain_core.output_parsers import StrOutputParser
5 | from langchain_core.prompts import PromptTemplate
6 | from langchain_core.runnables import RunnablePassthrough
7 | from langchain_ollama import ChatOllama
8 |
9 | from modules.database.db import db_without_solution, get_db_schema
10 | from modules.settings import MODEL
11 | from modules.tools import model_answer_regex
12 |
13 |
14 | class Model:
15 |     """Base class for all models."""
16 |
17 |     def __init__(self, user_input, context=None):
18 |         self.user_input = user_input
19 |         self.context = context
20 |         self.full_response = self.get_model_response()
21 |         self.sql_query = self.get_field("sql_query")
22 |         self.answer = self.get_field("answer")
23 |         self.next_step = self.get_field("next_step")
24 |         self.thinking = self.get_field("thinking")
25 |         self.rules = self.get_field("rules_followed")
26 |
27 |     def get_model_response(self):
28 |         """Get response using instance attributes."""
29 |         try:
30 |             langchain = self.build_langchain()
31 |             response_str = langchain.invoke(self.model_input())
32 |             try:
33 |                 parsed_response = json.loads(response_str)
34 |                 return parsed_response
35 |             except (json.JSONDecodeError, TypeError):  # fall back to the raw text
36 |                 return response_str
37 |         except Exception as e:
38 |             return f"Model Connection error. Make sure Ollama is running and {MODEL} is installed.\n{e}"
39 |
40 |     def get_field(self, field=None):
41 |         """Extract a named field from the response: dict lookup, or regex on raw text."""
42 |         if isinstance(self.full_response, dict):
43 |             return self.full_response.get(field)
44 |
45 |         if isinstance(self.full_response, str):
46 |             return model_answer_regex(self.full_response, field)
47 |         return None
48 |
49 |     def get_thinking_process(self):
50 |         """Get thinking process from the parsed response."""
51 |         return self.get_field("thinking")
52 |
53 |
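# Subclasses provide three hooks that Model.__init__ consumes eagerly:
#   model_input()     -> dict passed to the chain's invoke()
#   prompt()          -> PromptTemplate defining the model's role
#   build_langchain() -> the runnable pipeline itself
# Constructing a Model therefore performs a full model call; afterwards the
# extracted fields (sql_query, answer, ...) are plain attributes.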
54 | class Translator(Model):
55 |     """SQL Translator model class."""
56 |
57 |     def model_input(self):
58 |         return {
59 |             "input": self.user_input,
60 |             "question": self.user_input,
61 |         }
62 |
63 |     def prompt(self) -> str:
64 |         """Prompt template to translate text instructions into SQL query"""
65 |         translator = """
66 | **ROLE:** You are a SQL Translator. Your task is to translate the following question into a valid SQL query. Use {dialect} dialect.
67 |
68 | **Database Schema:**
69 | {table_info}
70 |
71 | **Rules:**
72 | 1. Ensure the output contains a valid SQL query.
73 | 2. The query must strictly follow the provided database schema and use only the available tables and columns.
74 | 3. Keep the thinking process brief, ensuring it logically aligns with the user input.
75 | 4. Avoid unnecessary complexity—only join tables or include conditions that are directly relevant to the user's question.
76 | 5. Fetch all columns by default using 'SELECT *', unless a specific column is mentioned in the input.
77 | 6. Be flexible in interpreting imprecise or incomplete user input while providing a valid SQL query.
78 | 7. Do not use your own knowledge or external sources.
79 | 8. Do not assume anything that is not explicitly present in the schema.
80 | 9. ONLY return the SQL query, no additional explanations or text.
81 | 10. If the user is asking about the order of items (first, last etc.), use an ORDER BY clause based on the relevant column.
82 | 11. When using columns from multiple tables, make sure each column is referenced through the correct table.
83 |
84 |
85 | **Input:** {input}
86 |
87 | **Output:**
88 | ```json
89 | {{
90 |     "user_input": "{input}",
91 |     "sql_query": "SELECT * FROM table_name WHERE condition;",
92 |     "thinking": "Thinking process.",
93 |     "rules_followed": "Rules followed while generating answer."
94 | }}
95 | top_k: {top_k}
96 | ```
97 | """
98 |         return PromptTemplate(
99 |             template=translator,
100 |             input_variables=["input"],
101 |             partial_variables={
102 |                 "dialect": "sqlite",
103 |                 "table_info": get_db_schema(),
104 |             },
105 |         )
106 |
107 |     def build_langchain(self):
108 |         """Builds and returns a language chain with database and Ollama connections."""
109 |         db = db_without_solution()
110 |         llm = ChatOllama(
111 |             temperature=0,
112 |             seed=1,
113 |             model=MODEL,
114 |             num_predict=512,  # Output tokens limit
115 |             top_p=0.95,
116 |             format="json",
117 |             mirostat=2,
118 |             mirostat_eta=2,
119 |             mirostat_tau=1,
120 |             tfs_z=50,  # reduce the impact of less probable tokens
121 |             repeat_penalty=1.5,
122 |             top_k=2,
123 |         )
124 |         prompt = self.prompt()
125 |         return create_sql_query_chain(llm, db, prompt)
126 |
127 |
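# create_sql_query_chain appears to require a custom prompt to declare the
# "input", "table_info" and "top_k" variables (with "dialect" filled in from
# the db); the stray "top_k: {top_k}" line inside the JSON fence above seems
# to exist only to satisfy that contract.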
128 | class Detective(Model):
129 |     """Detective model for analyzing case data and providing conclusions."""
130 |
131 |     def model_input(self):
132 |         return {
133 |             "user_input": self.user_input,
134 |             "context": self.context,
135 |         }
136 |
137 |     def prompt(self) -> str:
138 |         """Prompt template for analyzing query results against the user's question"""
139 |         detective = """
140 | **ROLE:** You are a detective solving a case. Analyze the provided information in reference to the user's question. Your response must be concise, fact-based, and logically structured.
141 |
142 | **Rules:**
143 | 1. Provide an "answer" based only on the given context.
144 | 2. Suggest a "next_step" that logically follows from the answer.
145 | 3. Keep "thinking" brief but clear, explaining how the answer was derived.
146 | 4. Use only the received data—do not assume or use external knowledge.
147 | 5. Use exact column names from the query results in responses.
148 | 6. For **last, highest, or largest values**, return the maximum in the relevant column.
149 | 7. For **first, lowest, or smallest values**, return the minimum in the relevant column.
150 | 8. For **specific persons, objects, or events**, find an exact match in the data.
151 | 9. For **patterns, summaries, or trends**, analyze and summarize the provided data.
152 | 10. Dates are stored in the database as integers in the format YYYYMMDD. Make sure you read them properly.
153 | 11. If query results contain relevant data, extract and summarize key information.
154 | 12. If multiple records match, summarize them concisely.
155 | 13. If no relevant data is found, return `"answer": "No relevant data available."`
156 | 14. If the user input is ambiguous, provide a response based on the most likely interpretation.
157 | 15. Ensure the answer is correct—scan the received context carefully.
158 |
159 | **User_input:** {user_input}
160 |
161 | **Context:** {context}
162 |
163 | **Output:**
164 | ```json
165 | {{
166 |     "answer": "Your conclusion here.",
167 |     "next_step": "Logical next step based on the answer.",
168 |     "thinking": "Reasoning behind the answer and next step.",
169 |     "user_input": "{user_input}",
170 |     "rules_followed": "[List rules followed while generating answer.]"
171 | }}
172 | ```
173 | """
174 |         # Single braces so PromptTemplate actually injects user_input and context;
175 |         # double braces render as literal braces and are kept only in the JSON example.
176 |         return PromptTemplate(
177 |             template=detective,
178 |             input_variables=["user_input", "context"],
179 |             partial_variables={
180 |                 "top_k": 1,
181 |             },
182 |         )
183 |
184 |     def build_langchain(self):
185 |         """Builds and returns a language chain with database and Ollama connections."""
186 |         llm = ChatOllama(
187 |             temperature=0,
188 |             seed=1,
189 |             model=MODEL,
190 |             num_predict=1024,  # Output tokens limit
191 |             top_p=0.95,
192 |             format="json",
193 |             mirostat=2,
194 |             mirostat_eta=2,
195 |             mirostat_tau=1,
196 |             tfs_z=50,  # reduce the impact of less probable tokens
197 |             repeat_penalty=1.5,
198 |         )
199 |         prompt = self.prompt()
200 |
201 |         # Pick the question out of the input dict; a bare RunnablePassthrough()
202 |         # would pass the whole dict through as user_input.
203 |         return (
204 |             {"user_input": lambda x: x["user_input"], "context": lambda x: self.context} | prompt | llm | StrOutputParser()
205 |         )
--------------------------------------------------------------------------------
/modules/settings.py:
--------------------------------------------------------------------------------
1 | # Easy Model Switch
2 | # MODEL = "llama3"
3 | # MODEL = "zephyr:7b"
4 | MODEL = "deepseek-r1:1.5b"
5 | # MODEL = "llama3.2:1b"
6 |
7 | # Switched to local model
8 | # if MODEL == "llama3":
9 | #     MODEL_URL = "http://localhost:11434"
10 | # elif MODEL == "zephyr:7b":
11 | #     MODEL_URL = "http://localhost:55555"
12 |
13 |
14 | DB_FILE = "modules/database/sql-murder-mystery.db"  # forward slashes work on all platforms
--------------------------------------------------------------------------------
/modules/tools.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 |
4 | def model_answer_regex(response, field_name):
5 |     """Extract field value from JSON-like text response."""
6 |     FIELD = rf'"{field_name}"'  # Field name in double quotes
7 |     SEPARATOR = r"\s*:\s*"  # Colon with optional surrounding whitespace
8 |     QUOTE = r'"'  # Opening double quote
9 |     CONTENT = r'([^"]*)'  # Capture group for the field content
10 |     END = r'(?:"|$)'  # Closing double quote or end of string
11 |
12 |     regex = f"{FIELD}{SEPARATOR}{QUOTE}{CONTENT}{END}"
13 |     match = re.search(regex, response, re.VERBOSE)
14 |
15 |     if match:
16 |         return match.group(1)
17 |     return None
18 |
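# Example (hypothetical model output):
#   model_answer_regex('{"answer": "Annabel lives on Franklin Ave"}', "answer")
#   returns 'Annabel lives on Franklin Ave'
# Note: the ([^"]*) group stops at the first double quote, so values that
# contain escaped quotes are truncated there.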
Order house numbers") 5 | # response = Translator("Find who lives on the Franklin Ave") 6 | # response = Translator("find all people with blue eyes. drivers_license has info about eye color") 7 | # response = Translator("find names of all people with blue eyes. Eyes color is in drivers_license table") 8 | # response = Translator("find all people with blue eyes. 'drivers_license' table has info about 'eye_color'") 9 | # response = Translator("show all police murder case reports") 10 | 11 | 12 | print((response.full_response)) 13 | print("\nQuery:\n", response.sql_query) 14 | print("\nThinking:\n", response.thinking) 15 | # print("Prompt:\n", response.prompt()) 16 | --------------------------------------------------------------------------------