├── .gitignore ├── .streamlit └── config.toml ├── README.md ├── data └── marketing.db ├── docs ├── images │ └── init_page.PNG └── video │ └── test.gif ├── requirements.txt └── src ├── __init__.py ├── generative_ai.py ├── streamlit_app.py └── utilities ├── __init__.py └── config.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.pyc 3 | -------------------------------------------------------------------------------- /.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | # Below are all the sections and options you can have in ~/.streamlit/config.toml. 2 | 3 | [global] 4 | 5 | # By default, Streamlit checks if the Python watchdog module is available and, if not, prints a warning asking for you to install it. The watchdog module is not required, but highly recommended. It improves Streamlit's ability to detect changes to files in your filesystem. 6 | # If you'd like to turn off this warning, set this to True. 7 | # Default: false 8 | disableWatchdogWarning = false 9 | 10 | # If True, will show a warning when you run a Streamlit-enabled script via "python my_script.py". 11 | # Default: true 12 | showWarningOnDirectExecution = true 13 | 14 | # DataFrame serialization. 15 | # Acceptable values: - 'legacy': Serialize DataFrames using Streamlit's custom format. Slow but battle-tested. - 'arrow': Serialize DataFrames using Apache Arrow. Much faster and versatile. 16 | # Default: "arrow" 17 | dataFrameSerialization = "arrow" 18 | 19 | 20 | [logger] 21 | 22 | # Level of logging: 'error', 'warning', 'info', or 'debug'. 23 | # Default: 'info' 24 | level = "info" 25 | 26 | # String format for logging messages. If logger.datetimeFormat is set, logger messages will default to `%(asctime)s.%(msecs)03d %(message)s`. See [Python's documentation](https://docs.python.org/2.6/library/logging.html#formatter-objects) for available attributes. 27 | # Default: None 28 | messageFormat = "%(asctime)s %(message)s" 29 | 30 | 31 | [client] 32 | 33 | # Whether to enable st.cache. 34 | # Default: true 35 | caching = true 36 | 37 | # If false, makes your Streamlit script not draw to a Streamlit app. 38 | # Default: true 39 | displayEnabled = true 40 | 41 | # Controls whether uncaught app exceptions are displayed in the browser. By default, this is set to True and Streamlit displays app exceptions and associated tracebacks in the browser. 42 | # If set to False, an exception will result in a generic message being shown in the browser, and exceptions and tracebacks will be printed to the console only. 43 | # Default: true 44 | showErrorDetails = true 45 | 46 | 47 | [runner] 48 | 49 | # Allows you to type a variable or string by itself in a single line of Python code to write it to the app. 50 | # Default: true 51 | magicEnabled = true 52 | 53 | # Install a Python tracer to allow you to stop or pause your script at any point and introspect it. As a side-effect, this slows down your script's execution. 54 | # Default: false 55 | installTracer = false 56 | 57 | # Sets the MPLBACKEND environment variable to Agg inside Streamlit to prevent Python crashing. 58 | # Default: true 59 | fixMatplotlib = true 60 | 61 | # Run the Python Garbage Collector after each script execution. This can help avoid excess memory use in Streamlit apps, but could introduce delay in rerunning the app script for high-memory-use applications. 62 | # Default: true 63 | postScriptGC = true 64 | 65 | 66 | [server] 67 | 68 | # List of folders that should not be watched for changes. This impacts both "Run on Save" and @st.cache. 69 | # Relative paths will be taken as relative to the current working directory. 70 | # Example: ['/home/user1/env', 'relative/path/to/folder'] 71 | # Default: [] 72 | folderWatchBlacklist = [] 73 | 74 | # Change the type of file watcher used by Streamlit, or turn it off completely. 75 | # Allowed values: * "auto" : Streamlit will attempt to use the watchdog module, and falls back to polling if watchdog is not available. * "watchdog" : Force Streamlit to use the watchdog module. * "poll" : Force Streamlit to always use polling. * "none" : Streamlit will not watch files. 76 | # Default: "auto" 77 | fileWatcherType = "auto" 78 | 79 | # Symmetric key used to produce signed cookies. If deploying on multiple replicas, this should be set to the same value across all replicas to ensure they all share the same secret. 80 | # Default: randomly generated secret key. 81 | cookieSecret = "2f926bcba7c65336ef8e432c52ab0673a06b9f29860e5202008a3d5847b876bd" 82 | 83 | # If false, will attempt to open a browser window on start. 84 | # Default: false unless (1) we are on a Linux box where DISPLAY is unset, or (2) we are running in the Streamlit Atom plugin. 85 | headless = false 86 | 87 | # Automatically rerun script when the file is modified on disk. 88 | # Default: false 89 | runOnSave = false 90 | 91 | # The address where the server will listen for client and browser connections. Use this if you want to bind the server to a specific address. If set, the server will only be accessible from this address, and not from any aliases (like localhost). 92 | # Default: (unset) 93 | #address = 94 | 95 | # The port where the server will listen for browser connections. 96 | # Default: 8501 97 | port = 8501 98 | 99 | # The base path for the URL where Streamlit should be served from. 100 | # Default: "" 101 | baseUrlPath = "" 102 | 103 | # Enables support for Cross-Origin Request Sharing (CORS) protection, for added security. 104 | # Due to conflicts between CORS and XSRF, if `server.enableXsrfProtection` is on and `server.enableCORS` is off at the same time, we will prioritize `server.enableXsrfProtection`. 105 | # Default: true 106 | enableCORS = true 107 | 108 | # Enables support for Cross-Site Request Forgery (XSRF) protection, for added security. 109 | # Due to conflicts between CORS and XSRF, if `server.enableXsrfProtection` is on and `server.enableCORS` is off at the same time, we will prioritize `server.enableXsrfProtection`. 110 | # Default: true 111 | enableXsrfProtection = true 112 | 113 | # Max size, in megabytes, for files uploaded with the file_uploader. 114 | # Default: 200 115 | maxUploadSize = 200 116 | 117 | # Max size, in megabytes, of messages that can be sent via the WebSocket connection. 118 | # Default: 200 119 | maxMessageSize = 200 120 | 121 | # Enables support for websocket compression. 122 | # Default: true 123 | enableWebsocketCompression = true 124 | 125 | 126 | [browser] 127 | 128 | # Internet address where users should point their browsers in order to connect to the app. Can be IP address or DNS name and path. 129 | # This is used to: - Set the correct URL for CORS and XSRF protection purposes. - Show the URL on the terminal - Open the browser 130 | # Default: 'localhost' 131 | serverAddress = "localhost" 132 | 133 | # Whether to send usage statistics to Streamlit. 134 | # Default: true 135 | gatherUsageStats = true 136 | 137 | # Port where users should point their browsers in order to connect to the app. 138 | # This is used to: - Set the correct URL for CORS and XSRF protection purposes. - Show the URL on the terminal - Open the browser 139 | # Default: whatever value is set in server.port. 140 | serverPort = 8501 141 | 142 | 143 | [ui] 144 | 145 | # Flag to hide most of the UI elements found at the top of a Streamlit app. 146 | # NOTE: This does *not* hide the hamburger menu in the top-right of an app. 147 | # Default: false 148 | hideTopBar = false 149 | 150 | 151 | [mapbox] 152 | 153 | # Configure Streamlit to use a custom Mapbox token for elements like st.pydeck_chart and st.map. To get a token for yourself, create an account at https://mapbox.com. It's free (for moderate usage levels)! 154 | # Default: "" 155 | token = "" 156 | 157 | 158 | [deprecation] 159 | 160 | # Set to false to disable the deprecation warning for the file uploader encoding. 161 | # Default: true 162 | showfileUploaderEncoding = true 163 | 164 | # Set to false to disable the deprecation warning for using the global pyplot instance. 165 | # Default: true 166 | showPyplotGlobalUse = true 167 | 168 | [theme] 169 | 170 | primaryColor="#d33682" 171 | backgroundColor="#002b36" 172 | secondaryBackgroundColor="#586e75" 173 | textColor="#fafafa" 174 | font="sans serif" 175 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Natural Language to SQL Query Execution Application 2 | 3 | ### Overview 4 | 5 | The Natural Language to SQL Query Execution Application is a cutting-edge Python project that seamlessly blends the power of Natural Language Processing (NLP) with SQL database queries. This interactive application allows users, regardless of their SQL knowledge, to effortlessly execute SQL queries using intuitive and human-readable natural language inputs. 6 | 7 | ![Getting Started](docs/video/test.gif) 8 | 9 | ### Why it's Useful 10 | 11 | The application is highly valuable as it enables individuals without SQL knowledge to effortlessly query the database using natural language. In this specific use case, consider a scenario where we have customer data, including the probability of purchase. A marketing professional can use plain English to query for customers with the highest likelihood of making a purchase. This intuitive approach empowers marketing teams to make data-driven decisions and target high-probability customer segments effectively. With real-time insights at their fingertips, marketers can focus on converting potential leads with ease and efficiency. 12 | 13 | ### Technology Stack 14 | 15 | The project leverages the following technologies: 16 | 17 | - Python: The primary programming language used for developing the application. 18 | - Streamlit: A user-friendly web framework for creating interactive data apps, serving as the user interface. 19 | - OpenAI GPT-3.5 Turbo: The advanced NLP language model used for transforming natural language prompts into SQL queries. 20 | - SQLAlchemy: A powerful SQL toolkit and Object-Relational Mapper (ORM) utilized for database connections. 21 | 22 | ### Getting Started 23 | 24 | To launch the application, follow these steps: 25 | 26 | 1. Clone the repository to your local machine. 27 | 2. Install the required dependencies using `pip install -r requirements.txt`. 28 | 3. Open your terminal and navigate to the project's root directory. 29 | 4. Run the following command: 30 | 31 | ```bash 32 | streamlit run yourpath/SQLNaturaLanguage/src/streamlit_app.py 33 | ``` 34 | 35 | ## Features 36 | 37 | - Intuitive Chat-Like Interface: Users can interact with the application using a user-friendly and engaging chat-like interface, making SQL queries easy and approachable. 38 | - Natural Language Processing: The NLP-powered language model understands natural language prompts and generates SQL queries accordingly. 39 | - Database Interaction: The application connects seamlessly to the specified SQL database, executing queries and returning results in real-time. 40 | - Error Handling: Comprehensive error handling ensures a smooth user experience even in the presence of invalid inputs. 41 | 42 | ## Usage 43 | 44 | 1. Enter your SQL database credentials and API key in the appropriate configuration files. 45 | 2. Access the application using the provided terminal command. 46 | 3. Input your SQL-related questions or queries in simple English language. 47 | 4. Observe the application transforming your natural language inputs into valid SQL queries and displaying query results. 48 | 49 | ## Contributions 50 | 51 | We welcome contributions to enhance the application's capabilities and usability. To contribute, please follow the guidelines in the `CONTRIBUTING.md` file. 52 | 53 | 54 | 55 | Let's make database interactions a breeze with the power of natural language and SQL integration! Feel free to share your feedback and ideas to improve the application further. Happy querying! 😎🚀 56 | 57 | 58 | Structure 59 | 60 | ``` 61 | SQLNaturaLanguage 62 | ├─ .streamlit 63 | │ └─ config.toml 64 | ├─ data 65 | │ └─ marketing.db 66 | ├─ docs 67 | │ ├─ images 68 | │ │ └─ init_page.PNG 69 | │ └─ video 70 | │ └─ test.gif 71 | ├─ README.md 72 | ├─ requirements.txt 73 | ├─ src 74 | │ ├─ generative_ai.py 75 | │ ├─ streamlit_app.py 76 | │ ├─ utilities 77 | │ │ ├─ config.py 78 | │ │ ├─ __init__.py 79 | │ │ └─ __pycache__ 80 | │ ├─ __init__.py 81 | │ └─ __pycache__ 82 | └─ __pycache__ 83 | 84 | ``` -------------------------------------------------------------------------------- /data/marketing.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/damiangilgonzalez1995/SQLNaturaLanguage/f57a46c1e824f11c26979ea4420b83c82c4ceb89/data/marketing.db -------------------------------------------------------------------------------- /docs/images/init_page.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/damiangilgonzalez1995/SQLNaturaLanguage/f57a46c1e824f11c26979ea4420b83c82c4ceb89/docs/images/init_page.PNG -------------------------------------------------------------------------------- /docs/video/test.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/damiangilgonzalez1995/SQLNaturaLanguage/f57a46c1e824f11c26979ea4420b83c82c4ceb89/docs/video/test.gif -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/damiangilgonzalez1995/SQLNaturaLanguage/f57a46c1e824f11c26979ea4420b83c82c4ceb89/requirements.txt -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/damiangilgonzalez1995/SQLNaturaLanguage/f57a46c1e824f11c26979ea4420b83c82c4ceb89/src/__init__.py -------------------------------------------------------------------------------- /src/generative_ai.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import sqlalchemy as sql 3 | from langchain.chat_models import ChatOpenAI 4 | from langchain.sql_database import SQLDatabase 5 | from langchain.chains import SQLDatabaseChain 6 | from utilities.config import * 7 | 8 | 9 | 10 | class SQLNaturaLanguage(): 11 | def __init__(self, sql_engine, API_KEY, temperature=0, model="gpt-3.5-turbo") -> None: 12 | """ 13 | Initialize the SQLNaturaLanguage class. 14 | 15 | :param sql_engine: The SQL engine to connect to the database. 16 | :type sql_engine: SQLAlchemy engine 17 | :param API_KEY: The API key for the NaturaLanguage service. 18 | :type API_KEY: str 19 | :param temperature: The temperature setting for text generation (default is 0). 20 | :type temperature: float 21 | :param model: The language model to use for natural language processing (default is "gpt-3.5-turbo"). 22 | :type model: str 23 | """ 24 | self.db = SQLDatabase(engine=sql_engine) 25 | self.conn = sql_engine.connect() 26 | self.API_KEY = API_KEY 27 | self.temperature=temperature 28 | self.model=model 29 | self.sql_model = self.__create_sqlchain() 30 | 31 | def __create_model(self): 32 | """ 33 | Create and initialize the language model. 34 | 35 | :return: The language model 36 | for natural language processing. 37 | :rtype: ChatOpenAI 38 | """ 39 | llm = ChatOpenAI( 40 | temperature=self.temperature, 41 | model=self.model, 42 | openai_api_key=self.API_KEY 43 | ) 44 | return llm 45 | 46 | def __create_sqlchain(self): 47 | """ 48 | Create and initialize the SQLDatabaseChain. 49 | 50 | :return: The SQLDatabaseChain for executing 51 | SQL queries based on natural language. 52 | :rtype: SQLDatabaseChain 53 | """ 54 | db_chain = SQLDatabaseChain.from_llm( 55 | llm=self.__create_model(), 56 | db=self.db, 57 | verbose=True, 58 | return_intermediate_steps=True 59 | ) 60 | return db_chain 61 | 62 | def __execution_query(self, prompt=None): 63 | """ 64 | Execute the SQL query generation and database execution. 65 | 66 | :param prompt: The natural language prompt to generate the SQL query. 67 | :type prompt: str 68 | :return: The result of the SQL query execution. 69 | :rtype: dict 70 | """ 71 | prompt = prompt if prompt is not None else QUESTION_GENERATIVE_AI 72 | 73 | try: 74 | res = self.sql_model(prompt) 75 | result = res["result"] 76 | query_sql = [elem["sql_cmd"] for elem in res["intermediate_steps"] if "sql_cmd" in elem][0] 77 | query_df = pd.read_sql_query(sql.text(query_sql), self.conn) 78 | 79 | return { 80 | "result": result, 81 | "query_sql": query_sql, 82 | "query_df": query_df 83 | } 84 | 85 | except Exception as error: 86 | return {"error": error} 87 | 88 | 89 | def execution(self, prompt=None): 90 | """ 91 | Execute the main query execution process. 92 | 93 | :param prompt: The natural language prompt to generate the SQL query. 94 | :type prompt: str 95 | :return: The final result of the SQL query execution. 96 | :rtype: dict 97 | """ 98 | final_result = self.__execution_query(prompt=prompt) 99 | return final_result 100 | 101 | 102 | 103 | 104 | 105 | # sql_engine = sql.create_engine("sqlite:///data/marketing.db") 106 | 107 | # API_KEY = API_KEY 108 | 109 | # sqlnaturalanguage_object = SQLNaturaLanguage(sql_engine=sql_engine, API_KEY=API_KEY) 110 | 111 | # response = sqlnaturalanguage_object.execution(prompt="What are the best clients?") 112 | 113 | # print(response) 114 | 115 | -------------------------------------------------------------------------------- /src/streamlit_app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from utilities.config import * 3 | # from ..generative_ai import SQLNaturaLanguage 4 | # from ..generative_ai import SQLNaturaLanguage 5 | from generative_ai import SQLNaturaLanguage 6 | # streamlit config show > project/.streamlit/config.toml 7 | import pandas as pd 8 | import sqlalchemy as sql 9 | 10 | class App_queries_naturallanguage(): 11 | def __init__(self, sql_engine, API_KEY, temperature=0, model="gpt-3.5-turbo"): 12 | """ 13 | Initialize the App_queries_naturallanguage class. 14 | 15 | :param sql_engine: The SQL engine to connect to the database. 16 | :type sql_engine: SQLAlchemy engine 17 | :param API_KEY: The API key for the NaturaLanguage service. 18 | :type API_KEY: str 19 | :param temperature: The temperature setting for text generation (default is 0). 20 | :type temperature: float 21 | :param model: The language model to use for natural language processing (default is "gpt-3.5-turbo"). 22 | :type model: str 23 | """ 24 | 25 | self.conn = sql_engine.connect() 26 | self.sql_engine = sql_engine 27 | self.API_KEY = API_KEY 28 | self.temperature=temperature 29 | self.model=model 30 | 31 | 32 | 33 | def _call_llm_sql(self, question=None, number_rows=10): 34 | """ 35 | Call the SQLNaturaLanguage model to execute an SQL query based on the user's question. 36 | 37 | :param question: The user's input question to generate the SQL query. 38 | :type question: str 39 | :param number_rows: The number of rows to limit the SQL query results (default is 10). 40 | :type number_rows: int 41 | :return: The result of the SQL query execution. 42 | :rtype: dict 43 | """ 44 | 45 | sql_model = SQLNaturaLanguage(API_KEY=self.API_KEY, sql_engine=self.sql_engine, temperature=self.temperature, model=self.model) 46 | 47 | prompt = PROMPT 48 | prompt = prompt.format(question=question, number_rows=number_rows) 49 | result = sql_model.execution(prompt=prompt) 50 | return result 51 | 52 | 53 | 54 | def execute(self): 55 | """ 56 | Execute the main application. 57 | 58 | This function sets up the layout and handles user interactions to execute SQL queries and display results. 59 | """ 60 | 61 | st.set_page_config(layout="wide",page_icon="🧊") 62 | st.title("Lead Scoring Analyzer") 63 | 64 | 65 | # Set up the layout with two columns 66 | # col1, col2 = st.columns([1, 1]) 67 | col1, col2 = st.columns(2) 68 | 69 | # Column on the left for chat 70 | with col2: 71 | 72 | st.title("Inquiries") 73 | number_rows = self.__number_result() 74 | self.__init_session() 75 | 76 | # Create an input box for the user to type messages 77 | user_input = st.text_input(label=f"I want to know the **:red[{number_rows}]** ..." , 78 | value="Best clients we have", 79 | key="placeholder") 80 | 81 | if st.button("ASK"): 82 | # Collect the message and store it in a variable 83 | output = self._call_llm_sql(question=user_input, number_rows=number_rows) 84 | 85 | st.session_state.past.append(user_input) 86 | st.session_state.generated.append(output) 87 | 88 | if st.session_state["generated"]: 89 | 90 | for i in range(len(st.session_state["generated"])-1, -1, -1): 91 | self.__queryseparator(i) 92 | chat_message = st.session_state["generated"][i] 93 | 94 | if "error" in chat_message: 95 | self.__error_show(chat_message) 96 | 97 | else: 98 | self.__show_result(chat_message) 99 | 100 | 101 | # Column on the right for clients table view 102 | with col1: 103 | self.__show_tables() 104 | 105 | 106 | def __queryseparator(self, i): 107 | """ 108 | Display a separator for each generated query. 109 | 110 | :param i: The index of the generated query. 111 | :type i: int 112 | """ 113 | 114 | st.text(f"....................................... QUERY {i+1} .......................................") 115 | st.text( "QUESTION: " + st.session_state["past"][i]) 116 | 117 | def __show_tables(self): 118 | """ 119 | Display the database tables in different tabs. 120 | 121 | This function shows the tables in different tabs 122 | """ 123 | 124 | st.header("Data Base Tables") 125 | 126 | 127 | obj = sql.MetaData() 128 | obj.reflect(bind=self.conn) 129 | 130 | tables_list = list(obj.tables.keys()) 131 | tabs = st.tabs(tables_list) 132 | 133 | for i, table in enumerate(tables_list): 134 | with tabs[i]: 135 | st.header(f"Table: {table}") 136 | st.dataframe(pd.read_sql_table(table, self.conn)) 137 | 138 | 139 | 140 | def __error_show(self, chat_message): 141 | """ 142 | Display an error message. 143 | 144 | :param chat_message: The error message to be displayed. 145 | :type chat_message: str 146 | """ 147 | 148 | st.text( "ERROR: ") 149 | st.code(chat_message["error"], language="python", line_numbers=True) 150 | 151 | def __show_result(self, chat_message): 152 | """ 153 | Display the response of the generated query. 154 | 155 | :param chat_message: The response of the generated query. 156 | :type chat_message: dict 157 | """ 158 | 159 | st.text( "RESPONSE: ") 160 | for key in chat_message.keys(): 161 | 162 | if "result"==key: 163 | st.text(chat_message[key]) 164 | elif "query_sql"==key: 165 | st.code(chat_message[key], language="sql", line_numbers=True) 166 | elif "query_df"==key: 167 | st.dataframe(chat_message[key]) 168 | 169 | 170 | def __number_result(self): 171 | """ 172 | Prompt the user to choose the number of rows to display in the query results. 173 | 174 | :return: The number of rows selected by the user. 175 | :rtype: int 176 | """ 177 | 178 | number_rows = st.slider( 179 | "How many result do you want?", 180 | value=5, 181 | step=1, 182 | max_value=10, 183 | min_value=1 184 | ) 185 | 186 | return number_rows 187 | 188 | def __init_session(self): 189 | """ 190 | Initialize the session state for generated queries and past user inputs. 191 | """ 192 | 193 | if "generated" not in st.session_state: 194 | st.session_state["generated"] = [] 195 | 196 | if "past" not in st.session_state: 197 | st.session_state["past"] = [] 198 | 199 | 200 | if __name__ == "__main__": 201 | 202 | # Connection with the database 203 | sql_engine = sql.create_engine("sqlite:///data/marketing.db") 204 | 205 | API_KEY = API_KEY 206 | 207 | app_class = App_queries_naturallanguage(sql_engine=sql_engine, API_KEY=API_KEY) 208 | 209 | app_class.execute() 210 | -------------------------------------------------------------------------------- /src/utilities/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/damiangilgonzalez1995/SQLNaturaLanguage/f57a46c1e824f11c26979ea4420b83c82c4ceb89/src/utilities/__init__.py -------------------------------------------------------------------------------- /src/utilities/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | PROMPT = """ 3 | Respond to the next answer. Keep in mind that the query that is created must be explicit at all times from which table the feature comes. 4 | Take in mind: 5 | - Adding the table name when referencing columns that are specified in multiple table 6 | - The limit of row is {number_rows} 7 | {question} 8 | """ 9 | 10 | 11 | QUESTION_GENERATIVE_AI = "How many clients do I have?" 12 | API_KEY = os.environ['OPENAI_API_KEY'] 13 | # KEY_OPENAI = "yourkey" 14 | 15 | # or you can use API_KEY = os.environ['OPENAI_API_KEY'] 16 | --------------------------------------------------------------------------------