├── CSV.png
├── LICENSE
├── README.md
├── app.py
└── requirements.txt

/CSV.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Safiullah-Rahu/CSV-AI/7ecfc4b892551b122e30809bf119c2293c4e43f2/CSV.png
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2023 Safiullah Rahu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# CSV-AI 🧠

CSV-AI is a Streamlit app powered by LangChain and OpenAI that helps you unlock the insights hidden in your CSV files. With CSV-AI, you can interact with, summarize, and analyze your CSV files in one convenient place.

## Features

CSV-AI offers the following key features:

- **Interact:** Chat with your CSV files in natural language and get answers grounded in their contents.
- **Summarize:** Generate descriptive summaries for your CSV data.
- **Analyze:** Run data analysis on your CSV files by asking questions in plain English, including filtering, sorting, and aggregating the data.

## Installation

To run CSV-AI, follow these steps:

1. Clone this repository to your local machine.
2. Navigate to the project directory.

```bash
git clone https://github.com/Safiullah-Rahu/CSV-AI.git
cd CSV-AI
```
3. Install the required packages using `pip` with the provided `requirements.txt` file.
```bash
pip install -r requirements.txt
```
## Usage

To start CSV-AI, run the following command:
```bash
streamlit run app.py
```
This command launches the CSV-AI app in your default web browser. You can then start exploring and analyzing your CSV files. Make sure an OpenAI API key is configured first (see the API key note at the end of this README).

![CSV-AI App Homepage](CSV.png)

## Feedback and Contributions
If you have any feedback, suggestions, or issues related to CSV-AI, please open an issue on the GitHub repository. Contributions are also welcome! If you would like to contribute to CSV-AI, please follow the guidelines outlined in the Contribution Guidelines.

## License
CSV-AI is licensed under the MIT License.
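
## OpenAI API Key

CSV-AI calls the OpenAI API, so a valid API key is needed before the Chat, Summarize, and Analyze features will work. You can paste the key into the sidebar field when the app starts, or set the `OPENAI_API_KEY` environment variable, for example through a `.env` file in the project directory (the app checks for one at startup). A minimal `.env` might look like this, with the placeholder replaced by your own key:

```bash
# .env (example only): replace the placeholder with your real OpenAI key
OPENAI_API_KEY=sk-...
```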

--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
# imports
import streamlit as st
import os, tempfile
import pandas as pd
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import CSVLoader
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain.chains.summarize import load_summarize_chain
from langchain_experimental.agents import create_pandas_dataframe_agent
from dotenv import load_dotenv
import asyncio

# Load OPENAI_API_KEY (and any other settings) from a local .env file if present
load_dotenv()

st.set_page_config(page_title="CSV AI", layout="wide")
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"


def home_page():
    st.write("""Select any one feature from the selectbox above: \n
    1. Chat with CSV \n
    2. Summarize CSV \n
    3. Analyze CSV """)


@st.cache_resource()
def retriever_func(uploaded_file):
    if uploaded_file:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_file_path = tmp_file.name
        try:
            loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8")
            data = loader.load()
        except Exception:
            # Fall back to Windows-1252 if the file is not valid UTF-8
            loader = CSVLoader(file_path=tmp_file_path, encoding="cp1252")
            data = loader.load()

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            add_start_index=True
        )
        all_splits = text_splitter.split_documents(data)

        vectorstore = FAISS.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())
        retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
    if not uploaded_file:
        st.info("Please upload CSV documents to continue.")
        st.stop()
    return retriever, vectorstore


def chat(temperature, model_name):
    st.write("# Talk to CSV")
    reset = st.sidebar.button("Reset Chat")
    uploaded_file = st.sidebar.file_uploader("Upload your CSV here 👇:", type="csv")
    retriever, vectorstore = retriever_func(uploaded_file)
    llm = ChatOpenAI(model_name=model_name, temperature=temperature, streaming=True)

    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

    if "messages" not in st.session_state:
        st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]

    store = {}

    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """Use the following pieces of context to answer the question at the end.
                If you don't know the answer, just say that you don't know, don't try to make up an answer.
                Context: {context}""",
            ),
            MessagesPlaceholder(variable_name="history"),
            ("human", "{input}"),
        ]
    )
    runnable = prompt | llm

    def get_session_history(session_id: str) -> BaseChatMessageHistory:
        if session_id not in store:
            store[session_id] = ChatMessageHistory()
        return store[session_id]

    with_message_history = RunnableWithMessageHistory(
        runnable,
        get_session_history,
        input_messages_key="input",
        history_messages_key="history",
    )

    for msg in st.session_state.messages:
        st.chat_message(msg["role"]).write(msg["content"])

    async def chat_message():
        if prompt := st.chat_input():
            if not user_api_key:
                st.info("Please add your OpenAI API key to continue.")
                st.stop()
            st.session_state.messages.append({"role": "user", "content": prompt})
            st.chat_message("user").write(prompt)
            # Retrieve the most relevant chunks and pass them to the model as context
            context_docs = vectorstore.similarity_search(prompt, k=6)
            context = "\n\n".join(doc.page_content for doc in context_docs)
            with st.chat_message("assistant"):
                message_placeholder = st.empty()
                text_chunk = ""
                async for chunk in with_message_history.astream(
                    {"context": context, "input": prompt},
                    config={"configurable": {"session_id": "abc123"}},
                ):
                    text_chunk += chunk.content
                    message_placeholder.markdown(text_chunk)
            st.session_state.messages.append({"role": "assistant", "content": text_chunk})

    if reset:
        st.session_state["messages"] = []
    asyncio.run(chat_message())


def summary(model_name, temperature, top_p):
    st.write("# Summary of CSV")
    st.write("Upload your document here:")
    uploaded_file = st.file_uploader("Upload source document", type="csv", label_visibility="collapsed")
    if uploaded_file is not None:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_file_path = tmp_file.name

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
        try:
            loader = CSVLoader(file_path=tmp_file_path, encoding="cp1252")
            data = loader.load()
            texts = text_splitter.split_documents(data)
        except Exception:
            # Fall back to UTF-8 if the file cannot be read as Windows-1252
            loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8")
            data = loader.load()
            texts = text_splitter.split_documents(data)

        os.remove(tmp_file_path)
        gen_sum = st.button("Generate Summary")
        if gen_sum:
            # Initialize the OpenAI module, load and run the summarize chain
            llm = ChatOpenAI(model_name=model_name, temperature=temperature)
            chain = load_summarize_chain(
                llm=llm,
                chain_type="map_reduce",
                return_intermediate_steps=True,
                input_key="input_documents",
                output_key="output_text",
            )
            result = chain({"input_documents": texts}, return_only_outputs=True)

            st.success(result["output_text"])


def analyze(temperature, model_name):
    st.write("# Analyze CSV")
    reset = st.sidebar.button("Reset Chat")
    uploaded_file = st.sidebar.file_uploader("Upload your CSV here 👇:", type="csv")
    if uploaded_file is not None:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_file_path = tmp_file.name
        df = pd.read_csv(tmp_file_path)
        llm = ChatOpenAI(model=model_name, temperature=temperature)
        # allow_dangerous_code is required by recent langchain-experimental releases
        # before the agent will run the pandas code it generates
        agent = create_pandas_dataframe_agent(
            llm, df, agent_type="openai-tools", verbose=True, allow_dangerous_code=True
        )

        if "messages" not in st.session_state:
            st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]

        for msg in st.session_state.messages:
            st.chat_message(msg["role"]).write(msg["content"])

        if prompt := st.chat_input(placeholder="What are the names of the columns?"):
            if not user_api_key:
                st.info("Please add your OpenAI API key to continue.")
                st.stop()
            st.session_state.messages.append({"role": "user", "content": prompt})
            st.chat_message("user").write(prompt)
            msg = agent.invoke({"input": prompt, "chat_history": st.session_state.messages})
            st.session_state.messages.append({"role": "assistant", "content": msg["output"]})
            st.chat_message("assistant").write(msg["output"])
        if reset:
            st.session_state["messages"] = []


# Main App
def main():
    st.markdown(
        """
        <div style="text-align: center;">
            <h1>🧠 CSV AI</h1>
        </div>
        """,
        unsafe_allow_html=True,
    )
    st.markdown(
        """
        <div style="text-align: center;">
            <h2>⚡️ Interacting, Analyzing and Summarizing CSV Files!</h2>
        </div>
        """,
        unsafe_allow_html=True,
    )
    global user_api_key

    if os.path.exists(".env") and os.environ.get("OPENAI_API_KEY") is not None:
        user_api_key = os.environ["OPENAI_API_KEY"]
        st.success("API key loaded from .env", icon="🚀")
    else:
        user_api_key = st.sidebar.text_input(
            label="#### Enter OpenAI API key 👇",
            placeholder="Paste your OpenAI API key (sk-...)",
            type="password",
            key="openai_api_key",
        )
        if user_api_key:
            st.sidebar.success("API key loaded", icon="🚀")
            os.environ["OPENAI_API_KEY"] = user_api_key

    # Sidebar model and sampling settings
    MODEL_OPTIONS = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k", "gpt-3.5-turbo-16k", "gpt-4-1106-preview"]
    max_tokens = {"gpt-4": 7000, "gpt-4-32k": 31000, "gpt-3.5-turbo": 3000}
    TEMPERATURE_MIN_VALUE = 0.0
    TEMPERATURE_MAX_VALUE = 1.0
    TEMPERATURE_DEFAULT_VALUE = 0.9
    TEMPERATURE_STEP = 0.01
    model_name = st.sidebar.selectbox(label="Model", options=MODEL_OPTIONS)
    top_p = st.sidebar.slider("Top_P", 0.0, 1.0, 1.0, 0.1)
    temperature = st.sidebar.slider(
        label="Temperature",
        min_value=TEMPERATURE_MIN_VALUE,
        max_value=TEMPERATURE_MAX_VALUE,
        value=TEMPERATURE_DEFAULT_VALUE,
        step=TEMPERATURE_STEP,
    )

    # List of the available functionalities
    functions = [
        "home",
        "Chat with CSV",
        "Summarize CSV",
        "Analyze CSV",
    ]

    # Create a selectbox with the function names as options
    selected_function = st.selectbox("Select a functionality", functions)
    if selected_function == "home":
        home_page()
    elif selected_function == "Chat with CSV":
        chat(temperature=temperature, model_name=model_name)
    elif selected_function == "Summarize CSV":
        summary(model_name=model_name, temperature=temperature, top_p=top_p)
    elif selected_function == "Analyze CSV":
        analyze(temperature=temperature, model_name=model_name)
    else:
        st.warning("You haven't selected any AI Functionality!!")


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
streamlit
langchain
langchain_openai
langchain-experimental
langchain-core
langchain-community
langchain-text-splitters
pandas
openai
tqdm
python-dotenv
faiss-cpu
tiktoken
tabulate

--------------------------------------------------------------------------------