├── CSV.png
├── LICENSE
├── README.md
├── app.py
└── requirements.txt

/CSV.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Safiullah-Rahu/CSV-AI/7ecfc4b892551b122e30809bf119c2293c4e43f2/CSV.png
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2023 Safiullah Rahu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# CSV-AI 🧠

CSV-AI is a Streamlit app powered by LangChain and OpenAI that helps you unlock the insights hidden in your CSV files. With CSV-AI, you can interact with, summarize, and analyze your CSV files in one convenient place.

## Features

CSV-AI offers the following key features:

- **Interact:** Chat with your CSV files in natural language and get answers grounded in their contents.
- **Summarize:** Generate descriptive summaries for your CSV data.
- **Analyze:** Run data analysis on your CSV files by asking questions in plain English, including filtering, sorting, and aggregating the data.

## Installation

To run CSV-AI, follow these steps:

1. Clone this repository to your local machine.
2. Navigate to the project directory.

```bash
git clone https://github.com/Safiullah-Rahu/CSV-AI.git
cd CSV-AI
```
3. Install the required packages using `pip` with the provided `requirements.txt` file.
```bash
pip install -r requirements.txt
```
## Usage

To start CSV-AI, run the following command:
```bash
streamlit run app.py
```
This command launches the CSV-AI app in your default web browser. You can then start exploring and analyzing your CSV files. Make sure an OpenAI API key is configured first (see the API key note at the end of this README).

![CSV-AI App Homepage](CSV.png)

## Feedback and Contributions
If you have any feedback, suggestions, or issues related to CSV-AI, please open an issue on the GitHub repository. Contributions are also welcome! If you would like to contribute to CSV-AI, please follow the guidelines outlined in the Contribution Guidelines.

## License
CSV-AI is licensed under the MIT License.
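
## OpenAI API Key

CSV-AI calls the OpenAI API, so a valid API key is needed before the Chat, Summarize, and Analyze features will work. You can paste the key into the sidebar field when the app starts, or set the `OPENAI_API_KEY` environment variable, for example through a `.env` file in the project directory (the app checks for one at startup). A minimal `.env` might look like this, with the placeholder replaced by your own key:

```bash
# .env (example only): replace the placeholder with your real OpenAI key
OPENAI_API_KEY=sk-...
```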

--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
# imports
import streamlit as st
import os, tempfile
import pandas as pd
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import CSVLoader
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain.chains.summarize import load_summarize_chain
from langchain_experimental.agents import create_pandas_dataframe_agent
from dotenv import load_dotenv
import asyncio

# Load OPENAI_API_KEY (and any other settings) from a local .env file if present
load_dotenv()

st.set_page_config(page_title="CSV AI", layout="wide")
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"


def home_page():
    st.write("""Select any one feature from the selectbox above: \n
    1. Chat with CSV \n
    2. Summarize CSV \n
    3. Analyze CSV """)


@st.cache_resource()
def retriever_func(uploaded_file):
    if uploaded_file:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_file_path = tmp_file.name
        try:
            loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8")
            data = loader.load()
        except Exception:
            # Fall back to Windows-1252 if the file is not valid UTF-8
            loader = CSVLoader(file_path=tmp_file_path, encoding="cp1252")
            data = loader.load()

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            add_start_index=True
        )
        all_splits = text_splitter.split_documents(data)

        vectorstore = FAISS.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())
        retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
    if not uploaded_file:
        st.info("Please upload CSV documents to continue.")
        st.stop()
    return retriever, vectorstore


def chat(temperature, model_name):
    st.write("# Talk to CSV")
    reset = st.sidebar.button("Reset Chat")
    uploaded_file = st.sidebar.file_uploader("Upload your CSV here 👇:", type="csv")
    retriever, vectorstore = retriever_func(uploaded_file)
    llm = ChatOpenAI(model_name=model_name, temperature=temperature, streaming=True)

    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

    if "messages" not in st.session_state:
        st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]

    store = {}

    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """Use the following pieces of context to answer the question at the end.
                If you don't know the answer, just say that you don't know, don't try to make up an answer.
                Context: {context}""",
            ),
            MessagesPlaceholder(variable_name="history"),
            ("human", "{input}"),
        ]
    )
    runnable = prompt | llm

    def get_session_history(session_id: str) -> BaseChatMessageHistory:
        if session_id not in store:
            store[session_id] = ChatMessageHistory()
        return store[session_id]

    with_message_history = RunnableWithMessageHistory(
        runnable,
        get_session_history,
        input_messages_key="input",
        history_messages_key="history",
    )

    for msg in st.session_state.messages:
        st.chat_message(msg["role"]).write(msg["content"])

    async def chat_message():
        if prompt := st.chat_input():
            if not user_api_key:
                st.info("Please add your OpenAI API key to continue.")
                st.stop()
            st.session_state.messages.append({"role": "user", "content": prompt})
            st.chat_message("user").write(prompt)
            # Retrieve the most relevant chunks and pass them to the model as context
            context_docs = vectorstore.similarity_search(prompt, k=6)
            context = "\n\n".join(doc.page_content for doc in context_docs)
            with st.chat_message("assistant"):
                message_placeholder = st.empty()
                text_chunk = ""
                async for chunk in with_message_history.astream(
                    {"context": context, "input": prompt},
                    config={"configurable": {"session_id": "abc123"}},
                ):
                    text_chunk += chunk.content
                    message_placeholder.markdown(text_chunk)
            st.session_state.messages.append({"role": "assistant", "content": text_chunk})

    if reset:
        st.session_state["messages"] = []
    asyncio.run(chat_message())


def summary(model_name, temperature, top_p):
    st.write("# Summary of CSV")
    st.write("Upload your document here:")
    uploaded_file = st.file_uploader("Upload source document", type="csv", label_visibility="collapsed")
    if uploaded_file is not None:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_file_path = tmp_file.name

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
        try:
            loader = CSVLoader(file_path=tmp_file_path, encoding="cp1252")
            data = loader.load()
            texts = text_splitter.split_documents(data)
        except Exception:
            # Fall back to UTF-8 if the file cannot be read as Windows-1252
            loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8")
            data = loader.load()
            texts = text_splitter.split_documents(data)

        os.remove(tmp_file_path)
        gen_sum = st.button("Generate Summary")
        if gen_sum:
            # Initialize the OpenAI module, load and run the summarize chain
            llm = ChatOpenAI(model_name=model_name, temperature=temperature)
            chain = load_summarize_chain(
                llm=llm,
                chain_type="map_reduce",
                return_intermediate_steps=True,
                input_key="input_documents",
                output_key="output_text",
            )
            result = chain({"input_documents": texts}, return_only_outputs=True)

            st.success(result["output_text"])


def analyze(temperature, model_name):
    st.write("# Analyze CSV")
    reset = st.sidebar.button("Reset Chat")
    uploaded_file = st.sidebar.file_uploader("Upload your CSV here 👇:", type="csv")
    if uploaded_file is not None:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_file_path = tmp_file.name
        df = pd.read_csv(tmp_file_path)
        llm = ChatOpenAI(model=model_name, temperature=temperature)
        # allow_dangerous_code is required by recent langchain-experimental releases
        # before the agent will run the pandas code it generates
        agent = create_pandas_dataframe_agent(
            llm, df, agent_type="openai-tools", verbose=True, allow_dangerous_code=True
        )

        if "messages" not in st.session_state:
            st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]

        for msg in st.session_state.messages:
            st.chat_message(msg["role"]).write(msg["content"])

        if prompt := st.chat_input(placeholder="What are the names of the columns?"):
            if not user_api_key:
                st.info("Please add your OpenAI API key to continue.")
                st.stop()
            st.session_state.messages.append({"role": "user", "content": prompt})
            st.chat_message("user").write(prompt)
            msg = agent.invoke({"input": prompt, "chat_history": st.session_state.messages})
            st.session_state.messages.append({"role": "assistant", "content": msg["output"]})
            st.chat_message("assistant").write(msg["output"])
        if reset:
            st.session_state["messages"] = []


# Main App
def main():
    st.markdown(
        """
        <div style="text-align: center;">
            <h1>🧠 CSV AI</h1>
        </div>
        """,
        unsafe_allow_html=True,
    )
    st.markdown(
        """
        <div style="text-align: center;">
            <h2>⚡️ Interacting, Analyzing and Summarizing CSV Files!</h2>
        </div>
        """,
        unsafe_allow_html=True,
    )
    global user_api_key

    if os.path.exists(".env") and os.environ.get("OPENAI_API_KEY") is not None:
        user_api_key = os.environ["OPENAI_API_KEY"]
        st.success("API key loaded from .env", icon="🚀")
    else:
        user_api_key = st.sidebar.text_input(
            label="#### Enter OpenAI API key 👇",
            placeholder="Paste your OpenAI API key (sk-...)",
            type="password",
            key="openai_api_key",
        )
        if user_api_key:
            st.sidebar.success("API key loaded", icon="🚀")
            os.environ["OPENAI_API_KEY"] = user_api_key

    # Sidebar model and sampling settings
    MODEL_OPTIONS = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k", "gpt-3.5-turbo-16k", "gpt-4-1106-preview"]
    max_tokens = {"gpt-4": 7000, "gpt-4-32k": 31000, "gpt-3.5-turbo": 3000}
    TEMPERATURE_MIN_VALUE = 0.0
    TEMPERATURE_MAX_VALUE = 1.0
    TEMPERATURE_DEFAULT_VALUE = 0.9
    TEMPERATURE_STEP = 0.01
    model_name = st.sidebar.selectbox(label="Model", options=MODEL_OPTIONS)
    top_p = st.sidebar.slider("Top_P", 0.0, 1.0, 1.0, 0.1)
    temperature = st.sidebar.slider(
        label="Temperature",
        min_value=TEMPERATURE_MIN_VALUE,
        max_value=TEMPERATURE_MAX_VALUE,
        value=TEMPERATURE_DEFAULT_VALUE,
        step=TEMPERATURE_STEP,
    )

    # List of the available functionalities
    functions = [
        "home",
        "Chat with CSV",
        "Summarize CSV",
        "Analyze CSV",
    ]

    # Create a selectbox with the function names as options
    selected_function = st.selectbox("Select a functionality", functions)
    if selected_function == "home":
        home_page()
    elif selected_function == "Chat with CSV":
        chat(temperature=temperature, model_name=model_name)
    elif selected_function == "Summarize CSV":
        summary(model_name=model_name, temperature=temperature, top_p=top_p)
    elif selected_function == "Analyze CSV":
        analyze(temperature=temperature, model_name=model_name)
    else:
        st.warning("You haven't selected any AI Functionality!!")


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
streamlit
langchain
langchain_openai
langchain-experimental
langchain-core
langchain-community
langchain-text-splitters
pandas
openai
tqdm
python-dotenv
faiss-cpu
tiktoken
tabulate

--------------------------------------------------------------------------------