├── CSV.png
├── LICENSE
├── README.md
├── app.py
└── requirements.txt
/CSV.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Safiullah-Rahu/CSV-AI/7ecfc4b892551b122e30809bf119c2293c4e43f2/CSV.png
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Safiullah Rahu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CSV-AI 🧠
2 |
3 | CSV-AI is the ultimate app powered by LangChain, OpenAI, and Streamlit that allows you to unlock hidden insights in your CSV files. With CSV-AI, you can effortlessly interact with, summarize, and analyze your CSV files in one convenient place.
4 |
5 | ## Features
6 |
7 | CSV-AI offers the following key features:
8 |
9 | - **Interact:** Chat with your CSV files and ask questions about the data in natural language.
10 | - **Summarize:** Generate descriptive summaries for your CSV data.
11 | - **Analyze:** Perform advanced data analysis on your CSV files, including filtering, sorting, and visualizing the data.
12 |
13 | ## Installation
14 |
15 | To run CSV-AI, follow these steps:
16 |
17 | 1. Clone this repository to your local machine.
18 | 2. Navigate to the project directory.
19 |
20 | ```bash
21 | git clone https://github.com/Safiullah-Rahu/CSV-AI.git
22 | cd CSV-AI
23 | ```
24 | 3. Install the required packages using `pip` with the provided `requirements.txt` file.
25 | ```bash
26 | pip install -r requirements.txt
27 | ```
28 | ## Usage
29 |
30 | To start CSV-AI, run the following command:
31 | ```bash
32 | streamlit run app.py
33 | ```
34 | This command will launch the CSV-AI app in your default web browser. You can then start exploring and analyzing your CSV files.
35 |
36 |
37 |
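38 | CSV-AI calls the OpenAI API, so you will need an OpenAI API key; the app asks you to add one before it answers queries. Since the LangChain OpenAI classes can also read the key from the `OPENAI_API_KEY` environment variable, you may alternatively export it before launching the app (placeholder value shown):
39 | 
40 | ```bash
41 | export OPENAI_API_KEY="sk-..."  # replace with your own key
42 | ```
43 | 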
44 | ## Feedback and Contributions
45 | If you have any feedback, suggestions, or issues related to CSV-AI, please open an issue on the GitHub repository. Contributions are welcome as well; if you would like to contribute, please follow the Contribution Guidelines.
46 | 
47 | ## License
48 | CSV-AI is licensed under the MIT License.
49 | 
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | # imports
2 | import streamlit as st
3 | import os, tempfile
4 | import pandas as pd
5 | from langchain_community.chat_models import ChatOpenAI
6 | from langchain_community.vectorstores import FAISS
7 | from langchain_community.embeddings import OpenAIEmbeddings
8 | from langchain_text_splitters import RecursiveCharacterTextSplitter
9 | from langchain_community.document_loaders import CSVLoader
10 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
11 | from langchain_core.runnables.history import RunnableWithMessageHistory
12 | from langchain_community.chat_message_histories import ChatMessageHistory
13 | from langchain_core.chat_history import BaseChatMessageHistory
14 | from langchain.chains.summarize import load_summarize_chain
15 | 
16 | from langchain_experimental.agents import create_pandas_dataframe_agent
17 | import asyncio
18 |
19 | st.set_page_config(page_title="CSV AI", layout="wide")
20 | os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # tolerate duplicate OpenMP runtimes (e.g. loaded by FAISS) instead of aborting
21 |
22 | def home_page():
23 | st.write("""Select any one feature from the selectbox above: \n
24 | 1. Chat with CSV \n
25 | 2. Summarize CSV \n
26 | 3. Analyze CSV """)
27 |
28 | @st.cache_resource()  # build the retriever once per uploaded file and reuse it across Streamlit reruns
29 | def retriever_func(uploaded_file):
30 | if uploaded_file :
31 | with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
32 | tmp_file.write(uploaded_file.getvalue())
33 | tmp_file_path = tmp_file.name
34 | try:
35 | loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8")
36 | data = loader.load()
37 | except Exception:  # retry with Windows-1252 (cp1252) if UTF-8 loading fails
38 | loader = CSVLoader(file_path=tmp_file_path, encoding="cp1252")
39 | data = loader.load()
40 |
41 | text_splitter = RecursiveCharacterTextSplitter(
42 | chunk_size=1000,
43 | chunk_overlap=200,
44 | add_start_index=True
45 | )
46 | all_splits = text_splitter.split_documents(data)
47 |
48 |
49 | vectorstore = FAISS.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())
50 | retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
51 | if not uploaded_file:
52 | st.info("Please upload CSV documents to continue.")
53 | st.stop()
54 | return retriever, vectorstore
55 |
56 | def chat(temperature, model_name):
57 | st.write("# Talk to CSV")
58 | # Add functionality for Page 1
59 | reset = st.sidebar.button("Reset Chat")
60 | uploaded_file = st.sidebar.file_uploader("Upload your CSV here 👇:", type="csv")
61 | retriever, vectorstore = retriever_func(uploaded_file)
62 | llm = ChatOpenAI(model_name=model_name, temperature=temperature, streaming=True)
63 |
64 | def format_docs(docs):
65 | return "\n\n".join(doc.page_content for doc in docs)
66 |
67 | if "messages" not in st.session_state:
68 | st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]
69 |
70 | store = {}  # in-memory chat histories, keyed by session_id
71 |
72 | prompt = ChatPromptTemplate.from_messages(
73 | [
74 | (
75 | "system",
76 | """Use the following pieces of context to answer the question at the end.
77 | If you don't know the answer, just say that you don't know, don't try to make up an answer. Context: {context}""",
78 | ),
79 | MessagesPlaceholder(variable_name="history"),
80 | ("human", "{input}"),
81 | ]
82 | )
83 | runnable = prompt | llm
84 |
85 | def get_session_history(session_id: str) -> BaseChatMessageHistory:
86 | if session_id not in store:
87 | store[session_id] = ChatMessageHistory()
88 | return store[session_id]
89 |
90 |
91 | with_message_history = RunnableWithMessageHistory(
92 | runnable,
93 | get_session_history,
94 | input_messages_key="input",
95 | history_messages_key="history",
96 | )
97 |
98 | for msg in st.session_state.messages:
99 | st.chat_message(msg["role"]).write(msg["content"])
100 | async def chat_message():
101 | if prompt := st.chat_input():
102 | if not user_api_key:
103 | st.info("Please add your OpenAI API key to continue.")
104 | st.stop()
105 | st.session_state.messages.append({"role": "user", "content": prompt})
106 | st.chat_message("user").write(prompt)
107 | contextt = vectorstore.similarity_search(prompt, k=6)
108 | context = "\n\n".join(doc.page_content for doc in contextt)
109 | #msg =
110 | with st.chat_message("assistant"):
111 | message_placeholder = st.empty()
112 | text_chunk = ""
113 | async for chunk in with_message_history.astream(
114 | {"context": context, "input": prompt},
115 | config={"configurable": {"session_id": "abc123"}},
116 | ):
117 | text_chunk += chunk.content
118 | message_placeholder.markdown(text_chunk)
119 | #st.chat_message("assistant").write(text_chunk)
120 | st.session_state.messages.append({"role": "assistant", "content": text_chunk})
121 | if reset:
122 | st.session_state["messages"] = []
123 | asyncio.run(chat_message())
124 |
125 |
126 | def summary(model_name, temperature, top_p):
127 | st.write("# Summary of CSV")
128 | st.write("Upload your document here:")
129 | uploaded_file = st.file_uploader("Upload source document", type="csv", label_visibility="collapsed")
130 | if uploaded_file is not None:
131 | with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
132 | tmp_file.write(uploaded_file.getvalue())
133 | tmp_file_path = tmp_file.name
134 | # encoding = cp1252
135 | text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1024, chunk_overlap=100)
136 | try:
137 | loader = CSVLoader(file_path=tmp_file_path, encoding="cp1252")
138 | #loader = UnstructuredFileLoader(tmp_file_path)
139 | data = loader.load()
140 | texts = text_splitter.split_documents(data)
141 | except Exception:  # retry with UTF-8 if cp1252 loading fails
142 | loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8")
143 | #loader = UnstructuredFileLoader(tmp_file_path)
144 | data = loader.load()
145 | texts = text_splitter.split_documents(data)
146 |
147 | os.remove(tmp_file_path)
148 | gen_sum = st.button("Generate Summary")
149 | if gen_sum:
150 | # Initialize the OpenAI module, load and run the summarize chain
151 | llm = ChatOpenAI(model_name=model_name, temperature=temperature)
152 | chain = load_summarize_chain(
153 | llm=llm,
154 | chain_type="map_reduce",
155 |
156 | return_intermediate_steps=True,
157 | input_key="input_documents",
158 | output_key="output_text",
159 | )
160 | result = chain({"input_documents": texts}, return_only_outputs=True)
161 |
162 | st.success(result["output_text"])
163 |
164 |
165 | def analyze(temperature, model_name):
166 | st.write("# Analyze CSV")
167 | #st.write("This is Page 3")
168 | # Add functionality for Page 3
169 | reset = st.sidebar.button("Reset Chat")
170 | uploaded_file = st.sidebar.file_uploader("Upload your CSV here 👇:", type="csv")
171 | #.write(uploaded_file.name)
172 | if uploaded_file is not None:
173 | with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
174 | tmp_file.write(uploaded_file.getvalue())
175 | tmp_file_path = tmp_file.name
176 | df = pd.read_csv(tmp_file_path)
177 | llm = ChatOpenAI(model=model_name, temperature=temperature)
178 | agent = create_pandas_dataframe_agent(llm, df, agent_type="openai-tools", verbose=True)
179 |
180 | if "messages" not in st.session_state:
181 | st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]
182 |
183 | for msg in st.session_state.messages:
184 | st.chat_message(msg["role"]).write(msg["content"])
185 |
186 | if prompt := st.chat_input(placeholder="What are the names of the columns?"):
187 | if not user_api_key:
188 | st.info("Please add your OpenAI API key to continue.")
189 | st.stop()
190 | st.session_state.messages.append({"role": "user", "content": prompt})
191 | st.chat_message("user").write(prompt)
192 | msg = agent.invoke({"input": prompt, "chat_history": st.session_state.messages})
193 | st.session_state.messages.append({"role": "assistant", "content": msg["output"]})
194 | st.chat_message("assistant").write(msg["output"])
195 | if reset:
196 | st.session_state["messages"] = []
197 |
198 |
199 | # Main App
200 | def main():
201 | st.markdown(
202 | """
203 |