├── screenshots
├── csv_query.png
├── csv_headings.png
├── csv_prompts.png
└── csv_query_answer.png
├── README.md
├── requirements.txt
└── CSV_Agent_Deployed.py
/screenshots/csv_query.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TMoneyBidness/CSV_Agent_with_Prompts/HEAD/screenshots/csv_query.png
--------------------------------------------------------------------------------
/screenshots/csv_headings.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TMoneyBidness/CSV_Agent_with_Prompts/HEAD/screenshots/csv_headings.png
--------------------------------------------------------------------------------
/screenshots/csv_prompts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TMoneyBidness/CSV_Agent_with_Prompts/HEAD/screenshots/csv_prompts.png
--------------------------------------------------------------------------------
/screenshots/csv_query_answer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TMoneyBidness/CSV_Agent_with_Prompts/HEAD/screenshots/csv_query_answer.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Query your CSV using Langchain and Streamlit
2 |
3 | Beyond a basic CSV Agent to query your tabular data, this app allows you to provide a prompt template to the agent, task objectives, and contextual information about your data.
4 |
5 | This project was just a workhorse for a friend. I'm currently working on a large commercial copywriting application.
6 |
7 | Feel free to reach out on [Twitter to collaborate: @TradersCraft](https://twitter.com/TradersCraft)
8 |
9 | *Note: There is a token limit of 4096, so be gentle.*
10 |
11 | ## Requirements
12 |
13 | Install the required packages by running
14 |
15 | ```
16 | pip install -r requirements.txt
17 | ```
18 | ## Layout
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 | #### Have fun!
37 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # requirements.txt
2 |
3 | # Pinned third-party dependencies
4 |
5 | aiohttp==3.8.4
6 | aiosignal==1.3.1
7 | altair==5.0.1
8 | async-timeout==4.0.2
9 | attrs==23.1.0
10 | blinker==1.6.2
11 | cachetools==5.3.1
12 | certifi==2023.5.7
13 | charset-normalizer==3.1.0
14 | click==8.1.3
15 | colorama==0.4.6
16 | dataclasses-json==0.5.7
17 | decorator==5.1.1
18 | frozenlist==1.3.3
19 | gitdb==4.0.10
20 | GitPython==3.1.31
21 | greenlet==2.0.2
22 | idna==3.4
23 | importlib-metadata==6.6.0
24 | Jinja2==3.1.2
25 | jsonschema==4.17.3
26 | langchain==0.0.188
27 | markdown-it-py==2.2.0
28 | MarkupSafe==2.1.2
29 | marshmallow==3.19.0
30 | marshmallow-enum==1.5.1
31 | mdurl==0.1.2
32 | multidict==6.0.4
33 | mypy-extensions==1.0.0
34 | numexpr==2.8.4
35 | numpy==1.24.3
36 | openai==0.27.7
37 | openapi-schema-pydantic==1.2.4
38 | packaging==23.1
39 | pandas==2.0.2
40 | Pillow==9.5.0
41 | protobuf==4.23.2
42 | pyarrow==12.0.0
43 | pydantic==1.10.8
44 | pydeck==0.8.1b0
45 | Pygments==2.15.1
46 | Pympler==1.0.1
47 | pyrsistent==0.19.3
48 | python-dateutil==2.8.2
49 | python-environ==0.4.54
50 | pytz==2023.3
51 | pytz-deprecation-shim==0.1.0.post0
52 | PyYAML==6.0
53 | requests==2.31.0
54 | rich==13.4.1
55 | six==1.16.0
56 | smmap==5.0.0
57 | SQLAlchemy==2.0.15
58 | streamlit==1.23.1
59 | tabulate==0.9.0
60 | tenacity==8.2.2
61 | toml==0.10.2
62 | toolz==0.12.0
63 | tornado==6.3.2
64 | tqdm==4.65.0
65 | typing-inspect==0.9.0
66 | typing_extensions==4.6.3
67 | tzdata==2023.3
68 | tzlocal==4.3
69 | urllib3==2.0.2
70 | validators==0.20.0
71 | watchdog==3.0.0
72 | yarl==1.9.2
73 | zipp==3.15.0
--------------------------------------------------------------------------------
/CSV_Agent_Deployed.py:
--------------------------------------------------------------------------------
1 | # Standard Library Imports
2 | import json
3 | import os
4 | import re
5 | import time
6 | import warnings
7 |
8 | # Third-Party Library Imports
9 | import numpy as np
10 | import pandas as pd
11 | import streamlit as st
12 |
13 | # Langchain Library Imports
14 | from langchain import (
15 | LLMChain,
16 | PromptTemplate,
17 | OpenAI,
18 | )
19 | from langchain.agents import (
20 | initialize_agent,
21 | Tool,
22 | AgentType,
23 | create_csv_agent,
24 | create_pandas_dataframe_agent,
25 | load_tools,
26 | ZeroShotAgent,
27 | AgentExecutor,
28 | )
29 | from langchain.chat_models import ChatOpenAI
30 | from langchain.chains import RetrievalQA
31 |
32 |
33 | ############################################################################################################
34 | # Load environment variables
35 |
# OpenAI API key read by df_agent. It starts out unset; the Streamlit section
# further down rebinds it from the password / key inputs.
API_KEY = None
39 |
40 | ############################################################################################################
41 |
# Seed the headings preview text once per session so the "Headings" text area
# always has a value to display.
if "headings_list" not in st.session_state:
    st.session_state["headings_list"] = ""
45 |
46 | # 3. Load Data Function
def load_data(path):
    """Read the CSV at *path* into a pandas DataFrame.

    Any exception raised while reading is printed and swallowed, in which
    case ``None`` is returned instead of a DataFrame.
    """
    try:
        return pd.read_csv(path)
    except Exception as err:
        print(f"Error occurred: {err}")
    return None
55 |
def get_headings(df):
    """Return the column labels of *df* as a list, in column order.

    Args:
        df: A pandas DataFrame.

    Returns:
        list: The result of ``df.columns.tolist()``.
    """
    return df.columns.tolist()
62 |
63 | # Function to preview the column headings of the uploaded CSV file
def show_headings():
    """Copy the loaded CSV's column headings into session state.

    Writes the headings, one per line, to ``st.session_state.headings_list``.
    Does nothing when no data is stored in the session.
    """
    # .get() instead of attribute access: "data" may never have been assigned
    # (e.g. the only upload so far was empty), and attribute access on a
    # missing session-state key raises.
    data = st.session_state.get("data")
    if data is None:
        return

    # str() keeps the join safe should a column label not be a string.
    st.session_state.headings_list = "\n".join(str(h) for h in get_headings(data))
68 |
69 |
70 | # Function to process the DataFrame and generate insights
def df_agent(df, agent_context, describe_dataset, query):
    """Ask a LangChain pandas-DataFrame agent a question about *df*.

    Args:
        df: DataFrame handed to ``create_pandas_dataframe_agent``.
        agent_context: Text describing the agent's strengths; interpolated
            into the prompt.
        describe_dataset: Text describing the dataset; interpolated into the
            prompt.
        query: The user's question; interpolated into the prompt.

    Returns:
        The ``"output"`` entry of the agent's response, or ``None`` (after
        showing an ``st.error``) when no API key is available.
    """
    # A blank st.text_input yields "" rather than None, so test for any falsy
    # key instead of `is None`.
    if not API_KEY:
        st.error("Please enter the password or your API key to proceed.")
        return None

    llm = OpenAI(openai_api_key=API_KEY, temperature=0)
    agent = create_pandas_dataframe_agent(llm, df, handle_parsing_errors=True)

    # Adjacent literals are concatenated; each piece ends with the single
    # space that separates it from the next sentence.
    prompt = (
        "You are DataFrameAI, the most advanced dataframe analysis agent on the planet. You are collaborating with a company to provide skilled, in-depth data analysis on a large table. They are looking to gain competitive business insights from this data, in order to gain an edge over their competitors. They are looking to analyze trends, ratios, hidden insights, and more. "
        f"You are a professional data science and analysis agent with the following strengths: {agent_context} "
        f"The dataset can be described as follows: {describe_dataset} "
        f"Specifically, they are looking to answer the following question about the data: {query} "
        "OUTPUT: Provide detailed, actionable insights. I am not looking for one or two sentences. I want a paragraph at least, including statistics, totals, etc. Be very specific, and analyze multiple columns or rows against each other. Whatever is required to provide the most advanced information possible!"
    )

    response = agent({"input": prompt})
    return response["output"]
89 |
90 |
91 | ############################################################################################################
92 |
93 | # STREAMLIT APP
# Page header and introduction.
st.title("👨💻 Query your CSV with an AI Agent using Langchain")
st.write("Beyond a basic CSV Agent to query your tabular data, this app allows you to provide a prompt to the agent, preview headings, provide task objectives, and contextual information about your data.")
st.write("Uses OpenAI. You need the key, or...hit me up if you're cool and I can give you the password!")

# Password input; masked so the shared password is not shown on screen.
password = st.text_input("Enter the password to use the default API key", type="password")

# Only consult st.secrets once something was typed: a blank field can then
# never match the stored password, and the secrets are not read at all for
# visitors who bring their own key.
if password and password == st.secrets["password"]:
    API_KEY = st.secrets["apikey"]
else:
    API_KEY = st.text_input("Enter your own API key", type='password')
106 |
uploaded_file = st.file_uploader("Please upload your CSV file below")

# Start every run from "no data" so that an empty or unreadable upload can
# neither leave the attribute undefined nor keep a stale DataFrame around.
st.session_state.data = None

if uploaded_file is not None:
    if uploaded_file.size == 0:
        st.write("The uploaded file is empty.")
    else:
        try:
            st.session_state.data = pd.read_csv(uploaded_file)
        except pd.errors.EmptyDataError:
            st.write("The uploaded file does not contain any data or columns.")
        except (pd.errors.ParserError, UnicodeDecodeError) as err:
            # Malformed or non-text uploads are reported instead of crashing the app.
            st.error(f"Could not parse the uploaded file as CSV: {err}")
120 |
# Preview button: show_headings runs as the click callback, so the return
# value of the button itself is not needed.
st.button(
    "PREVIEW HEADINGS",
    on_click=show_headings,
    help="Click to preview headings",
    type="secondary",
)

# Text area showing the headings captured by the callback.
headings_list = st.text_area(
    label="Headings",
    value=st.session_state.headings_list,
    key="headings",
)

# Free-text inputs collected from the user.
describe_dataset = st.text_area(
    "Please describe your dataset. e.g., 'This is Amazon sales data that contains XYZ.'"
)
objectives = st.text_area(
    "Describe your objectives. e.g., 'I am specifically looking for data insights related to overlooked ratios, key performance indicators, or hidden insights. Test correlations or complete data analysis when required.'"
)
agent_context = st.text_area(
    "Agent context prompt. e.g., 'You are a skilled data scientist. You are looking for trends, ratios, and actionable insights into the data. Your answers will result in marketing spend decisions, so be as specific as possible.'"
)
query = st.text_area("Type your query")
131 |
132 |
# The submit button is only offered once a CSV has been parsed into a
# DataFrame; .get() tolerates "data" never having been stored.
if isinstance(st.session_state.get("data"), pd.DataFrame) and st.button("Submit Query"):
    status_text = st.empty()

    # The spinner is visible for exactly as long as the agent call takes,
    # rather than sleeping for a fixed period before the query even starts.
    with st.spinner("Analyzing Data..."):
        status_text.text('Running query...')
        dataframe_insights = df_agent(st.session_state.data, agent_context, describe_dataset, query)

    if dataframe_insights is None:
        # df_agent returns None after reporting a missing API key via
        # st.error; do not render the literal text "None".
        status_text.empty()
    else:
        status_text.text('Query Completed')  # Updating the status text

        # NOTE(review): the contents of this style block are not visible in
        # the reviewed source; restore the intended CSS here.
        markdown_style = '''

'''

        # NOTE(review): model output is rendered with unsafe_allow_html=True
        # and is not escaped -- confirm this is intended.
        markdown_html = f'{dataframe_insights}\n'
        st.markdown(markdown_style, unsafe_allow_html=True)
        st.markdown(markdown_html, unsafe_allow_html=True)
168 |
169 |
--------------------------------------------------------------------------------