├── screenshots ├── csv_query.png ├── csv_headings.png ├── csv_prompts.png └── csv_query_answer.png ├── README.md ├── requirements.txt └── CSV_Agent_Deployed.py /screenshots/csv_query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TMoneyBidness/CSV_Agent_with_Prompts/HEAD/screenshots/csv_query.png -------------------------------------------------------------------------------- /screenshots/csv_headings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TMoneyBidness/CSV_Agent_with_Prompts/HEAD/screenshots/csv_headings.png -------------------------------------------------------------------------------- /screenshots/csv_prompts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TMoneyBidness/CSV_Agent_with_Prompts/HEAD/screenshots/csv_prompts.png -------------------------------------------------------------------------------- /screenshots/csv_query_answer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TMoneyBidness/CSV_Agent_with_Prompts/HEAD/screenshots/csv_query_answer.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Query your CSV using Langchain and Streamlit 2 | 3 | Beyond a basic CSV Agent to query your tabular data, this app allows you to provide a prompt template to the agent, task objectives, and contextual information about your data. 4 | 5 | This project was just a workhorse for a friend. I'm currently working on a large commercial copywriting application. 
6 | 7 | Feel free to reach out on [Twitter to collaborate: @TradersCraft](https://twitter.com/TradersCraft) 8 | 9 | *Note: There is a token limit of 4096, so be gentle.* 10 | 11 | ## Requirements 12 | 13 | Install the required packages by running 14 | 15 | ``` 16 | pip install -r requirements.txt 17 | ``` 18 | ## Layout 19 | 20 |

21 | 22 |

23 | 24 |

25 | 26 |

27 | 28 |

29 | 30 |

31 | 32 |

33 | 34 |

35 | 36 | #### Have fun! 37 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt 2 | 3 | # Standard Library Imports 4 | 5 | aiohttp==3.8.4 6 | aiosignal==1.3.1 7 | altair==5.0.1 8 | async-timeout==4.0.2 9 | attrs==23.1.0 10 | blinker==1.6.2 11 | cachetools==5.3.1 12 | certifi==2023.5.7 13 | charset-normalizer==3.1.0 14 | click==8.1.3 15 | colorama==0.4.6 16 | dataclasses-json==0.5.7 17 | decorator==5.1.1 18 | frozenlist==1.3.3 19 | gitdb==4.0.10 20 | GitPython==3.1.31 21 | greenlet==2.0.2 22 | idna==3.4 23 | importlib-metadata==6.6.0 24 | Jinja2==3.1.2 25 | jsonschema==4.17.3 26 | langchain==0.0.188 27 | markdown-it-py==2.2.0 28 | MarkupSafe==2.1.2 29 | marshmallow==3.19.0 30 | marshmallow-enum==1.5.1 31 | mdurl==0.1.2 32 | multidict==6.0.4 33 | mypy-extensions==1.0.0 34 | numexpr==2.8.4 35 | numpy==1.24.3 36 | openai==0.27.7 37 | openapi-schema-pydantic==1.2.4 38 | packaging==23.1 39 | pandas==2.0.2 40 | Pillow==9.5.0 41 | protobuf==4.23.2 42 | pyarrow==12.0.0 43 | pydantic==1.10.8 44 | pydeck==0.8.1b0 45 | Pygments==2.15.1 46 | Pympler==1.0.1 47 | pyrsistent==0.19.3 48 | python-dateutil==2.8.2 49 | python-environ==0.4.54 50 | pytz==2023.3 51 | pytz-deprecation-shim==0.1.0.post0 52 | PyYAML==6.0 53 | requests==2.31.0 54 | rich==13.4.1 55 | six==1.16.0 56 | smmap==5.0.0 57 | SQLAlchemy==2.0.15 58 | streamlit==1.23.1 59 | tabulate==0.9.0 60 | tenacity==8.2.2 61 | toml==0.10.2 62 | toolz==0.12.0 63 | tornado==6.3.2 64 | tqdm==4.65.0 65 | typing-inspect==0.9.0 66 | typing_extensions==4.6.3 67 | tzdata==2023.3 68 | tzlocal==4.3 69 | urllib3==2.0.2 70 | validators==0.20.0 71 | watchdog==3.0.0 72 | yarl==1.9.2 73 | zipp==3.15.0 -------------------------------------------------------------------------------- /CSV_Agent_Deployed.py: -------------------------------------------------------------------------------- 1 | # 
# Standard Library Imports
import html
import json
import os
import re
import time
import warnings

# Third-Party Library Imports
import numpy as np
import pandas as pd
import streamlit as st

# Langchain Library Imports
from langchain import (
    LLMChain,
    PromptTemplate,
    OpenAI,
)
from langchain.agents import (
    initialize_agent,
    Tool,
    AgentType,
    create_csv_agent,
    create_pandas_dataframe_agent,
    load_tools,
    ZeroShotAgent,
    AgentExecutor,
)
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA


############################################################################################################
# Load environment variables

# HOSTED
# The key is resolved further down, from either the shared password or the
# user's own key; until then there is no key.
API_KEY = None
# API_KEY = st.secrets["apikey"]

############################################################################################################

# Initialize session state variables
if "headings_list" not in st.session_state:
    st.session_state.headings_list = ""
# "data" must exist before any callback or later section reads it; otherwise
# an empty/unparseable first upload leaves the attribute undefined.
if "data" not in st.session_state:
    st.session_state.data = None


# 3. Load Data Function
def load_data(path):
    """Load a CSV file from *path* and return it as a pandas DataFrame.

    This is a best-effort loader: any failure is printed and ``None`` is
    returned instead of raising.
    """
    try:
        return pd.read_csv(path)
    except Exception as e:  # deliberate best-effort: report and signal failure
        print(f"Error occurred: {e}")
        return None


def get_headings(df):
    """Return the column labels of *df* as a list."""
    return df.columns.tolist()


# Function to preview the column headings of the uploaded CSV file
def show_headings():
    """Button callback: store the uploaded frame's headings, one per line.

    Does nothing when no DataFrame is currently stored in the session.
    """
    data = st.session_state.get("data")
    if data is not None:
        # Column labels are not guaranteed to be strings (e.g. integer
        # headers), and str.join rejects non-strings.
        st.session_state.headings_list = "\n".join(
            str(heading) for heading in get_headings(data)
        )


# Function to process the DataFrame and generate insights
def df_agent(df, agent_context, describe_dataset, query, objectives=""):
    """Ask a pandas-DataFrame agent to answer *query* about *df*.

    Args:
        df: The DataFrame to analyse.
        agent_context: Free-text description of the agent's strengths/role.
        describe_dataset: Free-text description of the dataset.
        query: The question to answer.
        objectives: Optional free-text task objectives; left out of the
            prompt when blank.

    Returns:
        The agent's ``"output"`` value, or ``None`` (after showing an error
        in the UI) when no API key is available.
    """
    # st.text_input yields "" rather than None when left blank, so test for
    # any falsy key instead of `is None`.
    if not API_KEY:
        st.error("Please enter the password or your API key to proceed.")
        return None

    llm = OpenAI(openai_api_key=API_KEY, temperature=0)
    # llm = ChatOpenAI(openai_api_key=API_KEY,temperature=0, model_name='gpt-4') <- Trial with ChatGPT 4
    agent = create_pandas_dataframe_agent(llm, df, handle_parsing_errors=True)

    prompt_parts = [
        "You are DataFrameAI, the most advanced dataframe analysis agent on the planet. "
        "You are collaborating with a company to provide skilled, in-depth data analysis on a large table. "
        "They are looking to gain competitive business insights from this data, in order to gain an edge over their competitors. "
        "They are looking to analyze trends, ratios, hidden insights, and more.",
        f"You are a professional data science and analysis agent with the following strengths: {agent_context}",
        f"The dataset can be described as follows: {describe_dataset}",
    ]
    if objectives and objectives.strip():
        prompt_parts.append(f"Their objectives are as follows: {objectives}")
    prompt_parts.extend(
        [
            f"Specifically, they are looking to answer the following question about the data: {query}",
            "OUTPUT: Provide detailed, actionable insights. I am not looking for one or two sentences. "
            "I want a paragraph at least, including statistics, totals, etc. "
            "Be very specific, and analyze multiple columns or rows against each other. "
            "Whatever is required to provide the most advanced information possible!",
        ]
    )

    analysis = agent({"input": " ".join(prompt_parts)})
    return analysis["output"]


############################################################################################################

# STREAMLIT APP
st.title("👨‍💻 Query your CSV with an AI Agent using Langchain")
st.write("Beyond a basic CSV Agent to query your tabular data, this app allows you to provide a prompt to the agent, preview headings, provide task objectives, and contextual information about your data.")
st.write("Uses OpenAI. You need the key, or...hit me up if you're cool and I can give you the password!")

# Add a password input (masked, since it unlocks the hosted API key)
password = st.text_input("Enter the password to use the default API key", type="password")

# Check if the password is correct. An empty input must never match, even if
# the configured secret happens to be empty.
if password and password == st.secrets["password"]:
    API_KEY = st.secrets["apikey"]
else:
    API_KEY = st.text_input("Enter your own API key", type='password').strip()

uploaded_file = st.file_uploader("Please upload your CSV file below")

# Always leave session_state.data as either a DataFrame or None so a failed
# upload cannot leave a stale frame from an earlier file behind.
if uploaded_file is None:
    st.session_state.data = None
elif uploaded_file.size == 0:
    st.session_state.data = None
    st.write("The uploaded file is empty.")
else:
    try:
        st.session_state.data = pd.read_csv(uploaded_file)
    except pd.errors.EmptyDataError:
        st.session_state.data = None
        st.write("The uploaded file does not contain any data or columns.")
    except (pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.session_state.data = None
        st.error(f"The uploaded file could not be parsed as CSV: {exc}")

# The callback does the work; the button's return value is not needed.
st.button("PREVIEW HEADINGS", type="secondary", help="Click to preview headings", on_click=show_headings)

# Display the headings text area
headings_list = st.text_area(label="Headings", value=st.session_state.headings_list, key="headings")

describe_dataset = st.text_area("Please describe your dataset. e.g., 'This is Amazon sales data that contains XYZ.'")
objectives = st.text_area("Describe your objectives. e.g., 'I am specifically looking for data insights related to overlooked ratios, key performance indicators, or hidden insights. Test correlations or complete data analysis when required.'")
agent_context = st.text_area("Agent context prompt. e.g., 'You are a skilled data scientist. You are looking for trends, ratios, and actionable insights into the data. Your answers will result in marketing spend decisions, so be as specific as possible.'")
query = st.text_area("Type your query")


# The submit button is only offered once a DataFrame has been loaded.
if isinstance(st.session_state.data, pd.DataFrame) and st.button("Submit Query"):
    if not API_KEY:
        # Fail fast instead of making the user sit through the progress
        # animation before learning that no key was supplied.
        st.error("Please enter the password or your API key to proceed.")
    else:
        progress_bar = st.progress(0)
        status_text = st.empty()
        max_time = 25  # You should estimate how long the task will take

        # NOTE(review): this animation runs *before* the query starts, so it
        # adds max_time seconds of latency rather than tracking real work.
        # Kept as-is because it appears to be an intentional UX choice.
        for i in range(max_time):
            progress_bar.progress((i + 1) / max_time)
            status_text.text(f'Analyzing Data: {i+1}')
            time.sleep(1)  # Sleep for a second to slow down the progress

        status_text.text('Running query...')
        dataframe_insights = df_agent(
            st.session_state.data,
            agent_context,
            describe_dataset,
            query,
            objectives,
        )
        progress_bar.empty()  # You can empty the progress bar here

        if dataframe_insights is None:
            # df_agent already reported the problem; don't claim success or
            # render the literal text "None".
            status_text.empty()
        else:
            status_text.text('Query Completed')  # Updating the status text

            # NOTE(review): the project's original CSS rules and wrapper
            # markup were not visible in the reviewed source; the styling
            # below is a minimal placeholder -- restore the real rules here.
            markdown_style = '''
<style>
.insights-box {
    padding: 1rem;
    border: 1px solid rgba(128, 128, 128, 0.4);
    border-radius: 0.5rem;
    white-space: pre-wrap;
}
</style>
'''

            # The model's answer is untrusted text: escape it before
            # embedding it in HTML rendered with unsafe_allow_html=True.
            safe_insights = html.escape(str(dataframe_insights))
            markdown_html = f'<div class="insights-box">{safe_insights}</div>'
            st.markdown(markdown_style, unsafe_allow_html=True)
            st.markdown(markdown_html, unsafe_allow_html=True)