├── README.md ├── requirements.txt ├── .gitignore ├── frontend.py ├── survey.csv └── model.py /README.md: -------------------------------------------------------------------------------- 1 | # Rag-with-your-csv -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | accelerate 3 | langchain 4 | openai 5 | chromadb 6 | langchain-community 7 | tiktoken 8 | python-dotenv 9 | langchain_ollama 10 | streamlit 11 | streamlit_chat -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### AL ### 2 | #Template for AL projects for Dynamics 365 Business Central 3 | #launch.json folder 4 | .vscode/ 5 | #Cache folder 6 | .alcache/ 7 | #Symbols folder 8 | .alpackages/ 9 | #Snapshots folder 10 | .snapshots/ 11 | #Testing Output folder 12 | .output/ 13 | #Extension App-file 14 | *.app 15 | #Rapid Application Development File 16 | rad.json 17 | #Translation Base-file 18 | *.g.xlf 19 | #License-file 20 | *.flf 21 | #Test results file 22 | TestResults.xml -------------------------------------------------------------------------------- /frontend.py: -------------------------------------------------------------------------------- 1 | from model import run 2 | import streamlit as st 3 | from streamlit_chat import message as st_message 4 | 5 | 6 | # Initialize chat history 7 | if "messages" not in st.session_state: 8 | st.session_state.messages = [] 9 | 10 | # Display chat messages from history on app rerun 11 | for message in st.session_state.messages: 12 | with st.chat_message(message["role"]): 13 | st.markdown(message["content"]) 14 | 15 | if prompt := st.chat_input("What is up?"): 16 | # Display user message in chat message container 17 | with st.chat_message("user"): 18 | st.markdown(prompt) 19 | # Add user message to 
chat history 20 | st.session_state.messages.append({"role": "user", "content": prompt}) 21 | 22 | response = run(prompt) 23 | # Display assistant response in chat message container 24 | with st.chat_message("assistant"): 25 | st.markdown(response) 26 | # Add assistant response to chat history 27 | st.session_state.messages.append({"role": "assistant", "content": response}) -------------------------------------------------------------------------------- /survey.csv: -------------------------------------------------------------------------------- 1 | Question,Answer 2 | What sizes do you carry?,"We carry a range of sizes from XS to XXL, and some styles include additional sizes." 3 | Do you offer plus-size options?,"Yes, we offer an extensive plus-size collection designed for style and comfort." 4 | Are your clothes true to size?,"Our clothing generally runs true to size, but we recommend checking our size guide for precise measurements." 5 | What materials are your clothes made from?,"Our clothes are made from high-quality materials such as cotton, linen, and sustainable synthetics." 6 | How should I care for your clothing items?,"Each item includes a care label; generally, machine washing on a gentle cycle and air drying is recommended." 7 | Where are your products manufactured?,"Our products are manufactured in ethical factories located across various regions, including Europe and Asia." 8 | Do you offer international shipping?,"Yes, we offer international shipping to many countries. Please check our website for specific locations." 9 | What is your return policy?,We have a 30-day return policy for unworn items with original tags attached. 10 | How long does shipping usually take?,"Shipping typically takes 3-7 business days domestically and 10-15 days internationally, depending on location." 11 | Do you have a physical store location?,"Yes, we have a flagship store in [City/Location], and our store locator can help you find one near you." 
12 | Can I track my order?,"Yes, once your order is shipped, you'll receive a tracking number via email to monitor your package." 13 | Are there any discounts or promotions available?,We often have promotions; sign up for our newsletter and follow us on social media for updates. 14 | How often do you release new collections?,"We release new collections seasonally, around four times a year, to align with fashion trends." 15 | Do you provide gift wrapping services?,"Yes, we offer gift wrapping for a small additional fee, perfect for special occasions." 16 | What payment methods do you accept?,"We accept major credit cards, digital wallets, and other secure payment options." 17 | Can I exchange an item for a different size?,"We offer exchanges for different sizes, subject to availability. Contact customer service for assistance." 18 | Do you have a loyalty or rewards program?,"Yes, join our loyalty program to earn points with every purchase and receive exclusive discounts." 19 | Are your products environmentally friendly?,We prioritize sustainability by incorporating eco-friendly materials and ethical production practices. 20 | How do I know if an item is in stock?,"Our website reflects real-time stock availability. If an item is out, consider signing up for restock notifications." 21 | Can I pre-order upcoming items?,"Yes, certain items can be pre-ordered, allowing you early access to fresh styles." 
-------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | from dotenv import load_dotenv # Load environment variables from a .env file 4 | 5 | import textwrap # (Optional) For formatting text if needed later 6 | 7 | # Import various libraries used in the script 8 | import langchain 9 | import chromadb 10 | import transformers 11 | import openai 12 | import torch 13 | import requests 14 | import json 15 | 16 | # Import specific classes and functions from libraries 17 | from transformers import AutoTokenizer 18 | from langchain.llms.huggingface_pipeline import HuggingFacePipeline 19 | from langchain.text_splitter import CharacterTextSplitter 20 | from langchain.document_loaders.csv_loader import CSVLoader 21 | from langchain.embeddings import OpenAIEmbeddings 22 | from langchain.vectorstores import Chroma 23 | from langchain.chains import RetrievalQA 24 | from langchain.prompts import PromptTemplate 25 | from langchain_ollama.llms import OllamaLLM 26 | 27 | 28 | 29 | # ============================================================================= 30 | # 1. Environment Setup and API Key Loading 31 | # ============================================================================= 32 | 33 | # Load environment variables from the .env file (e.g., API keys) 34 | 35 | 36 | # ============================================================================= 37 | # 6. Create the RetrievalQA Chain 38 | # ============================================================================= 39 | 40 | # The RetrievalQA chain combines: 41 | # - The language model (model) to generate responses. 42 | # - A retriever (db.as_retriever) that fetches relevant document chunks based on the query. 43 | # - A prompt that provides instructions on how to answer the query. 
def run(input):
    """Answer a customer question with RAG over ``survey.csv``.

    Pipeline: load and chunk the CSV, embed the chunks with OpenAI
    embeddings, index them in an in-memory Chroma store, then answer the
    question through a RetrievalQA chain backed by a local Ollama model.

    Args:
        input (str): The customer's question. (The name shadows the
            builtin ``input``; kept unchanged for backward compatibility
            with existing callers such as frontend.py.)

    Returns:
        str: The chatbot's answer.
    """
    # Chroma keeps a process-wide client cache; clear it so repeated calls
    # (e.g. on every Streamlit rerun) start from a clean in-memory store.
    chromadb.api.client.SharedSystemClient.clear_system_cache()

    # Load environment variables (OPENAI_API_KEY) from a .env file.
    load_dotenv()
    openai_api_key = os.getenv("OPENAI_API_KEY")

    # Local chat model served by Ollama (the model actually used is
    # "llama3.1", not "deepseek-r1" as older comments claimed).
    model = OllamaLLM(model="llama3.1")

    # Load the CSV and split it into chunks ready for embedding.
    docs = _docs_preprocessing_helper("survey.csv")

    # Embed the chunks and index them in an in-memory Chroma vector store.
    # NOTE(review): this re-embeds the whole CSV on every call — one OpenAI
    # round-trip per question. If survey.csv is static, consider building
    # the store once and reusing it.
    embedding_function = OpenAIEmbeddings(openai_api_key=openai_api_key)
    db = Chroma.from_documents(docs, embedding_function)

    # Prompt for the "stuff" chain. It must expose BOTH {context} (the
    # retrieved document chunks) and {question} (the user's query): the
    # original template omitted {question}, and str.format silently drops
    # unknown kwargs, so the model never saw what was actually asked.
    template = """You are a clothing consultant chatbot.

Answer the customer's questions. When relevant questions come, use the provided documents. Please answer to their specific question. If you are unsure, say "I don't know, please call our customer support". Use engaging, courteous, and professional language similar to a customer representative.
Keep your answers concise.

{context}

Question: {question}
"""
    prompt = PromptTemplate(
        template=template, input_variables=["context", "question"]
    )

    # Build the RetrievalQA chain:
    #   - "stuff" concatenates all retrieved chunks into one context;
    #   - the retriever returns the single most relevant chunk (k=1);
    #   - our prompt template replaces the chain's default prompt.
    chain = RetrievalQA.from_chain_type(
        llm=model,
        chain_type="stuff",
        retriever=db.as_retriever(search_kwargs={"k": 1}),
        chain_type_kwargs={"prompt": prompt},
    )
    return chain.run(input)


def _docs_preprocessing_helper(file):
    """Load a CSV file and split it into chunks for embedding.

    1. Loads the CSV with LangChain's ``CSVLoader`` (one document per row).
    2. Splits the documents into chunks of up to 1000 characters with no
       overlap using ``CharacterTextSplitter``.

    Args:
        file (str): Path to the CSV file.

    Returns:
        list: Document chunks ready for embedding and indexing.
    """
    loader = CSVLoader(file)
    docs = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    return text_splitter.split_documents(docs)