├── .gitignore ├── requirements.txt ├── docker-compose.yml ├── README.md └── redis-langchain-ecommerce-chatbot.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | *.csv 2 | *.ipynb_checkpoints -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.123 2 | redis==4.5.3 3 | openai==0.27.2 4 | numpy 5 | pandas 6 | gdown -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | redis: 5 | image: redis/redis-stack-server:latest 6 | ports: 7 | - "6379:6379" 8 | volumes: 9 | - redis_data:/data 10 | healthcheck: 11 | test: ["CMD", "redis-cli", "-h", "localhost", "-p", "6379", "ping"] 12 | interval: 2s 13 | timeout: 1m30s 14 | retries: 5 15 | start_period: 5s 16 | jupyter: 17 | image: jupyter/minimal-notebook:latest 18 | container_name: jupyter 19 | volumes: 20 | - ./:/home/jovyan/chatbot 21 | ports: 22 | - 8888:8888 23 | depends_on: 24 | - "redis" 25 | environment: 26 | JUPYTER_ENABLE_LAB: "yes" 27 | OPENAI_API_KEY: "YOUR API KEY HERE" 28 | 29 | volumes: 30 | redis_data: -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Build an Ecommerce Chatbot with Redis, LangChain, and OpenAI 2 | 3 | 4 | >*Powered by [Redis](https://redis.io), [LangChain](https://python.langchain.com/en/latest/), and [OpenAI](https://platform.openai.com)* 5 | 6 | In this tutorial we build a conversational retail shopping assistant that helps customers find items of interest that are buried in a product catalog. 
Our chatbot will take user input, find relevant products, and present the information in a friendly and detailed manner. 7 | 8 | The source code here goes along with [this Redis blog post](https://redis.com/blog/build-ecommerce-chatbot-with-redis/). Try various prompt-engineering techniques to improve on this prototype for your use case! 9 | 10 | ## Getting Started 11 | 12 | 1. [Get an OpenAI API Key](https://platform.openai.com). 13 | 2. Add the API key to the [`docker-compose.yml`](./docker-compose.yml) file here in the repo. 14 | 3. Start up the docker compose environment: 15 | ```bash 16 | docker compose up 17 | ``` 18 | 19 | ## Coming Soon 20 | 21 | - Extensions to LangChain + Redis integration for conversational memory storage 22 | - Have an idea or contribution to make this even better? Open an issue -- let's collaborate! 23 | -------------------------------------------------------------------------------- /redis-langchain-ecommerce-chatbot.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Redis LangChain OpenAI eCommerce Chatbot" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "colab": { 15 | "base_uri": "https://localhost:8080/" 16 | }, 17 | "id": "5-h_nDGp3Kdf", 18 | "outputId": "94191443-3844-4c1d-a26f-7619d976a55b", 19 | "tags": [] 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "# Install requirements\n", 24 | "!pip install -r requirements.txt" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "tags": [] 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "# Download the dataset\n", 36 | "!gdown --id 1tHWB6u3yQCuAgOYc-DxtZ8Mru3uV5_lj" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## Preprocess dataset" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | 
"execution_count": null, 49 | "metadata": { 50 | "tags": [] 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "import pandas as pd\n", 55 | "\n", 56 | "MAX_TEXT_LENGTH=512\n", 57 | "\n", 58 | "def auto_truncate(val):\n", 59 | " \"\"\"Truncate the given text.\"\"\"\n", 60 | " return val[:MAX_TEXT_LENGTH]\n", 61 | "\n", 62 | "# Load Product data and truncate long text fields\n", 63 | "all_prods_df = pd.read_csv(\"product_data.csv\", converters={\n", 64 | " 'bullet_point': auto_truncate,\n", 65 | " 'item_keywords': auto_truncate,\n", 66 | " 'item_name': auto_truncate\n", 67 | "})" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "colab": { 75 | "base_uri": "https://localhost:8080/", 76 | "height": 669 77 | }, 78 | "id": "00_n4VWH7FoB", 79 | "outputId": "f26daa8c-4af9-4def-d5ab-3197777fe2f9", 80 | "tags": [] 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "# Construct a primary key from item ID and domain name\n", 85 | "all_prods_df['primary_key'] = (\n", 86 | " all_prods_df['item_id'] + '-' + all_prods_df['domain_name']\n", 87 | ")\n", 88 | "# Replace empty strings with None and drop\n", 89 | "all_prods_df['item_keywords'].replace('', None, inplace=True)\n", 90 | "all_prods_df.dropna(subset=['item_keywords'], inplace=True)\n", 91 | "\n", 92 | "# Reset pandas dataframe index\n", 93 | "all_prods_df.reset_index(drop=True, inplace=True)\n", 94 | "\n", 95 | "all_prods_df.head()" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "tags": [] 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "# Num products to use (subset)\n", 107 | "NUMBER_PRODUCTS = 2500 \n", 108 | "\n", 109 | "# Get the first NUMBER_PRODUCTS products with non-empty item keywords\n", 110 | "product_metadata = ( \n", 111 | " all_prods_df\n", 112 | " .head(NUMBER_PRODUCTS)\n", 113 | " .to_dict(orient='index')\n", 114 | ")" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | 
"metadata": { 121 | "id": "Iw7rlppY8f3a", 122 | "tags": [] 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "# Check one of the products\n", 127 | "product_metadata[0]" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "## Set up Redis as a vector db" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "tags": [] 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "from langchain.embeddings import OpenAIEmbeddings\n", 146 | "from langchain.vectorstores.redis import Redis as RedisVectorStore\n", 147 | "\n", 148 | "# data that will be embedded and converted to vectors\n", 149 | "texts = [\n", 150 | " v['item_name'] for k, v in product_metadata.items()\n", 151 | "]\n", 152 | "\n", 153 | "# product metadata that we'll store along our vectors\n", 154 | "metadatas = list(product_metadata.values())\n", 155 | "\n", 156 | "# we will use OpenAI as our embeddings provider\n", 157 | "embedding = OpenAIEmbeddings()\n", 158 | "\n", 159 | "# name of the Redis search index to create\n", 160 | "index_name = \"products\"\n", 161 | "\n", 162 | "# assumes you have a redis stack server running on within your docker compose network\n", 163 | "redis_url = \"redis://redis:6379\"\n", 164 | "\n", 165 | "# create and load redis with documents\n", 166 | "vectorstore = RedisVectorStore.from_texts(\n", 167 | " texts=texts,\n", 168 | " metadatas=metadatas,\n", 169 | " embedding=embedding,\n", 170 | " index_name=index_name,\n", 171 | " redis_url=redis_url\n", 172 | ")" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "## Build the ChatBot with ConversationalRetrieverChain" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "tags": [] 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "from langchain.callbacks.base import CallbackManager\n", 191 | "from 
langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", 192 | "from langchain.chains import (\n", 193 | " ConversationalRetrievalChain,\n", 194 | " LLMChain\n", 195 | ")\n", 196 | "from langchain.chains.question_answering import load_qa_chain\n", 197 | "from langchain.llms import OpenAI\n", 198 | "from langchain.prompts.prompt import PromptTemplate\n", 199 | "\n", 200 | "template = \"\"\"Given the following chat history and a follow up question, rephrase the follow up input question to be a standalone question.\n", 201 | "Or end the conversation if it seems like it's done.\n", 202 | "\n", 203 | "Chat History:\\\"\"\"\n", 204 | "{chat_history}\n", 205 | "\\\"\"\"\n", 206 | "\n", 207 | "Follow Up Input: \\\"\"\"\n", 208 | "{question}\n", 209 | "\\\"\"\"\n", 210 | "\n", 211 | "Standalone question:\"\"\"\n", 212 | "\n", 213 | "condense_question_prompt = PromptTemplate.from_template(template)\n", 214 | "\n", 215 | "template = \"\"\"You are a friendly, conversational retail shopping assistant. 
Use the following context including product names, descriptions, and keywords to show the shopper what's available, help find what they want, and answer any questions.\n", 216 | "It's ok if you don't know the answer.\n", 217 | "\n", 218 | "Context:\\\"\"\"\n", 219 | "{context}\n", 220 | "\\\"\"\"\n", 221 | "\n", 222 | "Question:\\\"\"\"\n", 223 | "{question}\n\\\"\"\"\n", 224 | "\n", 225 | "Helpful Answer:\"\"\"\n", 226 | "\n", 227 | "qa_prompt = PromptTemplate.from_template(template)\n", 228 | "\n", 229 | "\n", 230 | "# define two LLM models from OpenAI\n", 231 | "llm = OpenAI(temperature=0)\n", 232 | "\n", 233 | "streaming_llm = OpenAI(\n", 234 | " streaming=True,\n", 235 | " callback_manager=CallbackManager([\n", 236 | " StreamingStdOutCallbackHandler()]),\n", 237 | " verbose=True,\n", 238 | " temperature=0.2,\n", 239 | " max_tokens=150\n", 240 | ")\n", 241 | "\n", 242 | "# use the LLM Chain to create a question creation chain\n", 243 | "question_generator = LLMChain(\n", 244 | " llm=llm,\n", 245 | " prompt=condense_question_prompt\n", 246 | ")\n", 247 | "\n", 248 | "# use the streaming LLM to create a question answering chain\n", 249 | "doc_chain = load_qa_chain(\n", 250 | " llm=streaming_llm,\n", 251 | " chain_type=\"stuff\",\n", 252 | " prompt=qa_prompt\n", 253 | ")\n", 254 | "\n", 255 | "\n", 256 | "chatbot = ConversationalRetrievalChain(\n", 257 | " retriever=vectorstore.as_retriever(),\n", 258 | " combine_docs_chain=doc_chain,\n", 259 | " question_generator=question_generator\n", 260 | ")" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": { 267 | "tags": [] 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "# create a chat history buffer\n", 272 | "chat_history = []\n", 273 | "\n", 274 | "# gather user input for the first question to kick off the bot\n", 275 | "question = input(\"Hi! 
What are you looking for today?\")\n", 276 | "\n", 277 | "# keep the bot running in a loop to simulate a conversation\n", 278 | "while True:\n", 279 | " result = chatbot(\n", 280 | " {\"question\": question, \"chat_history\": chat_history}\n", 281 | " )\n", 282 | " print(\"\\n\")\n", 283 | " chat_history.append((result[\"question\"], result[\"answer\"]))\n", 284 | " question = input()" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "## Customize your chains for even better performance" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": { 298 | "tags": [] 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "import json\n", 303 | "\n", 304 | "from langchain.schema import BaseRetriever\n", 305 | "from langchain.vectorstores import VectorStore\n", 306 | "from langchain.schema import Document\n", 307 | "from pydantic import BaseModel\n", 308 | "\n", 309 | "\n", 310 | "class RedisProductRetriever(BaseRetriever, BaseModel):\n", 311 | " vectorstore: VectorStore\n", 312 | "\n", 313 | " class Config:\n", 314 | " \n", 315 | " arbitrary_types_allowed = True\n", 316 | "\n", 317 | " def combine_metadata(self, doc) -> str:\n", 318 | " metadata = doc.metadata\n", 319 | " return (\n", 320 | " \"Item Name: \" + metadata[\"item_name\"] + \". \" +\n", 321 | " \"Item Description: \" + metadata[\"bullet_point\"] + \". 
\" +\n", 322 | " \"Item Keywords: \" + metadata[\"item_keywords\"] + \".\"\n", 323 | " )\n", 324 | "\n", 325 | " def get_relevant_documents(self, query):\n", 326 | " docs = []\n", 327 | " for doc in self.vectorstore.similarity_search(query):\n", 328 | " content = self.combine_metadata(doc)\n", 329 | " docs.append(Document(\n", 330 | " page_content=content,\n", 331 | " metadata=doc.metadata\n", 332 | " ))\n", 333 | " return docs" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "### Setup ChatBot with new retriever" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": { 347 | "tags": [] 348 | }, 349 | "outputs": [], 350 | "source": [ 351 | "redis_product_retriever = RedisProductRetriever(vectorstore=vectorstore)\n", 352 | "\n", 353 | "chatbot = ConversationalRetrievalChain(\n", 354 | " retriever=redis_product_retriever,\n", 355 | " combine_docs_chain=doc_chain,\n", 356 | " question_generator=question_generator\n", 357 | ")" 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "### Retry" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": { 371 | "tags": [] 372 | }, 373 | "outputs": [], 374 | "source": [ 375 | "# create a chat history buffer\n", 376 | "chat_history = []\n", 377 | "\n", 378 | "# gather user input for the first question to kick off the bot\n", 379 | "question = input(\"Hi! 
What are you looking for today?\")\n", 380 | "\n", 381 | "# keep the bot running in a loop to simulate a conversation\n", 382 | "while True:\n", 383 | " result = chatbot(\n", 384 | " {\"question\": question, \"chat_history\": chat_history}\n", 385 | " )\n", 386 | " print(\"\\n\")\n", 387 | " chat_history.append((result[\"question\"], result[\"answer\"]))\n", 388 | " question = input()" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": {}, 395 | "outputs": [], 396 | "source": [] 397 | } 398 | ], 399 | "metadata": { 400 | "colab": { 401 | "provenance": [] 402 | }, 403 | "kernelspec": { 404 | "display_name": "Python 3 (ipykernel)", 405 | "language": "python", 406 | "name": "python3" 407 | }, 408 | "language_info": { 409 | "codemirror_mode": { 410 | "name": "ipython", 411 | "version": 3 412 | }, 413 | "file_extension": ".py", 414 | "mimetype": "text/x-python", 415 | "name": "python", 416 | "nbconvert_exporter": "python", 417 | "pygments_lexer": "ipython3", 418 | "version": "3.10.10" 419 | } 420 | }, 421 | "nbformat": 4, 422 | "nbformat_minor": 4 423 | } 424 | --------------------------------------------------------------------------------