├── .gitignore ├── requirements.txt ├── docker-compose.yml ├── README.md └── redis-langchain-ecommerce-chatbot.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | *.csv 2 | *.ipynb_checkpoints -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.123 2 | redis==4.5.3 3 | openai==0.27.2 4 | numpy 5 | pandas 6 | gdown -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | redis: 5 | image: redis/redis-stack-server:latest 6 | ports: 7 | - "6379:6379" 8 | volumes: 9 | - redis_data:/data 10 | healthcheck: 11 | test: ["CMD", "redis-cli", "-h", "localhost", "-p", "6379", "ping"] 12 | interval: 2s 13 | timeout: 1m30s 14 | retries: 5 15 | start_period: 5s 16 | jupyter: 17 | image: jupyter/minimal-notebook:latest 18 | container_name: jupyter 19 | volumes: 20 | - ./:/home/jovyan/chatbot 21 | ports: 22 | - 8888:8888 23 | depends_on: 24 | - "redis" 25 | environment: 26 | JUPYTER_ENABLE_LAB: "yes" 27 | OPENAI_API_KEY: "YOUR API KEY HERE" 28 | 29 | volumes: 30 | redis_data: -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Build an Ecommerce Chatbot with Redis, LangChain, and OpenAI 2 | 3 | 4 | >*Powered by [Redis](https://redis.io), [LangChain](https://python.langchain.com/en/latest/), and [OpenAI](https://platform.openai.com)* 5 | 6 | In this tutorial we build a conversational retail shopping assistant that helps customers find items of interest that are buried in a product catalog. 
Our chatbot will take user input, find relevant products, and present the information in a friendly and detailed manner. 7 | 8 | The source code here goes along with [this Redis blog post](https://redis.com/blog/build-ecommerce-chatbot-with-redis/). Try various prompt-engineering techniques to improve on this prototype for your use case! 9 | 10 | ## Getting Started 11 | 12 | 1. [Get an OpenAI API Key](https://platform.openai.com). 13 | 2. Add the API key to the [`docker-compose.yml`](./docker-compose.yml) file here in the repo. 14 | 3. Start up the docker compose environment: 15 | ```bash 16 | docker compose up 17 | ``` 18 | 19 | ## Coming Soon 20 | 21 | - Extensions to LangChain + Redis integration for conversational memory storage 22 | - Have an idea or contribution to make this even better? Open an issue -- let's collaborate! 23 | -------------------------------------------------------------------------------- /redis-langchain-ecommerce-chatbot.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Redis LangChain OpenAI eCommerce Chatbot" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "colab": { 15 | "base_uri": "https://localhost:8080/" 16 | }, 17 | "id": "5-h_nDGp3Kdf", 18 | "outputId": "94191443-3844-4c1d-a26f-7619d976a55b", 19 | "tags": [] 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "# Install requirements\n", 24 | "!pip install -r requirements.txt" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "tags": [] 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "# Download the dataset\n", 36 | "!gdown --id 1tHWB6u3yQCuAgOYc-DxtZ8Mru3uV5_lj" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## Preprocess dataset" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | 
"execution_count": null, 49 | "metadata": { 50 | "tags": [] 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "import pandas as pd\n", 55 | "\n", 56 | "MAX_TEXT_LENGTH=512\n", 57 | "\n", 58 | "def auto_truncate(val):\n", 59 | " \"\"\"Truncate the given text.\"\"\"\n", 60 | " return val[:MAX_TEXT_LENGTH]\n", 61 | "\n", 62 | "# Load Product data and truncate long text fields\n", 63 | "all_prods_df = pd.read_csv(\"product_data.csv\", converters={\n", 64 | " 'bullet_point': auto_truncate,\n", 65 | " 'item_keywords': auto_truncate,\n", 66 | " 'item_name': auto_truncate\n", 67 | "})" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "colab": { 75 | "base_uri": "https://localhost:8080/", 76 | "height": 669 77 | }, 78 | "id": "00_n4VWH7FoB", 79 | "outputId": "f26daa8c-4af9-4def-d5ab-3197777fe2f9", 80 | "tags": [] 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "# Construct a primary key from item ID and domain name\n", 85 | "all_prods_df['primary_key'] = (\n", 86 | " all_prods_df['item_id'] + '-' + all_prods_df['domain_name']\n", 87 | ")\n", 88 | "# Replace empty strings with None and drop\n", 89 | "all_prods_df['item_keywords'].replace('', None, inplace=True)\n", 90 | "all_prods_df.dropna(subset=['item_keywords'], inplace=True)\n", 91 | "\n", 92 | "# Reset pandas dataframe index\n", 93 | "all_prods_df.reset_index(drop=True, inplace=True)\n", 94 | "\n", 95 | "all_prods_df.head()" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "tags": [] 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "# Num products to use (subset)\n", 107 | "NUMBER_PRODUCTS = 2500 \n", 108 | "\n", 109 | "# Get the first NUMBER_PRODUCTS products with non-empty item keywords\n", 110 | "product_metadata = ( \n", 111 | " all_prods_df\n", 112 | " .head(NUMBER_PRODUCTS)\n", 113 | " .to_dict(orient='index')\n", 114 | ")" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | 
"metadata": { 121 | "id": "Iw7rlppY8f3a", 122 | "tags": [] 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "# Check one of the products\n", 127 | "product_metadata[0]" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "## Set up Redis as a vector db" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "tags": [] 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "from langchain.embeddings import OpenAIEmbeddings\n", 146 | "from langchain.vectorstores.redis import Redis as RedisVectorStore\n", 147 | "\n", 148 | "# data that will be embedded and converted to vectors\n", 149 | "texts = [\n", 150 | " v['item_name'] for k, v in product_metadata.items()\n", 151 | "]\n", 152 | "\n", 153 | "# product metadata that we'll store along our vectors\n", 154 | "metadatas = list(product_metadata.values())\n", 155 | "\n", 156 | "# we will use OpenAI as our embeddings provider\n", 157 | "embedding = OpenAIEmbeddings()\n", 158 | "\n", 159 | "# name of the Redis search index to create\n", 160 | "index_name = \"products\"\n", 161 | "\n", 162 | "# assumes you have a redis stack server running on within your docker compose network\n", 163 | "redis_url = \"redis://redis:6379\"\n", 164 | "\n", 165 | "# create and load redis with documents\n", 166 | "vectorstore = RedisVectorStore.from_texts(\n", 167 | " texts=texts,\n", 168 | " metadatas=metadatas,\n", 169 | " embedding=embedding,\n", 170 | " index_name=index_name,\n", 171 | " redis_url=redis_url\n", 172 | ")" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "## Build the ChatBot with ConversationalRetrieverChain" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "tags": [] 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "from langchain.callbacks.base import CallbackManager\n", 191 | "from 
langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", 192 | "from langchain.chains import (\n", 193 | " ConversationalRetrievalChain,\n", 194 | " LLMChain\n", 195 | ")\n", 196 | "from langchain.chains.question_answering import load_qa_chain\n", 197 | "from langchain.llms import OpenAI\n", 198 | "from langchain.prompts.prompt import PromptTemplate\n", 199 | "\n", 200 | "template = \"\"\"Given the following chat history and a follow up question, rephrase the follow up input question to be a standalone question.\n", 201 | "Or end the conversation if it seems like it's done.\n", 202 | "\n", 203 | "Chat History:\\\"\"\"\n", 204 | "{chat_history}\n", 205 | "\\\"\"\"\n", 206 | "\n", 207 | "Follow Up Input: \\\"\"\"\n", 208 | "{question}\n", 209 | "\\\"\"\"\n", 210 | "\n", 211 | "Standalone question:\"\"\"\n", 212 | "\n", 213 | "condense_question_prompt = PromptTemplate.from_template(template)\n", 214 | "\n", 215 | "template = \"\"\"You are a friendly, conversational retail shopping assistant. 
Use the following context including product names, descriptions, and keywords to show the shopper what's available, help find what they want, and answer any questions.\n", 216 | "It's ok if you don't know the answer.\n", 217 | "\n", 218 | "Context:\\\"\"\"\n", 219 | "{context}\n", 220 | "\\\"\"\"\n", 221 | "\n", 222 | "Question:\\\"\"\"\n", 223 | "{question}\n\\\"\"\"\n", 224 | "\n", 225 | "Helpful Answer:\"\"\"\n", 226 | "\n", 227 | "qa_prompt = PromptTemplate.from_template(template)\n", 228 | "\n", 229 | "\n", 230 | "# define two LLM models from OpenAI\n", 231 | "llm = OpenAI(temperature=0)\n", 232 | "\n", 233 | "streaming_llm = OpenAI(\n", 234 | " streaming=True,\n", 235 | " callback_manager=CallbackManager([\n", 236 | " StreamingStdOutCallbackHandler()]),\n", 237 | " verbose=True,\n", 238 | " temperature=0.2,\n", 239 | " max_tokens=150\n", 240 | ")\n", 241 | "\n", 242 | "# use the LLM Chain to create a question creation chain\n", 243 | "question_generator = LLMChain(\n", 244 | " llm=llm,\n", 245 | " prompt=condense_question_prompt\n", 246 | ")\n", 247 | "\n", 248 | "# use the streaming LLM to create a question answering chain\n", 249 | "doc_chain = load_qa_chain(\n", 250 | " llm=streaming_llm,\n", 251 | " chain_type=\"stuff\",\n", 252 | " prompt=qa_prompt\n", 253 | ")\n", 254 | "\n", 255 | "\n", 256 | "chatbot = ConversationalRetrievalChain(\n", 257 | " retriever=vectorstore.as_retriever(),\n", 258 | " combine_docs_chain=doc_chain,\n", 259 | " question_generator=question_generator\n", 260 | ")" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": { 267 | "tags": [] 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "# create a chat history buffer\n", 272 | "chat_history = []\n", 273 | "\n", 274 | "# gather user input for the first question to kick off the bot\n", 275 | "question = input(\"Hi! 
What are you looking for today?\")\n", 276 | "\n", 277 | "# keep the bot running in a loop to simulate a conversation\n", 278 | "while True:\n", 279 | " result = chatbot(\n", 280 | " {\"question\": question, \"chat_history\": chat_history}\n", 281 | " )\n", 282 | " print(\"\\n\")\n", 283 | " chat_history.append((result[\"question\"], result[\"answer\"]))\n", 284 | " question = input()" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "## Customize your chains for even better performance" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": { 298 | "tags": [] 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "import json\n", 303 | "\n", 304 | "from langchain.schema import BaseRetriever\n", 305 | "from langchain.vectorstores import VectorStore\n", 306 | "from langchain.schema import Document\n", 307 | "from pydantic import BaseModel\n", 308 | "\n", 309 | "\n", 310 | "class RedisProductRetriever(BaseRetriever, BaseModel):\n", 311 | " vectorstore: VectorStore\n", 312 | "\n", 313 | " class Config:\n", 314 | " \n", 315 | " arbitrary_types_allowed = True\n", 316 | "\n", 317 | " def combine_metadata(self, doc) -> str:\n", 318 | " metadata = doc.metadata\n", 319 | " return (\n", 320 | " \"Item Name: \" + metadata[\"item_name\"] + \". \" +\n", 321 | " \"Item Description: \" + metadata[\"bullet_point\"] + \". 
\" +\n", 322 | " \"Item Keywords: \" + metadata[\"item_keywords\"] + \".\"\n", 323 | " )\n", 324 | "\n", 325 | " def get_relevant_documents(self, query):\n", 326 | " docs = []\n", 327 | " for doc in self.vectorstore.similarity_search(query):\n", 328 | " content = self.combine_metadata(doc)\n", 329 | " docs.append(Document(\n", 330 | " page_content=content,\n", 331 | " metadata=doc.metadata\n", 332 | " ))\n", 333 | " return docs" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "### Setup ChatBot with new retriever" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": { 347 | "tags": [] 348 | }, 349 | "outputs": [], 350 | "source": [ 351 | "redis_product_retriever = RedisProductRetriever(vectorstore=vectorstore)\n", 352 | "\n", 353 | "chatbot = ConversationalRetrievalChain(\n", 354 | " retriever=redis_product_retriever,\n", 355 | " combine_docs_chain=doc_chain,\n", 356 | " question_generator=question_generator\n", 357 | ")" 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "### Retry" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": { 371 | "tags": [] 372 | }, 373 | "outputs": [], 374 | "source": [ 375 | "# create a chat history buffer\n", 376 | "chat_history = []\n", 377 | "\n", 378 | "# gather user input for the first question to kick off the bot\n", 379 | "question = input(\"Hi! 
What are you looking for today?\")\n", 380 | "\n", 381 | "# keep the bot running in a loop to simulate a conversation\n", 382 | "while True:\n", 383 | " result = chatbot(\n", 384 | " {\"question\": question, \"chat_history\": chat_history}\n", 385 | " )\n", 386 | " print(\"\\n\")\n", 387 | " chat_history.append((result[\"question\"], result[\"answer\"]))\n", 388 | " question = input()" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": {}, 395 | "outputs": [], 396 | "source": [] 397 | } 398 | ], 399 | "metadata": { 400 | "colab": { 401 | "provenance": [] 402 | }, 403 | "kernelspec": { 404 | "display_name": "Python 3 (ipykernel)", 405 | "language": "python", 406 | "name": "python3" 407 | }, 408 | "language_info": { 409 | "codemirror_mode": { 410 | "name": "ipython", 411 | "version": 3 412 | }, 413 | "file_extension": ".py", 414 | "mimetype": "text/x-python", 415 | "name": "python", 416 | "nbconvert_exporter": "python", 417 | "pygments_lexer": "ipython3", 418 | "version": "3.10.10" 419 | } 420 | }, 421 | "nbformat": 4, 422 | "nbformat_minor": 4 423 | } 424 | --------------------------------------------------------------------------------