├── .gitignore
├── Gemini_Redis.ipynb
├── LICENSE
├── README.md
├── assets
│   ├── GCP_RE_GenAI.drawio.png
│   └── Redis_GCN_GenAI_20240325.png
└── examples
    ├── README.md
    └── chat-your-pdf
        ├── .env.example
        ├── Dockerfile
        ├── README.md
        ├── app
        │   ├── config.py
        │   ├── main.py
        │   ├── pdfs
        │   │   └── 2022-chevrolet-colorado-ebrochure.pdf
        │   └── requirements.txt
        └── docker-compose.yml
/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | notes.txt 3 | .DS_Store 4 | repos 5 | .env 6 | *.json -------------------------------------------------------------------------------- /Gemini_Redis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "aiHV6ip2f5id" 7 | }, 8 | "source": [ 9 | "# LLM Reference Architecture using Redis & Google Cloud Platform\n", 10 | "\n", 11 | "\n", 12 | "\n", 13 | "This notebook serves as a getting started guide for working with LLMs on Google Cloud Platform with Redis Enterprise.\n", 14 | "\n", 15 | "## Intro\n", 16 | "Google's Vertex AI has expanded its capabilities by introducing [Generative AI](https://cloud.google.com/vertex-ai/docs/generative-ai/learn/overview). This advanced technology comes with a specialized [in-console studio experience](https://cloud.google.com/vertex-ai/docs/generative-ai/start/quickstarts/quickstart), a [dedicated API](https://cloud.google.com/vertex-ai/docs/generative-ai/start/quickstarts/api-quickstart) and [Python SDK](https://cloud.google.com/vertex-ai/docs/python-sdk/use-vertex-ai-python-sdk) designed for deploying and managing instances of Google's powerful Gemini language models.\n", 17 | "\n", 18 | "Redis Enterprise offers robust vector database features, with an efficient API for vector index creation, management, distance metric selection, similarity search, and hybrid filtering. When coupled with its versatile data structures - including lists, hashes, JSON, and sets - Redis Enterprise shines as the optimal solution for crafting high-quality Large Language Model (LLM)-based applications. It embodies a streamlined architecture and exceptional performance, making it an instrumental tool for production environments.\n", 19 | "\n", 20 | "Below we will work through several design patterns with Vertex AI LLMs and Redis Enterprise that help ensure optimal production performance.\n", 21 | "\n", 22 | "___\n", 23 | "## Contents\n", 24 | "- Setup\n", 25 | " 1. Prerequisites\n", 26 | " 2. Obtain Dataset\n", 27 | " 3. Generate Embeddings\n", 28 | " 4. Create Index\n", 29 | " 5. Query\n", 30 | "- Building a RAG Pipeline from scratch\n", 31 | "- Demo\n", 32 | "\n", 33 | "___" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": { 39 | "id": "rK2rWODkw-kX" 40 | }, 41 | "source": [ 42 | "# Setup" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": { 48 | "id": "37rbBPKdL09o" 49 | }, 50 | "source": [ 51 | "## 1. 
Prerequisites\n", 52 | "Before we begin, we must install some required libraries, authenticate with Google, create a Redis database, and initialize other required components.\n", 53 | "\n", 54 | "### Install required libraries" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 1, 60 | "metadata": { 61 | "colab": { 62 | "base_uri": "https://localhost:8080/", 63 | "height": 1000 64 | }, 65 | "collapsed": true, 66 | "id": "pc-IxYu3wnQm", 67 | "outputId": "2b333ebc-0cd3-4e38-c8c7-6dc256d7939b" 68 | }, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "Collecting redisvl\n", 75 | " Downloading redisvl-0.2.0-py3-none-any.whl (63 kB)\n", 76 | "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/63.9 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━\u001b[0m \u001b[32m61.4/63.9 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.9/63.9 kB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 77 | "\u001b[?25hRequirement already satisfied: google-cloud-aiplatform in /usr/local/lib/python3.10/dist-packages (1.52.0)\n", 78 | "Collecting langchain-community\n", 79 | " Downloading langchain_community-0.2.1-py3-none-any.whl (2.1 MB)\n", 80 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m26.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 81 | "\u001b[?25hCollecting unstructured[pdf]\n", 82 | " Downloading unstructured-0.14.3-py3-none-any.whl (2.0 MB)\n", 83 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m48.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 84 | "\u001b[?25hCollecting gradio\n", 85 | " Downloading gradio-4.31.5-py3-none-any.whl (12.3 MB)\n", 86 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m34.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 87 | "\u001b[?25hCollecting coloredlogs (from redisvl)\n", 88 | " Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n", 89 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 90 | "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from redisvl) (1.25.2)\n", 91 | "Requirement already satisfied: pydantic<3,>=2 in /usr/local/lib/python3.10/dist-packages (from redisvl) (2.7.1)\n", 92 | "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from redisvl) (6.0.1)\n", 93 | "Collecting redis>=5.0.0 (from redisvl)\n", 94 | " Downloading redis-5.0.4-py3-none-any.whl (251 kB)\n", 95 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m252.0/252.0 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 96 | "\u001b[?25hRequirement already satisfied: tabulate<1,>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from redisvl) (0.9.0)\n", 97 | "Requirement already satisfied: tenacity>=8.2.2 in /usr/local/lib/python3.10/dist-packages (from redisvl) (8.3.0)\n", 98 | "Requirement already satisfied: 
google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.34.1 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform) (2.11.1)\n", 99 | "Requirement already satisfied: google-auth<3.0.0dev,>=2.14.1 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform) (2.27.0)\n", 100 | "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform) (1.23.0)\n", 101 | "Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform) (3.20.3)\n", 102 | "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform) (24.0)\n", 103 | "Requirement already satisfied: google-cloud-storage<3.0.0dev,>=1.32.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform) (2.8.0)\n", 104 | "Requirement already satisfied: google-cloud-bigquery!=3.20.0,<4.0.0dev,>=1.15.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform) (3.21.0)\n", 105 | "Requirement already satisfied: google-cloud-resource-manager<3.0.0dev,>=1.3.3 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform) (1.12.3)\n", 106 | "Requirement already satisfied: shapely<3.0.0dev in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform) (2.0.4)\n", 107 | "Requirement already satisfied: docstring-parser<1 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform) (0.16)\n", 108 | "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (2.0.30)\n", 109 | "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (3.9.5)\n", 110 | "Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)\n", 111 | " Downloading dataclasses_json-0.6.6-py3-none-any.whl (28 kB)\n", 112 | "Collecting langchain<0.3.0,>=0.2.0 (from langchain-community)\n", 113 | " Downloading langchain-0.2.1-py3-none-any.whl (973 kB)\n", 114 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m973.5/973.5 kB\u001b[0m \u001b[31m26.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 115 | "\u001b[?25hCollecting langchain-core<0.3.0,>=0.2.0 (from langchain-community)\n", 116 | " Downloading langchain_core-0.2.1-py3-none-any.whl (308 kB)\n", 117 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m308.5/308.5 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 118 | "\u001b[?25hCollecting langsmith<0.2.0,>=0.1.0 (from langchain-community)\n", 119 | " Downloading langsmith-0.1.63-py3-none-any.whl (122 kB)\n", 120 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m122.8/122.8 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 121 | "\u001b[?25hRequirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (2.31.0)\n", 122 | "Requirement already satisfied: chardet in /usr/local/lib/python3.10/dist-packages (from unstructured[pdf]) (5.2.0)\n", 123 | "Collecting filetype (from unstructured[pdf])\n", 124 | " Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)\n", 125 | "Collecting python-magic (from unstructured[pdf])\n", 126 | " Downloading 
python_magic-0.4.27-py2.py3-none-any.whl (13 kB)\n", 127 | "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from unstructured[pdf]) (4.9.4)\n", 128 | "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from unstructured[pdf]) (3.8.1)\n", 129 | "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from unstructured[pdf]) (4.12.3)\n", 130 | "Collecting emoji (from unstructured[pdf])\n", 131 | " Downloading emoji-2.12.1-py3-none-any.whl (431 kB)\n", 132 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m431.4/431.4 kB\u001b[0m \u001b[31m25.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 133 | "\u001b[?25hCollecting python-iso639 (from unstructured[pdf])\n", 134 | " Downloading python_iso639-2024.4.27-py3-none-any.whl (274 kB)\n", 135 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m274.7/274.7 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 136 | "\u001b[?25hCollecting langdetect (from unstructured[pdf])\n", 137 | " Downloading langdetect-1.0.9.tar.gz (981 kB)\n", 138 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m981.5/981.5 kB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 139 | "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 140 | "Collecting rapidfuzz (from unstructured[pdf])\n", 141 | " Downloading rapidfuzz-3.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n", 142 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m48.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 143 | "\u001b[?25hCollecting backoff (from unstructured[pdf])\n", 144 | " Downloading backoff-2.2.1-py3-none-any.whl (15 kB)\n", 145 | "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from unstructured[pdf]) (4.11.0)\n", 146 | "Collecting unstructured-client (from unstructured[pdf])\n", 147 | " Downloading unstructured_client-0.22.0-py3-none-any.whl (28 kB)\n", 148 | "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from unstructured[pdf]) (1.14.1)\n", 149 | "Collecting onnx (from unstructured[pdf])\n", 150 | " Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)\n", 151 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m57.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 152 | "\u001b[?25hCollecting pdf2image (from unstructured[pdf])\n", 153 | " Downloading pdf2image-1.17.0-py3-none-any.whl (11 kB)\n", 154 | "Collecting pdfminer.six (from unstructured[pdf])\n", 155 | " Downloading pdfminer.six-20231228-py3-none-any.whl (5.6 MB)\n", 156 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m50.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 157 | "\u001b[?25hCollecting pikepdf (from unstructured[pdf])\n", 158 | " Downloading pikepdf-8.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)\n", 159 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m30.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 160 | "\u001b[?25hCollecting pillow-heif (from unstructured[pdf])\n", 161 | " Downloading 
pillow_heif-0.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.5 MB)\n", 162 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.5/7.5 MB\u001b[0m \u001b[31m46.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 163 | "\u001b[?25hCollecting pypdf (from unstructured[pdf])\n", 164 | " Downloading pypdf-4.2.0-py3-none-any.whl (290 kB)\n", 165 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m290.4/290.4 kB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 166 | "\u001b[?25hCollecting pytesseract (from unstructured[pdf])\n", 167 | " Downloading pytesseract-0.3.10-py3-none-any.whl (14 kB)\n", 168 | "Collecting google-cloud-vision (from unstructured[pdf])\n", 169 | " Downloading google_cloud_vision-3.7.2-py2.py3-none-any.whl (459 kB)\n", 170 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m459.6/459.6 kB\u001b[0m \u001b[31m22.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 171 | "\u001b[?25hCollecting effdet (from unstructured[pdf])\n", 172 | " Downloading effdet-0.4.1-py3-none-any.whl (112 kB)\n", 173 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m112.5/112.5 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 174 | "\u001b[?25hCollecting unstructured-inference==0.7.33 (from unstructured[pdf])\n", 175 | " Downloading unstructured_inference-0.7.33-py3-none-any.whl (56 kB)\n", 176 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.2/56.2 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 177 | "\u001b[?25hCollecting unstructured.pytesseract>=0.3.12 (from unstructured[pdf])\n", 178 | " Downloading unstructured.pytesseract-0.3.12-py3-none-any.whl (14 kB)\n", 179 | "Collecting layoutparser (from unstructured-inference==0.7.33->unstructured[pdf])\n", 180 | " Downloading layoutparser-0.3.4-py3-none-any.whl (19.2 MB)\n", 181 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.2/19.2 MB\u001b[0m \u001b[31m38.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 182 | "\u001b[?25hCollecting python-multipart (from unstructured-inference==0.7.33->unstructured[pdf])\n", 183 | " Downloading python_multipart-0.0.9-py3-none-any.whl (22 kB)\n", 184 | "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.33->unstructured[pdf]) (0.23.1)\n", 185 | "Requirement already satisfied: opencv-python!=4.7.0.68 in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.33->unstructured[pdf]) (4.8.0.76)\n", 186 | "Collecting onnxruntime>=1.17.0 (from unstructured-inference==0.7.33->unstructured[pdf])\n", 187 | " Downloading onnxruntime-1.18.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.8 MB)\n", 188 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m59.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 189 | "\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.33->unstructured[pdf]) (3.7.1)\n", 190 | "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.33->unstructured[pdf]) (2.3.0+cu121)\n", 191 | "Collecting timm (from unstructured-inference==0.7.33->unstructured[pdf])\n", 192 | " Downloading timm-1.0.3-py3-none-any.whl (2.3 
MB)\n", 193 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m59.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 194 | "\u001b[?25hRequirement already satisfied: transformers>=4.25.1 in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.33->unstructured[pdf]) (4.41.1)\n", 195 | "Collecting aiofiles<24.0,>=22.0 (from gradio)\n", 196 | " Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n", 197 | "Requirement already satisfied: altair<6.0,>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.2.2)\n", 198 | "Collecting fastapi (from gradio)\n", 199 | " Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)\n", 200 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 201 | "\u001b[?25hCollecting ffmpy (from gradio)\n", 202 | " Downloading ffmpy-0.3.2.tar.gz (5.5 kB)\n", 203 | " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 204 | "Collecting gradio-client==0.16.4 (from gradio)\n", 205 | " Downloading gradio_client-0.16.4-py3-none-any.whl (315 kB)\n", 206 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m315.9/315.9 kB\u001b[0m \u001b[31m33.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 207 | "\u001b[?25hCollecting httpx>=0.24.1 (from gradio)\n", 208 | " Downloading httpx-0.27.0-py3-none-any.whl (75 kB)\n", 209 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 210 | "\u001b[?25hRequirement already satisfied: importlib-resources<7.0,>=1.3 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.4.0)\n", 211 | "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.1.4)\n", 212 | "Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.1.5)\n", 213 | "Collecting orjson~=3.0 (from gradio)\n", 214 | " Downloading orjson-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (142 kB)\n", 215 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m16.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 216 | "\u001b[?25hRequirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.0.3)\n", 217 | "Requirement already satisfied: pillow<11.0,>=8.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (9.4.0)\n", 218 | "Collecting pydub (from gradio)\n", 219 | " Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", 220 | "Collecting ruff>=0.2.2 (from gradio)\n", 221 | " Downloading ruff-0.4.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.8 MB)\n", 222 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.8/8.8 MB\u001b[0m \u001b[31m84.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 223 | "\u001b[?25hCollecting semantic-version~=2.0 (from gradio)\n", 224 | " Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n", 225 | "Collecting tomlkit==0.12.0 (from gradio)\n", 226 | " Downloading tomlkit-0.12.0-py3-none-any.whl (37 kB)\n", 227 | "Collecting typer<1.0,>=0.12 (from gradio)\n", 228 | " Downloading typer-0.12.3-py3-none-any.whl (47 kB)\n", 229 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 
kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 230 | "\u001b[?25hRequirement already satisfied: urllib3~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.0.7)\n", 231 | "Collecting uvicorn>=0.14.0 (from gradio)\n", 232 | " Downloading uvicorn-0.30.0-py3-none-any.whl (62 kB)\n", 233 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.4/62.4 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 234 | "\u001b[?25hRequirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from gradio-client==0.16.4->gradio) (2023.6.0)\n", 235 | "Collecting websockets<12.0,>=10.0 (from gradio-client==0.16.4->gradio)\n", 236 | " Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n", 237 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 238 | "\u001b[?25hRequirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.3.1)\n", 239 | "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (23.2.0)\n", 240 | "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.4.1)\n", 241 | "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (6.0.5)\n", 242 | "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.9.4)\n", 243 | "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (4.0.3)\n", 244 | "Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (0.4)\n", 245 | "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (4.19.2)\n", 246 | "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (0.12.1)\n", 247 | "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)\n", 248 | " Downloading marshmallow-3.21.2-py3-none-any.whl (49 kB)\n", 249 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 250 | "\u001b[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)\n", 251 | " Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", 252 | "Requirement already satisfied: googleapis-common-protos<2.0.dev0,>=1.56.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.34.1->google-cloud-aiplatform) (1.63.0)\n", 253 | "Requirement already satisfied: grpcio<2.0dev,>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.34.1->google-cloud-aiplatform) (1.64.0)\n", 254 | "Requirement already satisfied: 
grpcio-status<2.0.dev0,>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.34.1->google-cloud-aiplatform) (1.48.2)\n", 255 | "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.0dev,>=2.14.1->google-cloud-aiplatform) (5.3.3)\n", 256 | "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.0dev,>=2.14.1->google-cloud-aiplatform) (0.4.0)\n", 257 | "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.0dev,>=2.14.1->google-cloud-aiplatform) (4.9)\n", 258 | "Requirement already satisfied: google-cloud-core<3.0.0dev,>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-bigquery!=3.20.0,<4.0.0dev,>=1.15.0->google-cloud-aiplatform) (2.3.3)\n", 259 | "Requirement already satisfied: google-resumable-media<3.0dev,>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-bigquery!=3.20.0,<4.0.0dev,>=1.15.0->google-cloud-aiplatform) (2.7.0)\n", 260 | "Requirement already satisfied: python-dateutil<3.0dev,>=2.7.2 in /usr/local/lib/python3.10/dist-packages (from google-cloud-bigquery!=3.20.0,<4.0.0dev,>=1.15.0->google-cloud-aiplatform) (2.8.2)\n", 261 | "Requirement already satisfied: grpc-google-iam-v1<1.0.0dev,>=0.12.4 in /usr/local/lib/python3.10/dist-packages (from google-cloud-resource-manager<3.0.0dev,>=1.3.3->google-cloud-aiplatform) (0.13.0)\n", 262 | "Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio) (3.7.1)\n", 263 | "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio) (2024.2.2)\n", 264 | "Collecting httpcore==1.* (from httpx>=0.24.1->gradio)\n", 265 | " Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n", 266 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 267 | "\u001b[?25hRequirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio) (3.7)\n", 268 | "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio) (1.3.1)\n", 269 | "Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx>=0.24.1->gradio)\n", 270 | " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", 271 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 272 | "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->unstructured-inference==0.7.33->unstructured[pdf]) (3.14.0)\n", 273 | "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->unstructured-inference==0.7.33->unstructured[pdf]) (4.66.4)\n", 274 | "Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain<0.3.0,>=0.2.0->langchain-community)\n", 275 | " Downloading langchain_text_splitters-0.2.0-py3-none-any.whl (23 kB)\n", 276 | "Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.0->langchain-community)\n", 277 | " Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n", 278 | "Collecting packaging>=14.3 (from google-cloud-aiplatform)\n", 279 | " Downloading 
packaging-23.2-py3-none-any.whl (53 kB)\n", 280 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 281 | "\u001b[?25hRequirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->unstructured-inference==0.7.33->unstructured[pdf]) (1.2.1)\n", 282 | "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->unstructured-inference==0.7.33->unstructured[pdf]) (0.12.1)\n", 283 | "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->unstructured-inference==0.7.33->unstructured[pdf]) (4.51.0)\n", 284 | "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->unstructured-inference==0.7.33->unstructured[pdf]) (1.4.5)\n", 285 | "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->unstructured-inference==0.7.33->unstructured[pdf]) (3.1.2)\n", 286 | "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio) (2023.4)\n", 287 | "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio) (2024.1)\n", 288 | "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=2->redisvl) (0.7.0)\n", 289 | "Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=2->redisvl) (2.18.2)\n", 290 | "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (3.3.2)\n", 291 | "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain-community) (3.0.3)\n", 292 | "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio) (8.1.7)\n", 293 | "Collecting shellingham>=1.3.0 (from typer<1.0,>=0.12->gradio)\n", 294 | " Downloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)\n", 295 | "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio) (13.7.1)\n", 296 | "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->unstructured[pdf]) (2.5)\n", 297 | "Collecting humanfriendly>=9.1 (from coloredlogs->redisvl)\n", 298 | " Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n", 299 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 300 | "\u001b[?25hRequirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from effdet->unstructured[pdf]) (0.18.0+cu121)\n", 301 | "Requirement already satisfied: pycocotools>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from effdet->unstructured[pdf]) (2.0.7)\n", 302 | "Collecting omegaconf>=2.0 (from effdet->unstructured[pdf])\n", 303 | " Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n", 304 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 305 | "\u001b[?25hCollecting 
starlette<0.38.0,>=0.37.2 (from fastapi->gradio)\n", 306 | " Downloading starlette-0.37.2-py3-none-any.whl (71 kB)\n", 307 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 308 | "\u001b[?25hCollecting fastapi-cli>=0.0.2 (from fastapi->gradio)\n", 309 | " Downloading fastapi_cli-0.0.4-py3-none-any.whl (9.5 kB)\n", 310 | "Collecting ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 (from fastapi->gradio)\n", 311 | " Downloading ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)\n", 312 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 313 | "\u001b[?25hCollecting email_validator>=2.0.0 (from fastapi->gradio)\n", 314 | " Downloading email_validator-2.1.1-py3-none-any.whl (30 kB)\n", 315 | "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from langdetect->unstructured[pdf]) (1.16.0)\n", 316 | "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->unstructured[pdf]) (1.4.2)\n", 317 | "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk->unstructured[pdf]) (2024.5.15)\n", 318 | "Requirement already satisfied: cryptography>=36.0.0 in /usr/local/lib/python3.10/dist-packages (from pdfminer.six->unstructured[pdf]) (42.0.7)\n", 319 | "Collecting pillow<11.0,>=8.0 (from gradio)\n", 320 | " Downloading pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl (4.5 MB)\n", 321 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m21.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 322 | "\u001b[?25hCollecting Deprecated (from pikepdf->unstructured[pdf])\n", 323 | " Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)\n", 324 | "Collecting deepdiff>=6.0 (from unstructured-client->unstructured[pdf])\n", 325 | " Downloading deepdiff-7.0.1-py3-none-any.whl (80 kB)\n", 326 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.8/80.8 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 327 | "\u001b[?25hCollecting jsonpath-python>=1.0.6 (from unstructured-client->unstructured[pdf])\n", 328 | " Downloading jsonpath_python-1.0.6-py3-none-any.whl (7.6 kB)\n", 329 | "Collecting mypy-extensions>=1.0.0 (from unstructured-client->unstructured[pdf])\n", 330 | " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", 331 | "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.10/dist-packages (from cryptography>=36.0.0->pdfminer.six->unstructured[pdf]) (1.16.0)\n", 332 | "Collecting ordered-set<4.2.0,>=4.1.0 (from deepdiff>=6.0->unstructured-client->unstructured[pdf])\n", 333 | " Downloading ordered_set-4.1.0-py3-none-any.whl (7.6 kB)\n", 334 | "Collecting dnspython>=2.0.0 (from email_validator>=2.0.0->fastapi->gradio)\n", 335 | " Downloading dnspython-2.6.1-py3-none-any.whl (307 kB)\n", 336 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m31.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 337 | "\u001b[?25hRequirement already satisfied: google-crc32c<2.0dev,>=1.0 in /usr/local/lib/python3.10/dist-packages (from google-resumable-media<3.0dev,>=0.6.0->google-cloud-bigquery!=3.20.0,<4.0.0dev,>=1.15.0->google-cloud-aiplatform) 
(1.5.0)\n", 338 | "Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain-core<0.3.0,>=0.2.0->langchain-community)\n", 339 | " Downloading jsonpointer-2.4-py2.py3-none-any.whl (7.8 kB)\n", 340 | "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (2023.12.1)\n", 341 | "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.35.1)\n", 342 | "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.18.1)\n", 343 | "Collecting antlr4-python3-runtime==4.9.* (from omegaconf>=2.0->effdet->unstructured[pdf])\n", 344 | " Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n", 345 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m12.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 346 | "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 347 | "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.17.0->unstructured-inference==0.7.33->unstructured[pdf]) (24.3.25)\n", 348 | "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.17.0->unstructured-inference==0.7.33->unstructured[pdf]) (1.12)\n", 349 | "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3.0.0dev,>=2.14.1->google-cloud-aiplatform) (0.6.0)\n", 350 | "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)\n", 351 | "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.16.1)\n", 352 | "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx>=0.24.1->gradio) (1.2.1)\n", 353 | "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from timm->unstructured-inference==0.7.33->unstructured[pdf]) (0.4.3)\n", 354 | "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->unstructured-inference==0.7.33->unstructured[pdf]) (3.3)\n", 355 | "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n", 356 | " Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n", 357 | "Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n", 358 | " Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n", 359 | "Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n", 360 | " Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n", 361 | "Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n", 362 | " Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n", 363 | "Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n", 364 | " Using cached 
nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n", 365 | "Collecting nvidia-cufft-cu12==11.0.2.54 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n", 366 | " Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n", 367 | "Collecting nvidia-curand-cu12==10.3.2.106 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n", 368 | " Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n", 369 | "Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n", 370 | " Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n", 371 | "Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n", 372 | " Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n", 373 | "Collecting nvidia-nccl-cu12==2.20.5 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n", 374 | " Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n", 375 | "Collecting nvidia-nvtx-cu12==12.1.105 (from torch->unstructured-inference==0.7.33->unstructured[pdf])\n", 376 | " Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n", 377 | "Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch->unstructured-inference==0.7.33->unstructured[pdf]) (2.3.0)\n", 378 | "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch->unstructured-inference==0.7.33->unstructured[pdf])\n", 379 | " Downloading nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl (21.3 MB)\n", 380 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m60.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 381 | "\u001b[?25hRequirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.25.1->unstructured-inference==0.7.33->unstructured[pdf]) (0.19.1)\n", 382 | "Collecting httptools>=0.5.0 (from uvicorn>=0.14.0->gradio)\n", 383 | " Downloading httptools-0.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (341 kB)\n", 384 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m34.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 385 | "\u001b[?25hCollecting python-dotenv>=0.13 (from uvicorn>=0.14.0->gradio)\n", 386 | " Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", 387 | "Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn>=0.14.0->gradio)\n", 388 | " Downloading uvloop-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n", 389 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m67.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 390 | "\u001b[?25hCollecting watchfiles>=0.13 (from uvicorn>=0.14.0->gradio)\n", 391 | " Downloading watchfiles-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", 392 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m49.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 393 | "\u001b[?25hRequirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from 
layoutparser->unstructured-inference==0.7.33->unstructured[pdf]) (1.11.4)\n", 394 | "Collecting iopath (from layoutparser->unstructured-inference==0.7.33->unstructured[pdf])\n", 395 | " Downloading iopath-0.1.10.tar.gz (42 kB)\n", 396 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.2/42.2 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 397 | "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 398 | "Collecting pdfplumber (from layoutparser->unstructured-inference==0.7.33->unstructured[pdf])\n", 399 | " Downloading pdfplumber-0.11.0-py3-none-any.whl (56 kB)\n", 400 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.4/56.4 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 401 | "\u001b[?25hRequirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six->unstructured[pdf]) (2.22)\n", 402 | "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)\n", 403 | "Collecting portalocker (from iopath->layoutparser->unstructured-inference==0.7.33->unstructured[pdf])\n", 404 | " Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)\n", 405 | "Collecting pypdfium2>=4.18.0 (from pdfplumber->layoutparser->unstructured-inference==0.7.33->unstructured[pdf])\n", 406 | " Downloading pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.8 MB)\n", 407 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.8/2.8 MB\u001b[0m \u001b[31m90.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 408 | "\u001b[?25hRequirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime>=1.17.0->unstructured-inference==0.7.33->unstructured[pdf]) (1.3.0)\n", 409 | "Building wheels for collected packages: ffmpy, langdetect, antlr4-python3-runtime, iopath\n", 410 | " Building wheel for ffmpy (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 411 | " Created wheel for ffmpy: filename=ffmpy-0.3.2-py3-none-any.whl size=5584 sha256=4a246b2d6c476603c97ede4ea813b21cc294654d5fd1e1e3f3edae0294c86ef4\n", 412 | " Stored in directory: /root/.cache/pip/wheels/bd/65/9a/671fc6dcde07d4418df0c592f8df512b26d7a0029c2a23dd81\n", 413 | " Building wheel for langdetect (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 414 | " Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993227 sha256=6506fc3e8eaea7efe256e8a35215d94a82259e3ff05368c5b4c942de00352cda\n", 415 | " Stored in directory: /root/.cache/pip/wheels/95/03/7d/59ea870c70ce4e5a370638b5462a7711ab78fba2f655d05106\n", 416 | " Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 417 | " Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144554 sha256=bbdaeecb21d60240939c6c994934ed32decf60e284169d4f6cda20729005279a\n", 418 | " Stored in directory: /root/.cache/pip/wheels/12/93/dd/1f6a127edc45659556564c5730f6d4e300888f4bca2d4c5a88\n", 419 | " Building wheel for iopath (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", 420 | " Created wheel for iopath: filename=iopath-0.1.10-py3-none-any.whl size=31532 sha256=6acff56c46639790ab035a659f2b4442ed10883d95bfe85c44d1d53dfce3bd8d\n", 421 | " Stored in directory: /root/.cache/pip/wheels/9a/a3/b6/ac0fcd1b4ed5cfeb3db92e6a0e476cfd48ed0df92b91080c1d\n", 422 | "Successfully built ffmpy langdetect antlr4-python3-runtime iopath\n", 423 | "Installing collected packages: pydub, filetype, ffmpy, antlr4-python3-runtime, websockets, uvloop, ujson, tomlkit, shellingham, semantic-version, ruff, redis, rapidfuzz, python-multipart, python-magic, python-iso639, python-dotenv, pypdfium2, pypdf, portalocker, pillow, packaging, orjson, ordered-set, onnx, omegaconf, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, mypy-extensions, langdetect, jsonpointer, jsonpath-python, humanfriendly, httptools, h11, emoji, dnspython, Deprecated, backoff, aiofiles, watchfiles, uvicorn, unstructured.pytesseract, typing-inspect, starlette, pytesseract, pillow-heif, pikepdf, pdf2image, nvidia-cusparse-cu12, nvidia-cudnn-cu12, marshmallow, jsonpatch, iopath, httpcore, email_validator, deepdiff, coloredlogs, typer, redisvl, pdfminer.six, onnxruntime, nvidia-cusolver-cu12, langsmith, httpx, dataclasses-json, unstructured-client, pdfplumber, langchain-core, gradio-client, fastapi-cli, unstructured, layoutparser, langchain-text-splitters, google-cloud-vision, fastapi, timm, langchain, gradio, unstructured-inference, langchain-community, effdet\n", 424 | " Attempting uninstall: pillow\n", 425 | " Found existing installation: Pillow 9.4.0\n", 426 | " Uninstalling Pillow-9.4.0:\n", 427 | " Successfully uninstalled Pillow-9.4.0\n", 428 | " Attempting uninstall: packaging\n", 429 | " Found existing installation: packaging 24.0\n", 430 | " Uninstalling packaging-24.0:\n", 431 | " Successfully uninstalled packaging-24.0\n", 432 | " Attempting uninstall: typer\n", 433 | " Found existing installation: typer 0.9.4\n", 434 | " Uninstalling typer-0.9.4:\n", 435 | " Successfully uninstalled typer-0.9.4\n", 436 | "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", 437 | "imageio 2.31.6 requires pillow<10.1.0,>=8.3.2, but you have pillow 10.3.0 which is incompatible.\n", 438 | "spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n", 439 | "weasel 0.3.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\u001b[0m\u001b[31m\n", 440 | "\u001b[0mSuccessfully installed Deprecated-1.2.14 aiofiles-23.2.1 antlr4-python3-runtime-4.9.3 backoff-2.2.1 coloredlogs-15.0.1 dataclasses-json-0.6.6 deepdiff-7.0.1 dnspython-2.6.1 effdet-0.4.1 email_validator-2.1.1 emoji-2.12.1 fastapi-0.111.0 fastapi-cli-0.0.4 ffmpy-0.3.2 filetype-1.2.0 google-cloud-vision-3.7.2 gradio-4.31.5 gradio-client-0.16.4 h11-0.14.0 httpcore-1.0.5 httptools-0.6.1 httpx-0.27.0 humanfriendly-10.0 iopath-0.1.10 jsonpatch-1.33 jsonpath-python-1.0.6 jsonpointer-2.4 langchain-0.2.1 langchain-community-0.2.1 langchain-core-0.2.1 langchain-text-splitters-0.2.0 langdetect-1.0.9 langsmith-0.1.63 layoutparser-0.3.4 marshmallow-3.21.2 mypy-extensions-1.0.0 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.5.40 nvidia-nvtx-cu12-12.1.105 omegaconf-2.3.0 onnx-1.16.1 onnxruntime-1.18.0 ordered-set-4.1.0 orjson-3.10.3 packaging-23.2 pdf2image-1.17.0 pdfminer.six-20231228 pdfplumber-0.11.0 pikepdf-8.15.1 pillow-10.3.0 pillow-heif-0.16.0 portalocker-2.8.2 pydub-0.25.1 pypdf-4.2.0 pypdfium2-4.30.0 pytesseract-0.3.10 python-dotenv-1.0.1 python-iso639-2024.4.27 python-magic-0.4.27 python-multipart-0.0.9 rapidfuzz-3.9.2 redis-5.0.4 redisvl-0.2.0 ruff-0.4.6 semantic-version-2.10.0 shellingham-1.5.4 starlette-0.37.2 timm-1.0.3 tomlkit-0.12.0 typer-0.12.3 typing-inspect-0.9.0 ujson-5.10.0 unstructured-0.14.3 unstructured-client-0.22.0 unstructured-inference-0.7.33 unstructured.pytesseract-0.3.12 uvicorn-0.30.0 uvloop-0.19.0 watchfiles-0.22.0 websockets-11.0.3\n" 441 | ] 442 | }, 443 | { 444 | "data": { 445 | "application/vnd.colab-display-data+json": { 446 | "id": "3b79e51e64a24b3ea311607a182d9806", 447 | "pip_warning": { 448 | "packages": [ 449 | "PIL", 450 | "google", 451 | "pydevd_plugins" 452 | ] 453 | } 454 | } 455 | }, 456 | "metadata": {}, 457 | "output_type": "display_data" 458 | }, 459 | { 460 | "data": { 461 | "text/plain": [ 462 | "{'status': 'ok', 'restart': True}" 463 | ] 464 | }, 465 | "execution_count": 1, 466 | "metadata": {}, 467 | "output_type": "execute_result" 468 | } 469 | ], 470 | "source": [ 471 | "!pip install -U \"redisvl>=0.3.7\" google-cloud-aiplatform langchain-community unstructured[pdf] gradio\n", 472 | "\n", 473 | "import IPython\n", 474 | "\n", 475 | "app = IPython.Application.instance()\n", 476 | "app.kernel.do_shutdown(True)" 477 | ] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "metadata": { 482 | "id": "jr_IviwqFS7K" 483 | }, 484 | "source": [ 485 | "### Using Free Redis Cloud account on GCP\n", 486 | "You can also use Forever Free instance of Redis Cloud. 
To activate it:\n", 487 | "- Head to https://redis.com/try-free/\n", 488 | "- Register (Gmail-based registration is the easiest)\n", 489 | "- Create New Subscription\n", 490 | "- Use the following options:\n", 491 | " - Fixed plan, Google Cloud\n", 492 | " - New 30 MB Free database\n", 493 | "- Create a new Redis Stack database\n", 494 | "\n", 495 | "If you are registering with Redis Cloud for the first time, the last few steps will be performed for you by default. Capture the host, port, and default password of the new database; you can use these instead of the default `localhost`-based values in the following code block." 496 | ] 497 | }, 498 | { 499 | "cell_type": "markdown", 500 | "metadata": { 501 | "id": "p5kx9ePDwwp6" 502 | }, 503 | "source": [ 504 | "^^^ If prompted, press the Restart button to restart the kernel. ^^^\n", 505 | "\n", 506 | "### Install Redis locally (optional)\n", 507 | "If you have a Redis database running elsewhere with [Redis Stack](https://redis.io/docs/about/about-stack/) installed, you don't need to run it on this machine. You can skip to the \"Connect to Redis server\" step." 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": 1, 513 | "metadata": { 514 | "colab": { 515 | "base_uri": "https://localhost:8080/" 516 | }, 517 | "id": "vs4KZURX4XpT", 518 | "outputId": "793ec489-129a-4596-b5bf-f093f40ef56c" 519 | }, 520 | "outputs": [ 521 | { 522 | "name": "stdout", 523 | "output_type": "stream", 524 | "text": [ 525 | "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb jammy main\n", 526 | "Starting redis-stack-server, database path /var/lib/redis-stack\n" 527 | ] 528 | }, 529 | { 530 | "name": "stderr", 531 | "output_type": "stream", 532 | "text": [ 533 | "gpg: cannot open '/dev/tty': No such device or address\n", 534 | "curl: (23) Failed writing body\n" 535 | ] 536 | } 537 | ], 538 | "source": [ 539 | "%%sh\n", 540 | "curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg\n", 541 | "echo \"deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/redis.list\n", 542 | "sudo apt-get update > /dev/null 2>&1\n", 543 | "sudo apt-get install redis-stack-server > /dev/null 2>&1\n", 544 | "redis-stack-server --daemonize yes" 545 | ] 546 | }, 547 | { 548 | "cell_type": "markdown", 549 | "metadata": { 550 | "id": "zvDp8WNz4XpU" 551 | }, 552 | "source": [ 553 | "### Connect to Redis server\n", 554 | "Replace the connection params below with your own if you are connecting to an external Redis instance."
555 | ] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": 2, 560 | "metadata": { 561 | "colab": { 562 | "base_uri": "https://localhost:8080/" 563 | }, 564 | "id": "duCyNgfZ4XpU", 565 | "outputId": "12d97af1-9d7d-471f-8c10-4aee284a8fe5" 566 | }, 567 | "outputs": [ 568 | { 569 | "data": { 570 | "text/plain": [ 571 | "True" 572 | ] 573 | }, 574 | "execution_count": 2, 575 | "metadata": {}, 576 | "output_type": "execute_result" 577 | } 578 | ], 579 | "source": [ 580 | "import os\n", 581 | "import redis\n", 582 | "\n", 583 | "# Redis connection params\n", 584 | "REDIS_HOST = os.getenv(\"REDIS_HOST\", \"localhost\") #\"redis-12110.c82.us-east-1-2.ec2.cloud.redislabs.com\"\n", 585 | "REDIS_PORT = os.getenv(\"REDIS_PORT\", \"6379\") #12110\n", 586 | "REDIS_PASSWORD = os.getenv(\"REDIS_PASSWORD\", \"\") #\"pobhBJP7Psicp2gV0iqa2ZOc1WdXXXXX\"\n", 587 | "\n", 588 | "# Create Redis client\n", 589 | "redis_client = redis.Redis(\n", 590 | " host=REDIS_HOST,\n", 591 | " port=REDIS_PORT,\n", 592 | " password=REDIS_PASSWORD\n", 593 | ")\n", 594 | "\n", 595 | "# Test connection\n", 596 | "redis_client.ping()" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": 3, 602 | "metadata": { 603 | "colab": { 604 | "base_uri": "https://localhost:8080/" 605 | }, 606 | "id": "0Rrz76w96dF3", 607 | "outputId": "a7fd6b35-736b-4ff6-b2be-c2ff963adc96" 608 | }, 609 | "outputs": [ 610 | { 611 | "data": { 612 | "text/plain": [ 613 | "True" 614 | ] 615 | }, 616 | "execution_count": 3, 617 | "metadata": {}, 618 | "output_type": "execute_result" 619 | } 620 | ], 621 | "source": [ 622 | "# Clear Redis database (optional)\n", 623 | "redis_client.flushdb()" 624 | ] 625 | }, 626 | { 627 | "cell_type": "markdown", 628 | "metadata": { 629 | "id": "BpjxW-kou-FY" 630 | }, 631 | "source": [ 632 | "### Authenticate to Google Cloud" 633 | ] 634 | }, 635 | { 636 | "cell_type": "code", 637 | "execution_count": 4, 638 | "metadata": { 639 | "colab": { 640 | "base_uri": "https://localhost:8080/" 641 | }, 642 | "id": "SeTJb51SKs_W", 643 | "outputId": "68d9a5dc-5e6f-4959-b2ff-7533c580d473" 644 | }, 645 | "outputs": [ 646 | { 647 | "name": "stdout", 648 | "output_type": "stream", 649 | "text": [ 650 | "Authenticated\n" 651 | ] 652 | } 653 | ], 654 | "source": [ 655 | "from google.colab import auth\n", 656 | "auth.authenticate_user()\n", 657 | "print('Authenticated')" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": 5, 663 | "metadata": { 664 | "colab": { 665 | "base_uri": "https://localhost:8080/" 666 | }, 667 | "id": "8Yil6twAvIuH", 668 | "outputId": "c0a1240c-b892-4bd0-cf71-6e43952bc422" 669 | }, 670 | "outputs": [ 671 | { 672 | "name": "stdout", 673 | "output_type": "stream", 674 | "text": [ 675 | "PROJECT_ID:··········\n", 676 | "REGION:us-central1\n" 677 | ] 678 | } 679 | ], 680 | "source": [ 681 | "from getpass import getpass\n", 682 | "\n", 683 | "# input your GCP project ID and region for Vertex AI\n", 684 | "PROJECT_ID = getpass(\"PROJECT_ID:\") #'central-beach-194106'\n", 685 | "REGION = input(\"REGION:\") #'us-central1'" 686 | ] 687 | }, 688 | { 689 | "cell_type": "markdown", 690 | "metadata": { 691 | "id": "9vDGqjHmVgjB" 692 | }, 693 | "source": [ 694 | "## 2. Obtain dataset\n", 695 | "\n", 696 | "Below we pull the dataset: a publication PDF downloaded from the IRS website."
697 | ] 698 | }, 699 | { 700 | "cell_type": "code", 701 | "execution_count": 6, 702 | "metadata": { 703 | "colab": { 704 | "base_uri": "https://localhost:8080/" 705 | }, 706 | "id": "Pxshas3XpgdV", 707 | "outputId": "d4085964-d3f5-430d-c48d-0959ccd1d8dc" 708 | }, 709 | "outputs": [ 710 | { 711 | "name": "stdout", 712 | "output_type": "stream", 713 | "text": [ 714 | "--2024-05-29 17:44:32-- https://www.irs.gov/pub/irs-pdf/p5718.pdf\n", 715 | "Resolving www.irs.gov (www.irs.gov)... 23.201.171.228, 2600:1408:5400:4b2::f50, 2600:1408:5400:48d::f50\n", 716 | "Connecting to www.irs.gov (www.irs.gov)|23.201.171.228|:443... connected.\n", 717 | "HTTP request sent, awaiting response... 200 OK\n", 718 | "Length: 8749823 (8.3M) [application/pdf]\n", 719 | "Saving to: ‘resources/p5718.pdf’\n", 720 | "\n", 721 | "p5718.pdf 100%[===================>] 8.34M --.-KB/s in 0.1s \n", 722 | "\n", 723 | "2024-05-29 17:44:32 (60.5 MB/s) - ‘resources/p5718.pdf’ saved [8749823/8749823]\n", 724 | "\n" 725 | ] 726 | } 727 | ], 728 | "source": [ 729 | "# Procure a dataset: download a publication from the IRS\n", 730 | "!mkdir resources\n", 731 | "!wget https://www.irs.gov/pub/irs-pdf/p5718.pdf -P resources/" 732 | ] 733 | }, 734 | { 735 | "cell_type": "markdown", 736 | "metadata": { 737 | "id": "0Kh0ObD4xZtK" 738 | }, 739 | "source": [ 740 | "### Create text embeddings with Vertex AI embedding model\n", 741 | "Use the [Vertex AI API for text embeddings](https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings), developed by Google.\n", 742 | "\n", 743 | "> Text embeddings are a dense vector representation of a piece of content such that, if two pieces of content are semantically similar, their respective embeddings are located near each other in the embedding vector space. This representation can be used to solve common NLP tasks, such as:\n", 744 | "> - **Semantic search**: Search text ranked by semantic similarity.\n", 745 | "> - **Recommendation**: Return items with text attributes similar to the given text.\n", 746 | "> - **Classification**: Return the class of items whose text attributes are similar to the given text.\n", 747 | "> - **Clustering**: Cluster items whose text attributes are similar to the given text.\n", 748 | "> - **Outlier Detection**: Return items where text attributes are least related to the given text.\n", 749 | "\n", 750 | "The Vertex AI text-embeddings API lets you create a text embedding using Generative AI on Vertex AI. For example, the `textembedding-gecko` model accepts a maximum of 3,072 input tokens (tokens, not words) and outputs 768-dimensional vector embeddings; this notebook uses the newer `text-embedding-004` model, which also produces 768-dimensional embeddings." 751 | ] 752 | }, 753 | { 754 | "cell_type": "markdown", 755 | "metadata": { 756 | "id": "x3o_9ehYuEpA" 757 | }, 758 | "source": [ 759 | "### Set up embeddings\n", 760 | "We instantiate a vectorizer that creates embeddings from a list of texts and converts them to byte strings for efficient storage in Redis.\n", 761 | "\n" 762 | ] 763 | }, 764 | { 765 | "cell_type": "code", 766 | "execution_count": 7, 767 | "metadata": { 768 | "id": "zrpKY4W5yb0M" 769 | }, 770 | "outputs": [], 771 | "source": [ 772 | "from redisvl.utils.vectorize import VertexAITextVectorizer\n", 773 | "\n", 774 | "vectorizer = VertexAITextVectorizer(\n", 775 | " model = \"text-embedding-004\",\n", 776 | " api_config = {\"project_id\": PROJECT_ID, \"location\": REGION}\n", 777 | ")" 778 | ] 779 | },
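{ "cell_type": "markdown", "metadata": {}, "source": [ "Before embedding the whole dataset, it can help to sanity-check the vectorizer on a single string. The snippet below is a minimal sketch (the sample sentence is illustrative): `embed` returns a plain Python list of floats by default, and `text-embedding-004` produces 768-dimensional vectors.\n", "\n", "```python\n", "# Embed one string and confirm the output dimensionality\n", "test_embedding = vectorizer.embed(\"Redis is a real-time data platform.\")\n", "print(len(test_embedding))  # expect 768 for text-embedding-004\n", "```" ] },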
Generate Embeddings\n", 787 | "The next step is to create chunks of the PDF and then embed each chunk as a vector." 788 | ] 789 | }, 790 | { 791 | "cell_type": "code", 792 | "execution_count": 8, 793 | "metadata": { 794 | "colab": { 795 | "base_uri": "https://localhost:8080/" 796 | }, 797 | "id": "tzkRiknVXOtc", 798 | "outputId": "b597c6b5-45d7-4b41-f46a-cdea0659efd0" 799 | }, 800 | "outputs": [ 801 | { 802 | "name": "stderr", 803 | "output_type": "stream", 804 | "text": [ 805 | "[nltk_data] Downloading package punkt to /root/nltk_data...\n", 806 | "[nltk_data] Unzipping tokenizers/punkt.zip.\n", 807 | "[nltk_data] Downloading package averaged_perceptron_tagger to\n", 808 | "[nltk_data] /root/nltk_data...\n", 809 | "[nltk_data] Unzipping taggers/averaged_perceptron_tagger.zip.\n" 810 | ] 811 | }, 812 | { 813 | "name": "stdout", 814 | "output_type": "stream", 815 | "text": [ 816 | "Done preprocessing. Created 44 chunks of the original pdf resources/p5718.pdf\n" 817 | ] 818 | } 819 | ], 820 | "source": [ 821 | "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", 822 | "from langchain.document_loaders import UnstructuredFileLoader\n", 823 | "\n", 824 | "doc = \"resources/p5718.pdf\"\n", 825 | "\n", 826 | "# set up the file loader/extractor and text splitter to create chunks\n", 827 | "text_splitter = RecursiveCharacterTextSplitter(\n", 828 | " chunk_size=2500, chunk_overlap=0\n", 829 | ")\n", 830 | "loader = UnstructuredFileLoader(\n", 831 | " doc, mode=\"single\", strategy=\"fast\"\n", 832 | ")\n", 833 | "\n", 834 | "# extract, load, and make chunks\n", 835 | "chunks = loader.load_and_split(text_splitter)\n", 836 | "\n", 837 | "print(\"Done preprocessing. Created\", len(chunks), \"chunks of the original pdf\", doc)" 838 | ] 839 | }, 840 | { 841 | "cell_type": "code", 842 | "execution_count": 9, 843 | "metadata": { 844 | "colab": { 845 | "base_uri": "https://localhost:8080/" 846 | }, 847 | "id": "OETsrYvfuzmX", 848 | "outputId": "9eb60a85-6e59-4a34-e028-34d7b211cfdd" 849 | }, 850 | "outputs": [ 851 | { 852 | "data": { 853 | "text/plain": [ 854 | "True" 855 | ] 856 | }, 857 | "execution_count": 9, 858 | "metadata": {}, 859 | "output_type": "execute_result" 860 | } 861 | ], 862 | "source": [ 863 | "# Embed each chunk content\n", 864 | "embeddings = vectorizer.embed_many([chunk.page_content for chunk in chunks], as_buffer=True, dtype=\"float32\")\n", 865 | "\n", 866 | "# Check to make sure we've created enough embeddings, 1 per document chunk\n", 867 | "len(embeddings) == len(chunks)" 868 | ] 869 | }, 870 | { 871 | "cell_type": "markdown", 872 | "metadata": { 873 | "id": "WGVt7-DNr80e" 874 | }, 875 | "source": [ 876 | "## 4. Create Index\n", 877 | "\n", 878 | "Now that we have created embeddings that represent the text in our dataset, we will create an index that enables efficient search over the embeddings.\n", 879 | "\n", 880 | "**Why do we need search?**\n", 881 | "Using Redis for vector search allows us to retrieve chunks of text data that are **similar** or **relevant** to an input question or query. This will be extremely helpful for our sample generative AI / LLM application." 
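A note on the vector field definition in the next cell: it uses the `flat` algorithm, which performs exact (brute-force) nearest-neighbor search and is a reasonable default for a small corpus like our 44 chunks. For larger corpora, RedisVL also supports an approximate HNSW index. A minimal sketch of an alternative field definition, assuming the same field name and embedding dimensions (the `m` and `ef_construction` tuning values below are illustrative, not taken from this notebook):

```python
# Hypothetical HNSW variant of the vector field for larger datasets.
# Exact "flat" search scales linearly with corpus size; HNSW trades a
# little recall for sub-linear, approximate search.
hnsw_vector_field = {
    "name": "text_embedding",
    "type": "vector",
    "attrs": {
        "dims": 768,                 # must match the embedding model's output size
        "distance_metric": "cosine",
        "algorithm": "hnsw",         # approximate nearest-neighbor graph index
        "datatype": "float32",
        "m": 16,                     # illustrative: max edges per graph node
        "ef_construction": 200,      # illustrative: build-time quality/speed tradeoff
    },
}
```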
882 | ] 883 | }, 884 | { 885 | "cell_type": "code", 886 | "execution_count": 10, 887 | "metadata": { 888 | "id": "9mNa5LNn4XpX" 889 | }, 890 | "outputs": [], 891 | "source": [ 892 | "from redisvl.schema import IndexSchema\n", 893 | "from redisvl.index import SearchIndex\n", 894 | "\n", 895 | "\n", 896 | "index_name = \"redisvl\"\n", 897 | "\n", 898 | "schema = IndexSchema.from_dict({\n", 899 | " \"index\": {\n", 900 | " \"name\": index_name,\n", 901 | " \"prefix\": \"chunk\"\n", 902 | " },\n", 903 | " \"fields\": [\n", 904 | " {\n", 905 | " \"name\": \"chunk_id\",\n", 906 | " \"type\": \"tag\",\n", 907 | " \"attrs\": {\n", 908 | " \"sortable\": True\n", 909 | " }\n", 910 | " },\n", 911 | " {\n", 912 | " \"name\": \"content\",\n", 913 | " \"type\": \"text\"\n", 914 | " },\n", 915 | " {\n", 916 | " \"name\": \"text_embedding\",\n", 917 | " \"type\": \"vector\",\n", 918 | " \"attrs\": {\n", 919 | " \"dims\": vectorizer.dims,\n", 920 | " \"distance_metric\": \"cosine\",\n", 921 | " \"algorithm\": \"flat\",\n", 922 | " \"datatype\": \"float32\"\n", 923 | " }\n", 924 | " }\n", 925 | " ]\n", 926 | "})" 927 | ] 928 | }, 929 | { 930 | "cell_type": "code", 931 | "execution_count": 11, 932 | "metadata": { 933 | "id": "cNjHD3D94_Sc" 934 | }, 935 | "outputs": [], 936 | "source": [ 937 | "# Create an index from schema and the client\n", 938 | "index = SearchIndex(schema, redis_client)\n", 939 | "index.create(overwrite=True, drop=True)" 940 | ] 941 | }, 942 | { 943 | "cell_type": "code", 944 | "execution_count": 12, 945 | "metadata": { 946 | "id": "VOzL5qB-uzrE" 947 | }, 948 | "outputs": [], 949 | "source": [ 950 | "# Load expects an iterable of dictionaries\n", 951 | "data = [\n", 952 | " {\n", 953 | " 'chunk_id': f'{i}',\n", 954 | " 'content': chunk.page_content,\n", 955 | " 'text_embedding': embeddings[i]\n", 956 | " } for i, chunk in enumerate(chunks)\n", 957 | "]\n", 958 | "\n", 959 | "# RedisVL handles batching automatically\n", 960 | "keys = index.load(data, id_field=\"chunk_id\")" 961 | ] 962 | }, 963 | { 964 | "cell_type": "markdown", 965 | "metadata": { 966 | "id": "G6CmHY3-6wB1" 967 | }, 968 | "source": [ 969 | "## 5. Query\n", 970 | "Now we can use RedisVL to perform a variety of vector search operations." 
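Besides the KNN-style `VectorQuery` used in the cells that follow, RedisVL also provides a `RangeQuery`, which returns every chunk within a semantic distance threshold instead of a fixed top-k. A minimal sketch against the index above (the 0.6 threshold is an assumption for illustration and should be tuned per embedding model):

```python
from redisvl.query import RangeQuery

# Return all chunks whose cosine distance to the query vector is <= 0.6,
# rather than a fixed number of nearest neighbors.
range_query = RangeQuery(
    vector=vectorizer.embed("What is TCC?"),
    vector_field_name="text_embedding",
    return_fields=["chunk_id", "content"],
    distance_threshold=0.6,  # illustrative value
)
results = index.query(range_query)
```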
971 | ] 972 | }, 973 | { 974 | "cell_type": "code", 975 | "execution_count": 13, 976 | "metadata": { 977 | "colab": { 978 | "base_uri": "https://localhost:8080/", 979 | "height": 35 980 | }, 981 | "id": "d9HKH8kO5T3E", 982 | "outputId": "17299b9c-26d5-4352-aedb-b73474ed6bb3" 983 | }, 984 | "outputs": [ 985 | { 986 | "data": { 987 | "application/vnd.google.colaboratory.intrinsic+json": { 988 | "type": "string" 989 | }, 990 | "text/plain": [ 991 | "'*=>[KNN 3 @text_embedding $vector AS vector_distance] RETURN 3 chunk_id content vector_distance SORTBY vector_distance ASC DIALECT 2 LIMIT 0 3'" 992 | ] 993 | }, 994 | "execution_count": 13, 995 | "metadata": {}, 996 | "output_type": "execute_result" 997 | } 998 | ], 999 | "source": [ 1000 | "from redisvl.query import VectorQuery\n", 1001 | "\n", 1002 | "query = \"What is TCC?\"\n", 1003 | "\n", 1004 | "query_embedding = vectorizer.embed(query)\n", 1005 | "\n", 1006 | "vector_query = VectorQuery(\n", 1007 | " vector=query_embedding,\n", 1008 | " vector_field_name=\"text_embedding\",\n", 1009 | " num_results=3,\n", 1010 | " return_fields=[\"chunk_id\", \"content\"],\n", 1011 | " return_score=True\n", 1012 | ")\n", 1013 | "\n", 1014 | "# show the raw redis query\n", 1015 | "str(vector_query)" 1016 | ] 1017 | }, 1018 | { 1019 | "cell_type": "code", 1020 | "execution_count": 14, 1021 | "metadata": { 1022 | "colab": { 1023 | "base_uri": "https://localhost:8080/" 1024 | }, 1025 | "id": "W_-AlXmE5dUK", 1026 | "outputId": "ad823e6e-fe74-47e0-e2dc-f689ca4375f2" 1027 | }, 1028 | "outputs": [ 1029 | { 1030 | "data": { 1031 | "text/plain": [ 1032 | "[{'id': 'chunk:7',\n", 1033 | " 'vector_distance': '0.500085473061',\n", 1034 | " 'chunk_id': '7',\n", 1035 | " 'content': 'IRIS uses QuickAlerts, an IRS e-mail service, to disseminate information quickly regarding IRIS issues to subscribers. This service keeps tax professionals up to date on IRIS issues throughout the year, with emphasis on issues during the filing season. After subscribing, customers will receive “round the clock” communication issues such as electronic specifica- tions and system information needed for Software Developers and Transmitters to transmit to IRS. New subscribers may sign up through the “subscription page” link located on the QuickAlerts “more” e-file Benefits for Tax Professionals page.\\n\\n9\\n\\nPublication 5718\\n\\n1.3 Registration and Application Process\\n\\nExternal users must register with the current IRS credential service provider and complete the IRIS Application for Transmitter Control Code (TCC) to submit transmissions using the IRIS intake platform. Information returns filed through the IRIS A2A system cannot be filed using any other intake platform TCC. 
These include:\\n\\ne-File Application (MeF)\\n\\nAffordable Care Act (ACA) Application for TCC (AIR)\\n\\nPartnership Bipartisan Budget Act (PBBA) Application for TCC\\n\\n\\n\\nInformation Returns (IR) Application for TCC (FIRE)\\n\\n\\n\\nIRIS TCC for the Taxpayer Portal\\n\\n1.3.1 Registration\\n\\nBefore completing the IRIS Application for TCC, each user must create an account or sign-in using their existing credentials to validate their identities using the latest authentication process.\\n\\nFor more information, please visit How to register for IRS online self-help tools | Internal Revenue Service.\\n\\n1.3.2 Who should apply for an IRIS TCC\\n\\nIf you are transmitting information returns to the IRS or if you are developing software to file information returns electronically, you must apply for one or more TCCs using the IRIS Appli- cation for TCC available online. A single application can be used to apply for multiple roles and the necessary TCCs. The IRS encourages transmitters who file for multiple issuers to submit one application and use the assigned TCC for all issuers. The purpose of the TCC is to identify the business acting as the transmitter of the file. As a transmitter, you may transmit files for as many companies as you need to under one TCC. The IRIS Application for TCC contains three separate roles: Software Developer, Transmitter, and Issuer. Complete the IRIS Application for TCC if your firm or organization is performing one or more of the following roles:'},\n", 1036 | " {'id': 'chunk:1',\n", 1037 | " 'vector_distance': '0.502500534058',\n", 1038 | " 'chunk_id': '1',\n", 1039 | " 'content': 'Processing Year 2024 Revisions After 12-2023\\n\\nLocation\\n\\nUpdate\\n\\nSection 6.1.1\\n\\n2-Step Correction Step 2\\n\\nPublication 5718\\n\\nTable of Contents 1. 
Introduction .......... 7\n\n1.1 Purpose .......... 8\n\n1.2 Communications .......... 9\n\n1.2.1 IRIS Web Site .......... 9\n\n1.3 Registration and Application Process .......... 10\n\n1.3.1 Registration .......... 10\n\n1.3.2 Who should apply for an IRIS TCC .......... 10\n\n1.3.3 Third-Party Transmitters .......... 12\n\n1.3.4 Things you need to know before completing the IRIS .......... 12\n\n1.3.5 Access the IRIS Application for TCC .......... 13\n\n1.3.6 Application Approved/Completed .......... 13\n\n1.3.7 Revise Current TCC Information .......... 14\n\n1.3.8 Deleted TCCs .......... 14\n\n1.4 Transmitter and Issuer TCCs .......... 14\n\n1.5 Software Developer TCCs .......... 14\n\n1.6 API Client ID .......... 15\n\n2. Transmissions and Submissions .......... 18\n\n2.1 Transmission/Submission Definitions and Limitations .......... 18\n\n2.2 Uniquely Identifying the Transmission .......... 19\n\n3. Transmitting Information Returns .......... 20'},\n", 1040 | {'id': 'chunk:11',\n", 1041 | " 'vector_distance': '0.505242109299',\n", 1042 | " 'chunk_id': '11',\n", 1043 | " 'content': 'When your IRIS Application for TCC is approved and completed, a five-character alphanu- meric TCC that begins with the letter ‘D’ will be assigned to your business. An approval letter will be sent via United States Postal Service (USPS) to the address listed on the application, informing you of your TCC. You can also sign into your IRIS Application for TCC to view your TCCs on the Application Summary page.\\n\\n13\\n\\nPublication 5718\\n\\nIf your application is in Completed status for more than 45 days and your TCC has not been assigned, contact the Help Desk.\\n\\n1.3.7 Revise Current TCC Information\\n\\nAs changes occur, you must update and maintain your IRIS TCC Application. Some changes will require all ROs or Authorized Delegates (ADs) on the application to re-sign the Appli- cation Submission page. 
Below are examples of when an application would need to be re-signed (this list is not all inclusive):\\n\\nFirm’s DBA Name change\\n\\nRole changes or additions\\n\\nAdd, delete or change RO and/or AD\\n\\nNote: Changes submitted on an IRIS TCC Application do not change the address of IRS tax records just as a change of address to IRS tax records does not automatically update infor- mation on an IRIS TCC Application.\\n\\nChanges that require a firm to acquire a new Employer Identification Number (EIN) require a new IRIS TCC Application. Firms that change their form of organization, such as from a sole proprietorship to a corporation, generally require the firm to acquire a new EIN.\\n\\n1.3.8 Deleted TCCs\\n\\nYour TCCs will remain valid if you transmit information returns or extensions of time to file. If you don’t use your TCC for three consecutive years, your TCC will be deleted. Once your TCC is deleted it cannot be reactivated. You’ll need to submit a new IRIS Application for TCC.\\n\\n1.4 Transmitter and Issuer TCCs\\n\\nDepending on the roles selected on the application, one or more TCCs will be assigned. Each TCC will have an indicator of Test “T” or Production “P” and status of Active, Inactive, or Dropped. Transmitters and Issuers are issued a TCC in Test “T” status until required Communication Testing is conducted in the ATS environment and passed. Once Commu- nication Testing is passed, the Transmitter should contact the Help Desk to request to be moved to Production “P” status. For more information about Communication Testing for Transmitters, refer to Publication 5719, Information Returns Intake System (IRIS) Test Package for Information Returns.\\n\\n1.5 Software Developer TCCs'}]" 1044 | ] 1045 | }, 1046 | "execution_count": 14, 1047 | "metadata": {}, 1048 | "output_type": "execute_result" 1049 | } 1050 | ], 1051 | "source": [ 1052 | "# execute the query with RedisVL\n", 1053 | "index.query(vector_query)" 1054 | ] 1055 | }, 1056 | { 1057 | "cell_type": "code", 1058 | "execution_count": 15, 1059 | "metadata": { 1060 | "colab": { 1061 | "base_uri": "https://localhost:8080/" 1062 | }, 1063 | "id": "-vT4bTYB5h74", 1064 | "outputId": "81e773a8-e9ec-4fce-afa6-6e2a1308213b" 1065 | }, 1066 | "outputs": [ 1067 | { 1068 | "name": "stdout", 1069 | "output_type": "stream", 1070 | "text": [ 1071 | "7 0.500085473061\n", 1072 | "1 0.502500534058\n", 1073 | "11 0.505242109299\n" 1074 | ] 1075 | } 1076 | ], 1077 | "source": [ 1078 | "# paginate through results\n", 1079 | "for result in index.paginate(vector_query, page_size=1):\n", 1080 | " print(result[0][\"chunk_id\"], result[0][\"vector_distance\"], flush=True)" 1081 | ] 1082 | }, 1083 | { 1084 | "cell_type": "code", 1085 | "execution_count": 16, 1086 | "metadata": { 1087 | "colab": { 1088 | "base_uri": "https://localhost:8080/", 1089 | "height": 35 1090 | }, 1091 | "id": "t9MjgGiCLLqv", 1092 | "outputId": "0f2e8768-6bb9-4943-c46c-ae44b0b6c023" 1093 | }, 1094 | "outputs": [ 1095 | { 1096 | "data": { 1097 | "application/vnd.google.colaboratory.intrinsic+json": { 1098 | "type": "string" 1099 | }, 1100 | "text/plain": [ 1101 | "'@content:(Social Security)=>[KNN 3 @text_embedding $vector AS vector_distance] RETURN 3 chunk_id content vector_distance SORTBY vector_distance ASC DIALECT 2 LIMIT 0 3'" 1102 | ] 1103 | }, 1104 | "execution_count": 16, 1105 | "metadata": {}, 1106 | "output_type": "execute_result" 1107 | } 1108 | ], 1109 | "source": [ 1110 | "from redisvl.query.filter import Text\n", 1111 | "\n", 1112 | "query = \"What is 
TCC?\"\n", 1113 | "\n", 1114 | "query_embedding = vectorizer.embed(query)\n", 1115 | "\n", 1116 | "text_filter = Text(\"content\") % \"Social Security\"\n", 1117 | "\n", 1118 | "vector_query = VectorQuery(\n", 1119 | " vector=query_embedding,\n", 1120 | " vector_field_name=\"text_embedding\",\n", 1121 | " num_results=3,\n", 1122 | " return_fields=[\"chunk_id\", \"content\"],\n", 1123 | " return_score=True,\n", 1124 | " filter_expression=text_filter\n", 1125 | ")\n", 1126 | "\n", 1127 | "# show the raw redis query\n", 1128 | "str(vector_query)" 1129 | ] 1130 | }, 1131 | { 1132 | "cell_type": "code", 1133 | "execution_count": 17, 1134 | "metadata": { 1135 | "colab": { 1136 | "base_uri": "https://localhost:8080/" 1137 | }, 1138 | "id": "ZXci2AeGUMtE", 1139 | "outputId": "8cdf34ee-d4a0-46eb-f11a-096a928b9de1" 1140 | }, 1141 | "outputs": [ 1142 | { 1143 | "data": { 1144 | "text/plain": [ 1145 | "[{'id': 'chunk:9',\n", 1146 | " 'vector_distance': '0.572170257568',\n", 1147 | " 'chunk_id': '9',\n", 1148 | " 'content': 'Select the role of Transmitter on your application. Note: The TCC for a Transmitter can be used to transmit your own returns and others. You may not use an Issuer TCC to transmit information returns for others.\\n\\n11\\n\\nPublication 5718\\n\\n1.3.3 Third-Party Transmitters\\n\\nIf you do not have an in-house programmer familiar with XML or do not wish to purchase A2A software that is certified to support the information returns that you plan to file, you can file through a Third-Party Transmitter or use the online Taxpayer Portal. Visit www.irs.gov/ iris for additional information.\\n\\nOnly those persons listed as an Authorized User on the IRIS Application for TCC qualify to receive information about a Receipt ID associated with a TCC listed on that application.\\n\\nIf your Third-Party Transmitter needs technical assistance regarding a Receipt ID associated with records that were submitted on behalf of your organization, they should contact the Help Desk.\\n\\nWhen filing through a Third-Party Transmitter obtain the following for each submission filed on your behalf:\\n\\nA copy of all electronic records within each submission, along with the Receipt ID for the transmission in which they were filed.\\n\\nThe transmission Acknowledgement that includes the Status that is returned when processing is complete (Accepted, Accepted With Errors, Partially Accepted, Rejected) and a detailed list of errors, if any.\\n\\nNote: The items cited above are critical to your ability to make corrections should your Third- party Transmitter go out of business or be otherwise unavailable to file corrections on your behalf.\\n\\n1.3.4 Things you need to know before completing the IRIS\\n\\nA responsible official (RO) initiates and submits the IRIS Application for TCC electronically. Each RO must sign the terms of agreement using their five-digit PIN they created when they initially accessed the system. An application will receive a tracking number after saving it. 
Completing the application in a single session isn’t a requirement.\\n\\nThe following information is necessary to complete each application:\\n\\nFirm’s business structure\\n\\nFirm’s (EIN) (the system doesn’t allow firms to use a Social Security Number (SSN) or Individual Taxpayer Identification Number (ITIN)\\n\\nFirm’s legal business name and business type\\n\\nFirm’s doing business as name when it’s different from the legal business name\\n\\nBusiness phone (phone country code and phone number)\\n\\nBusiness address (this must be a physical location, not a post office box)'}]" 1149 | ] 1150 | }, 1151 | "execution_count": 17, 1152 | "metadata": {}, 1153 | "output_type": "execute_result" 1154 | } 1155 | ], 1156 | "source": [ 1157 | "# execute the query with RedisVL\n", 1158 | "index.query(vector_query)" 1159 | ] 1160 | }, 1161 | { 1162 | "cell_type": "markdown", 1163 | "metadata": { 1164 | "id": "82-AbKHxItif" 1165 | }, 1166 | "source": [ 1167 | "# Building a RAG Pipeline from Scratch\n", 1168 | "We're going to build a complete RAG pipeline from scratch incorporating the following components:\n", 1169 | "\n", 1170 | "- Standard retrieval and chat completion\n", 1171 | "- Dense content representation to improve accuracy\n", 1172 | "- Query re-writing to improve accuracy\n", 1173 | "- Semantic caching to improve performance\n", 1174 | "- Conversational session history to improve personalization" 1175 | ] 1176 | }, 1177 | { 1178 | "cell_type": "code", 1179 | "execution_count": 18, 1180 | "metadata": { 1181 | "id": "a6BsbxUG7kVc" 1182 | }, 1183 | "outputs": [], 1184 | "source": [ 1185 | "#@title Setup RedisVL *AsyncSearchIndex*\n", 1186 | "\n", 1187 | "from redis.asyncio import Redis\n", 1188 | "from redisvl.index import AsyncSearchIndex\n", 1189 | "\n", 1190 | "# Create Redis client\n", 1191 | "redis_client = Redis(\n", 1192 | " host=REDIS_HOST,\n", 1193 | " port=REDIS_PORT,\n", 1194 | " password=REDIS_PASSWORD\n", 1195 | ")\n", 1196 | "\n", 1197 | "index = AsyncSearchIndex(index.schema, redis_client)" 1198 | ] 1199 | }, 1200 | { 1201 | "cell_type": "code", 1202 | "execution_count": 19, 1203 | "metadata": { 1204 | "id": "sSbXjA896Ami" 1205 | }, 1206 | "outputs": [], 1207 | "source": [ 1208 | "#@title Setup VertexAI Generative Model with Safety Settings\n", 1209 | "from vertexai.generative_models import GenerativeModel, Part, HarmCategory, HarmBlockThreshold\n", 1210 | "\n", 1211 | "\n", 1212 | "model = GenerativeModel(\"gemini-1.5-flash-001\")\n", 1213 | "\n", 1214 | "# Define safety settings\n", 1215 | "safety_settings = {\n", 1216 | " HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,\n", 1217 | " HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,\n", 1218 | " HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,\n", 1219 | " HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,\n", 1220 | "}\n", 1221 | "\n", 1222 | "# Define generation config\n", 1223 | "generation_config = {\n", 1224 | " \"max_output_tokens\": 2048,\n", 1225 | " \"temperature\": 0.5,\n", 1226 | " \"top_p\": 1\n", 1227 | "}" 1228 | ] 1229 | }, 1230 | { 1231 | "cell_type": "markdown", 1232 | "metadata": { 1233 | "id": "_Ep2DbPB_Rmu" 1234 | }, 1235 | "source": [ 1236 | "### Baseline Retrieval Augmented Generation\n", 1237 | "\n", 1238 | "Below we build a simple RAG pipeline with three helper methods:\n", 1239 | "\n", 1240 | "\n", 1241 | "* `answer_question` -- full RAG operation\n", 1242 | " * `retrieve_context` -- 
search Redis for relevant sources\n", 1243 | " * `promptify` -- combine system instructions, user question, and sources\n", 1244 | "\n" 1245 | ] 1246 | }, 1247 | { 1248 | "cell_type": "code", 1249 | "execution_count": 20, 1250 | "metadata": { 1251 | "id": "zmma7Cjd7kZ9" 1252 | }, 1253 | "outputs": [], 1254 | "source": [ 1255 | "async def answer_question(index: AsyncSearchIndex, query: str):\n", 1256 | " \"\"\"Answer the user's question\"\"\"\n", 1257 | "\n", 1258 | " SYSTEM_PROMPT = \"\"\"You are a helpful tax analyst assistant that has access\n", 1259 | " to publications from the IRS\n", 1260 | " \"\"\"\n", 1261 | "\n", 1262 | " query_vector = vectorizer.embed(query)\n", 1263 | "\n", 1264 | " # Fetch context from Redis using vector search\n", 1265 | " context = await retrieve_context(index, query_vector)\n", 1266 | "\n", 1267 | " prompt = f'''\n", 1268 | " System: {SYSTEM_PROMPT}\n", 1269 | " User: {promptify(query, context)}\n", 1270 | " '''\n", 1271 | "\n", 1272 | " responses = model.generate_content(\n", 1273 | " [prompt],\n", 1274 | " generation_config=generation_config,\n", 1275 | " safety_settings=safety_settings,\n", 1276 | " stream=False\n", 1277 | " )\n", 1278 | " # Response provided by LLM\n", 1279 | " if(responses.candidates[0].finish_reason.value == 1):\n", 1280 | " return(responses.candidates[0].content.parts[0].text)\n", 1281 | " else:\n", 1282 | " return(f\"Content has been blocked for {responses.candidates[0].finish_reason.name} reasons.\")\n", 1283 | "\n", 1284 | "\n", 1285 | "async def retrieve_context(index: AsyncSearchIndex, query_vector) -> str:\n", 1286 | " \"\"\"Fetch the relevant context from Redis using vector search\"\"\"\n", 1287 | " results = await index.query(\n", 1288 | " VectorQuery(\n", 1289 | " vector=query_vector,\n", 1290 | " vector_field_name=\"text_embedding\",\n", 1291 | " return_fields=[\"content\"],\n", 1292 | " num_results=3\n", 1293 | " )\n", 1294 | " )\n", 1295 | " content = \"\\n\".join([result[\"content\"] for result in results])\n", 1296 | " return content\n", 1297 | "\n", 1298 | "\n", 1299 | "def promptify(query: str, context: str) -> str:\n", 1300 | " return f'''Use the provided context below derived from public documentation to answer the user's question. If you can't answer the user's\n", 1301 | " question based on the context, do not guess. Do your best to find the answer in the context, but if there is no context at all,\n", 1302 | " respond with \"I don't know\".\n", 1303 | "\n", 1304 | " User question:\n", 1305 | "\n", 1306 | " {query}\n", 1307 | "\n", 1308 | " Helpful context:\n", 1309 | "\n", 1310 | " {context}\n", 1311 | "\n", 1312 | " Answer:\n", 1313 | " '''" 1314 | ] 1315 | }, 1316 | { 1317 | "cell_type": "code", 1318 | "execution_count": 21, 1319 | "metadata": { 1320 | "id": "wIaYNgNxA4D1" 1321 | }, 1322 | "outputs": [], 1323 | "source": [ 1324 | "# Generate a list of questions\n", 1325 | "questions = [\n", 1326 | " \"What is TCC?\",\n", 1327 | " \"Who should apply for an IRIS TCC?\",\n", 1328 | " \"What is a JWK?\",\n", 1329 | " \"Should I buy a yacht??\"\n", 1330 | "]" 1331 | ] 1332 | }, 1333 | { 1334 | "cell_type": "code", 1335 | "execution_count": 22, 1336 | "metadata": { 1337 | "id": "uy7rh-stIIii" 1338 | }, 1339 | "outputs": [], 1340 | "source": [ 1341 | "import asyncio\n", 1342 | "\n", 1343 | "results = await asyncio.gather(*[\n", 1344 | " answer_question(index, question) for question in questions\n", 1345 | "])" 1346 | ] 1347 | }, 1348 | { 1349 | "cell_type": "code", 1350 | "execution_count": 23, 1351 | "metadata": { 1352 | "colab": { 1353 | "base_uri": "https://localhost:8080/" 1354 | }, 1355 | "id": "T_HZnaBo6ylG", 1356 | "outputId": "6c0d1baa-dc27-4ad5-8983-5dd3a98ee9bd" 1357 | }, 1358 | "outputs": [ 1359 | { 1360 | "name": "stdout", 1361 | "output_type": "stream", 1362 | "text": [ 1363 | "What is TCC?: \n", 1364 | "TCC stands for Transmitter Control Code. \n", 1365 | "\n", 1366 | "\n", 1367 | "\n", 1368 | "Who should apply for an IRIS TCC?: \n", 1369 | "If you are transmitting information returns to the IRS or if you are developing software to file information returns electronically, you must apply for one or more TCCs using the IRIS Application for TCC available online. \n", 1370 | "\n", 1371 | "\n", 1372 | "\n", 1373 | "What is a JWK?: \n", 1374 | "A JSON Web Key Set (JWKs) is used for e-Services API authentication. It contains a public key that validates the API consumer application. \n", 1375 | "\n", 1376 | "\n", 1377 | "\n", 1378 | "Should I buy a yacht??: \n", 1379 | "I don't know. 
\n", 1380 | "\n", 1381 | "\n", 1382 | "\n" 1383 | ] 1384 | } 1385 | ], 1386 | "source": [ 1387 | "for question, result in zip(questions,results):\n", 1388 | " print(question+\": \\n\"+result+\"\\n\\n\")" 1389 | ] 1390 | }, 1391 | { 1392 | "cell_type": "markdown", 1393 | "metadata": { 1394 | "id": "TN3Ok2zJMhdt" 1395 | }, 1396 | "source": [ 1397 | "# Improve performance and cut costs with LLM Semantic Caching" 1398 | ] 1399 | }, 1400 | { 1401 | "cell_type": "code", 1402 | "execution_count": 24, 1403 | "metadata": { 1404 | "id": "QzX8lQ35Mpee" 1405 | }, 1406 | "outputs": [], 1407 | "source": [ 1408 | "from redis import Redis\n", 1409 | "from redisvl.extensions.llmcache import SemanticCache\n", 1410 | "\n", 1411 | "# Create Redis client\n", 1412 | "redis_client = Redis(\n", 1413 | " host=REDIS_HOST,\n", 1414 | " port=REDIS_PORT,\n", 1415 | " password=REDIS_PASSWORD\n", 1416 | ")\n", 1417 | "\n", 1418 | "# Create the Semantic Cache\n", 1419 | "llmcache = SemanticCache(\n", 1420 | " name=\"llmcache\",\n", 1421 | " vectorizer=vectorizer,\n", 1422 | " redis_client=redis_client,\n", 1423 | " ttl=120,\n", 1424 | " distance_threshold=0.2\n", 1425 | ")" 1426 | ] 1427 | }, 1428 | { 1429 | "cell_type": "code", 1430 | "execution_count": 25, 1431 | "metadata": { 1432 | "id": "vaovoOKbMrSG" 1433 | }, 1434 | "outputs": [], 1435 | "source": [ 1436 | "from functools import wraps\n", 1437 | "\n", 1438 | "\n", 1439 | "# Create an LLM caching decorator\n", 1440 | "def cache(func):\n", 1441 | " @wraps(func)\n", 1442 | " async def wrapper(index, query_text, *args, **kwargs):\n", 1443 | " query_vector = llmcache._vectorizer.embed(query_text)\n", 1444 | "\n", 1445 | " # Check the cache with the vector\n", 1446 | " if result := llmcache.check(vector=query_vector):\n", 1447 | " return result[0]['response']\n", 1448 | "\n", 1449 | " response = await func(index, query_text, query_vector=query_vector)\n", 1450 | " llmcache.store(query_text, response, query_vector)\n", 1451 | " return response\n", 1452 | " return wrapper\n", 1453 | "\n", 1454 | "\n", 1455 | "@cache\n", 1456 | "async def answer_question(index: AsyncSearchIndex, query: str, **kwargs):\n", 1457 | " \"\"\"Answer the user's question\"\"\"\n", 1458 | "\n", 1459 | " SYSTEM_PROMPT = \"\"\"You are a helpful tax analyst assistant that has access\n", 1460 | " to publications from the IRS\n", 1461 | " \"\"\"\n", 1462 | "\n", 1463 | " # Fetch context from Redis using vector search\n", 1464 | " context = await retrieve_context(index, kwargs[\"query_vector\"])\n", 1465 | "\n", 1466 | " prompt = f'''\n", 1467 | " System: {SYSTEM_PROMPT}\n", 1468 | " User: {promptify(query, context)}\n", 1469 | " '''\n", 1470 | "\n", 1471 | " responses = model.generate_content(\n", 1472 | " [prompt],\n", 1473 | " generation_config=generation_config,\n", 1474 | " safety_settings=safety_settings,\n", 1475 | " stream=False\n", 1476 | " )\n", 1477 | " # Response provided by LLM\n", 1478 | " if(responses.candidates[0].finish_reason.value == 1):\n", 1479 | " return(responses.candidates[0].content.parts[0].text)\n", 1480 | " else:\n", 1481 | " return(f\"Content has been blocked for {responses.candidates[0].finish_reason.name} reasons.\")\n" 1482 | ] 1483 | }, 1484 | { 1485 | "cell_type": "code", 1486 | "execution_count": 26, 1487 | "metadata": { 1488 | "id": "a4d6PJG01hcz" 1489 | }, 1490 | "outputs": [], 1491 | "source": [ 1492 | "from datetime import datetime" 1493 | ] 1494 | }, 1495 | { 1496 | "cell_type": "code", 1497 | "execution_count": 27, 1498 | "metadata": { 1499 | "colab": { 1500 | 
"base_uri": "https://localhost:8080/" 1501 | }, 1502 | "id": "yAMpnoVIP7G1", 1503 | "outputId": "5ab491b9-13cc-4981-fa6a-0db7fdf2f0b5" 1504 | }, 1505 | "outputs": [ 1506 | { 1507 | "name": "stdout", 1508 | "output_type": "stream", 1509 | "text": [ 1510 | "Total time: 0:00:00.700675\n" 1511 | ] 1512 | } 1513 | ], 1514 | "source": [ 1515 | "query = \"What is a JWK?\"\n", 1516 | "\n", 1517 | "startTime = datetime.now()\n", 1518 | "await answer_question(index, query)\n", 1519 | "print(f\"Total time: {datetime.now() - startTime}\")" 1520 | ] 1521 | }, 1522 | { 1523 | "cell_type": "code", 1524 | "execution_count": 28, 1525 | "metadata": { 1526 | "colab": { 1527 | "base_uri": "https://localhost:8080/" 1528 | }, 1529 | "id": "aepPokugQBNt", 1530 | "outputId": "5cf7ed3d-fb48-40f1-f678-284b92964a67" 1531 | }, 1532 | "outputs": [ 1533 | { 1534 | "name": "stdout", 1535 | "output_type": "stream", 1536 | "text": [ 1537 | "Total time: 0:00:00.102795\n" 1538 | ] 1539 | } 1540 | ], 1541 | "source": [ 1542 | "# Now try again with semantic caching enabled!\n", 1543 | "query = \"What's a JWK?\"\n", 1544 | "\n", 1545 | "startTime = datetime.now()\n", 1546 | "await answer_question(index, query)\n", 1547 | "print(f\"Total time: {datetime.now() - startTime}\")" 1548 | ] 1549 | }, 1550 | { 1551 | "cell_type": "code", 1552 | "execution_count": null, 1553 | "metadata": { 1554 | "id": "H6jxgpzWA2Ng" 1555 | }, 1556 | "outputs": [], 1557 | "source": [ 1558 | "import gradio as gr\n", 1559 | "\n", 1560 | "async def respond(message, history):\n", 1561 | " print(message)\n", 1562 | " result = await answer_question(index, message)\n", 1563 | " print(result)\n", 1564 | " return result\n", 1565 | "\n", 1566 | "gr.ChatInterface(respond).launch(debug=True)" 1567 | ] 1568 | } 1569 | ], 1570 | "metadata": { 1571 | "colab": { 1572 | "gpuType": "T4", 1573 | "provenance": [] 1574 | }, 1575 | "kernelspec": { 1576 | "display_name": "Python 3", 1577 | "name": "python3" 1578 | }, 1579 | "language_info": { 1580 | "name": "python" 1581 | } 1582 | }, 1583 | "nbformat": 4, 1584 | "nbformat_minor": 0 1585 | } 1586 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Redis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Scalable LLM Architectures with Redis & GCP Vertex AI 2 | 3 | ☁️ [Generative AI](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/overview) with Google Vertex AI comes with a specialized [in-console studio experience](https://cloud.google.com/vertex-ai/docs/generative-ai/start/quickstarts/quickstart), a [dedicated API for Gemini](https://cloud.google.com/vertex-ai/docs/generative-ai/start/quickstarts/api-quickstart) and easy-to-use [Python SDK](https://cloud.google.com/vertex-ai/docs/python-sdk/use-vertex-ai-python-sdk) designed for deploying and managing instances of Google's powerful language models. 4 | 5 | ⚡ Redis Enterprise offers fast and scalable [vector search](https://redis.io/solutions/vector-search/), with an API for index creation, management, blazing-fast search, and hybrid filtering. When coupled with its [versatile data structures](https://redis.io/docs/latest/develop/data-types/) - Redis Enterprise shines as the optimal solution for building high-quality Large Language Model (LLM) apps. 6 | 7 | >This repo serves as a foundational architecture for building LLM applications with Redis and GCP services. 8 | 9 | ## Reference architecture 10 | 11 | ![](assets/Redis_GCN_GenAI_20240325.png) 12 | 13 | 1. Primary Data Sources 14 | 2. Data Extraction and Loading 15 | 3. Large Language Models 16 | - `text-embedding-gecko@003` for embeddings 17 | - `gemini-1.5-flash-001` for LLM generation and chat 18 | 4. High-Performance Data Layer (Redis) 19 | - Semantic caching to improve LLM performance and associated costs 20 | - Vector search for context retrieval from knowledge base 21 | 22 | 23 | ## RAG + Semantic Caching demo 24 | Open In Colab 25 | 26 | **Open the code tutorial using the Colab notebook to get your hands dirty with Redis and Vertex AI on GCP.** 27 | 28 | 29 | ## Additional resources 30 | - [Streamlit PDF chatbot example app](examples/chat-your-pdf/) 31 | - [Redis vector search documentation](https://redis.io/docs/latest/develop/interact/search-and-query/query/vector-search/) 32 | - [Get started with RedisVL](https://redis.io/blog/introducing-the-redis-vector-library-for-enhancing-genai-development/) 33 | - [Google VertexAI resources](https://cloud.google.com/vertex-ai) 34 | - [More Redis AI resources](https://github.com/redis-developer) -------------------------------------------------------------------------------- /assets/GCP_RE_GenAI.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/redis-developer/gcp-redis-llm-stack/d61051e79d5a409e85549eb91cd892a5c4bb7618/assets/GCP_RE_GenAI.drawio.png -------------------------------------------------------------------------------- /assets/Redis_GCN_GenAI_20240325.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/redis-developer/gcp-redis-llm-stack/d61051e79d5a409e85549eb91cd892a5c4bb7618/assets/Redis_GCN_GenAI_20240325.png -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | This directory contains an *evolving* list of example Gen AI applications built on GCP with Redis Enterprise. 
4 | 5 | | Name | Description | 6 | | ------- | ------- | 7 | | [Chat Your PDF!](chat-your-pdf/) | Explore building a PDF chatbot based on GCP's Palm 2 Chat Completion model -- ALL powered by Redis. | 8 | 9 | -------------------------------------------------------------------------------- /examples/chat-your-pdf/.env.example: -------------------------------------------------------------------------------- 1 | CHUNK_SIZE=500 2 | CHUNK_OVERLAP=100 3 | DOCS_FOLDER="pdfs/" 4 | REDIS_URL="redis://redis:6379" 5 | GCP_PROJECT_ID="YOUR_GCP_PROJECT_NAME" 6 | GCP_LOCATION="YOUR_VERTEXAI_REGION" -------------------------------------------------------------------------------- /examples/chat-your-pdf/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.10-slim-buster 2 | 3 | RUN apt-get update && apt-get install python-tk python3-tk tk-dev git -y 4 | 5 | WORKDIR /app 6 | 7 | COPY app/requirements.txt . 8 | 9 | RUN pip install -r requirements.txt 10 | 11 | EXPOSE 8080 12 | 13 | CMD ["streamlit", "run", "main.py", "--server.port", "8080", "--server.enableXsrfProtection", "false"] -------------------------------------------------------------------------------- /examples/chat-your-pdf/README.md: -------------------------------------------------------------------------------- 1 | # 📃 Chat Your PDF! 2 | 3 | This example Streamlit app demonstrates how to build a simple chatbot powered by Redis, LangChain, and Google's Vertex AI. It contains the following elements: 4 | 5 | - ⚙️ [LangChain](https://python.langchain.com/docs/get_started/introduction.html) for app orchestration, agent construction, and tools 6 | - 🖥️ [Streamlit](https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps) for the front end and conversational interface 7 | - ☁️ [GCP Vertex AI Palm 2](https://cloud.google.com/vertex-ai/docs/generative-ai/start/quickstarts/api-quickstart) models for embedding creation and chat completion 8 | - 💥 [Redis](https://redis.io) and [RedisVL client](https://redisvl.com) for Retrieval-Augmented Generation (RAG), LLM Semantic Caching, and chat history persistence 9 | 10 | ![Chat Your PDF screenshot](https://github.com/redis-developer/gcp-redis-llm-stack/assets/20304844/fdf24716-965a-4a66-88b2-761427ff61c0) 11 | 12 | ## About 13 | Redis is well-suited to powering chatbots thanks to its flexible data models, query engine, and high performance. This enables users to leverage Redis for a variety of gen AI needs: 14 | - **RAG** -- ensures that relevant context is retrieved from Redis as a [Vector Database](https://redis.com/solutions/use-cases/vector-database), given a user's question 15 | - **Semantic Caching** -- ensures that duplicate requests for identical or very *similar* information don't trigger redundant, costly LLM calls. Ex: 16 | ```bash 17 | streamlit | Full Response Time (secs) 1.6435627937316895 18 | streamlit | Cache Response Time (secs) 0.11130380630493164 19 | ``` 20 | - **Chat History** -- ensures distributed & low latency access to conversation history in Redis [Lists](https://redis.io/docs/data-types/lists/) 21 | 22 | ## Setting up the app environment 23 | 24 | ### Obtain Google Cloud credentials 25 | You need a valid GCP project, a service account, and an application JSON key file in order to authenticate with GCP. The credentials file will be mounted to the Docker container of the chatbot app and exposed through the `GOOGLE_APPLICATION_CREDENTIALS` environment variable. 
For more information [check out this link](https://cloud.google.com/docs/authentication/application-default-credentials#GAC). 26 | 27 | 28 | 1) **Download a GCP credentials JSON file**: 29 | - Go to "IAM & Admin" panel in the GCP console. 30 | - On the left navbar select "Service Accounts". 31 | - Select the name of the service account. 32 | - On the top bar, select the "Keys" tab. 33 | - Above the list of active keys, select "ADD KEY" to create a new JSON key file. 34 | 2) **Move the credentials file into the root level of this folder here as `gcp_credentials.json`** 35 | ```bash 36 | mv ~/Downloads/<your-key-file>.json app/gcp_credentials.json 37 | ``` 38 | 39 | ### Update environment configuration 40 | The project comes with a template `.env.example` file with the following values. Make a copy of this as `.env`. Update the values below accordingly. 41 | 42 | ```bash 43 | CHUNK_SIZE=500 44 | CHUNK_OVERLAP=100 45 | DOCS_FOLDER="pdfs/" 46 | REDIS_URL="redis://localhost:6379" 47 | GCP_PROJECT_ID="YOUR_GCP_PROJECT_NAME" #nifty-456098 48 | GCP_LOCATION="YOUR_VERTEXAI_REGION" #us-central1 49 | ``` 50 | 51 | - Update the `GCP_PROJECT_ID` and `GCP_LOCATION` variables based on your GCP project and Vertex AI configuration. 52 | - Update the `REDIS_URL` based on your Redis Enterprise database deployed in GCP. 53 | 54 | 55 | ## Run 56 | 57 | To run the app, follow these steps: 58 | 59 | 1. Clone this repository to your local machine. 60 | 2. Set up your GCP credentials as outlined above. 61 | 3. Copy the `.env.example` to `.env` and configure the values as outlined above. 62 | 4. Run the app with Docker compose: `docker-compose up`. 63 | 5. Open your browser at http://localhost:8080/. 64 | 65 | -------------------------------------------------------------------------------- /examples/chat-your-pdf/app/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class AppConfig: 5 | DOCS_FOLDER=os.environ["DOCS_FOLDER"] 6 | REDIS_URL=os.environ["REDIS_URL"] 7 | GCP_PROJECT_ID=os.environ["GCP_PROJECT_ID"] 8 | GCP_LOCATION=os.environ["GCP_LOCATION"] 9 | GOOGLE_APPLICATION_CREDENTIALS=os.environ["GOOGLE_APPLICATION_CREDENTIALS"] 10 | CHUNK_SIZE=int(os.getenv("CHUNK_SIZE", 500)) 11 | CHUNK_OVERLAP=int(os.getenv("CHUNK_OVERLAP", 10)) 12 | PAGE_TITLE=os.getenv("PAGE_TITLE", "📃 Chat Your PDF") 13 | PAGE_ICON=os.getenv("PAGE_ICON", "📃") 14 | RETRIEVE_TOP_K=int(os.getenv("RETRIEVE_TOP_K", 5)) 15 | LLMCACHE_THRESHOLD=float(os.getenv("LLMCACHE_THRESHOLD", 0.15)) 16 | -------------------------------------------------------------------------------- /examples/chat-your-pdf/app/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import streamlit as st 3 | import uuid 4 | 5 | from time import time 6 | 7 | from config import AppConfig 8 | 9 | from redisvl.extensions.llmcache import SemanticCache 10 | from redisvl.utils.vectorize import VertexAITextVectorizer 11 | 12 | from langchain.chat_models import ChatVertexAI 13 | from langchain.document_loaders import PyPDFLoader 14 | from langchain.memory import ConversationBufferMemory 15 | from langchain.memory.chat_message_histories import RedisChatMessageHistory 16 | from langchain.embeddings import VertexAIEmbeddings 17 | from langchain.callbacks.base import BaseCallbackHandler 18 | from langchain.vectorstores import Redis 19 | from langchain.text_splitter import RecursiveCharacterTextSplitter 20 | from langchain.agents.agent_toolkits import create_retriever_tool 21 | from 
langchain.agents import AgentType 22 | from langchain.agents import initialize_agent 23 | from dotenv import load_dotenv 24 | 25 | 26 | 27 | # Load Global env 28 | 29 | load_dotenv() 30 | 31 | config = AppConfig() 32 | 33 | if "session_id" not in st.session_state: 34 | st.session_state.session_id = uuid.uuid4().hex 35 | 36 | 37 | # Helpers 38 | 39 | @st.cache_resource() 40 | def configure_retriever(path): 41 | """Create the Redis Vector DB retrieval tool""" 42 | # Read documents 43 | docs = [] 44 | for file in os.listdir(path): 45 | print(file, flush=True) 46 | loader = PyPDFLoader(os.path.join(path, file)) 47 | docs.extend(loader.load()) 48 | # Split documents 49 | text_splitter = RecursiveCharacterTextSplitter( 50 | chunk_size=config.CHUNK_SIZE, chunk_overlap=config.CHUNK_OVERLAP 51 | ) 52 | splits = text_splitter.split_documents(docs) 53 | # Create embeddings and store in vectordb 54 | embeddings = VertexAIEmbeddings(project=config.GCP_PROJECT_ID, location=config.GCP_LOCATION) 55 | vectordb = Redis.from_documents( 56 | splits, embeddings, redis_url=config.REDIS_URL, index_name="chatbot" 57 | ) 58 | # Define retriever 59 | retriever = vectordb.as_retriever(search_kwargs={"k": config.RETRIEVE_TOP_K}) 60 | tool = create_retriever_tool(retriever, "search_chevy_manual", "Searches and returns snippets from the Chevy Colorado 2022 car manual.") 61 | return tool 62 | 63 | 64 | @st.cache_resource() 65 | def configure_cache(): 66 | """Set up the Redis LLMCache built with VertexAI Text Embeddings""" 67 | llmcache_embeddings = VertexAITextVectorizer( 68 | api_config={"project_id": config.GCP_PROJECT_ID, "location": config.GCP_LOCATION} 69 | ) 70 | return SemanticCache( 71 | redis_url=config.REDIS_URL, 72 | threshold=config.LLMCACHE_THRESHOLD, # semantic similarity threshold 73 | vectorizer=llmcache_embeddings 74 | ) 75 | 76 | 77 | def configure_agent(chat_memory, tools: list): 78 | """Configure the conversational chat agent that can use the Redis vector db for RAG""" 79 | memory = ConversationBufferMemory( 80 | memory_key="chat_history", chat_memory=chat_memory, return_messages=True 81 | ) 82 | chatLLM = ChatVertexAI( 83 | temperature=0.1, 84 | project=config.GCP_PROJECT_ID, 85 | location=config.GCP_LOCATION 86 | ) 87 | PREFIX = """You are a friendly AI assistant that helps users understand their 2022 Chevy Colorado vehicle based on the provided PDF car manual. Users can ask questions of the manual! You should not make anything up.""" 88 | 89 | FORMAT_INSTRUCTIONS = """You have access to the following tools: 90 | 91 | {tools} 92 | 93 | Use the following format: 94 | 95 | ''' 96 | Question: the input question you must answer 97 | Thought: you should always think about what to do 98 | Action: the action to take, should be one of [{tool_names}] 99 | Action Input: the input to the action 100 | Observation: the result of the action 101 | ... (this Thought/Action/Action Input/Observation can repeat N times) 102 | Thought: I now know the final answer 103 | Final Answer: the final answer to the original input question 104 | ''' 105 | 106 | When you have gathered all the information required, respond to the user in a friendly manner. 107 | """ 108 | 109 | SUFFIX = """ 110 | 111 | Begin! 
Remember to give detailed, informative answers 112 | 113 | Previous conversation history: 114 | {chat_history} 115 | 116 | New question: {input} 117 | {agent_scratchpad} 118 | """ 119 | return initialize_agent( 120 | tools, 121 | chatLLM, 122 | agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION, 123 | verbose=True, 124 | memory=memory, 125 | agent_kwargs={ 126 | 'prefix': PREFIX, 127 | 'format_instructions': FORMAT_INSTRUCTIONS, 128 | 'suffix': SUFFIX 129 | } 130 | ) 131 | 132 | 133 | class PrintRetrievalHandler(BaseCallbackHandler): 134 | """Callback to print retrieved source documents from Redis during RAG.""" 135 | def __init__(self, container): 136 | self.container = container.expander("Context Retrieval") 137 | 138 | def on_retriever_start(self, query: str, **kwargs): 139 | self.container.write(f"**Question:** {query}") 140 | 141 | def on_retriever_end(self, documents, **kwargs): 142 | # self.container.write(documents) 143 | for idx, doc in enumerate(documents): 144 | source = os.path.basename(doc.metadata["source"]) 145 | self.container.write(f"**Document {idx} from {source}**") 146 | self.container.markdown(doc.page_content) 147 | 148 | 149 | def generate_response( 150 | use_cache: bool, 151 | llmcache: SemanticCache, 152 | user_query: str, 153 | agent 154 | ) -> str: 155 | """Generate a response to the user's question after checking the cache (if enabled).""" 156 | t0 = time() 157 | if use_cache: 158 | if response := llmcache.check(user_query): 159 | print("Cache Response Time (secs)", time()-t0, flush=True) 160 | return response[0]['response'] 161 | 162 | retrieval_handler = PrintRetrievalHandler(st.container()) 163 | response = agent.run(input=user_query, callbacks=[retrieval_handler]) 164 | print("Full Response Time (secs)", time()-t0, flush=True) 165 | return response 166 | 167 | 168 | def render(): 169 | """Render the Streamlit chatbot user interface.""" 170 | # Main Page 171 | st.set_page_config(page_title=config.PAGE_TITLE, page_icon=config.PAGE_ICON) 172 | st.title(config.PAGE_TITLE) 173 | 174 | # Setup LLMCache in Redis 175 | llmcache = configure_cache() 176 | 177 | # Setup Redis memory for conversation history 178 | msgs = RedisChatMessageHistory( 179 | session_id=st.session_state.session_id, url=config.REDIS_URL 180 | ) 181 | 182 | # Sidebar 183 | with st.sidebar: 184 | use_cache = st.checkbox("Use LLM cache?") 185 | if st.button("Clear LLM cache"): 186 | llmcache.clear() 187 | if len(msgs.messages) == 0 or st.button("Clear message history"): 188 | msgs.clear() 189 | 190 | 191 | # Setup Redis vector db retrieval 192 | retriever = configure_retriever(config.DOCS_FOLDER) 193 | 194 | # Configure Agent 195 | agent = configure_agent(chat_memory=msgs, tools=[retriever]) 196 | 197 | # Chat Interface 198 | avatars = {"human": "user", "ai": "assistant"} 199 | for msg in msgs.messages: 200 | if msg.type in avatars: 201 | with st.chat_message(avatars[msg.type]): 202 | st.markdown(msg.content) 203 | 204 | if user_query := st.chat_input(placeholder="Ask me anything about the 2022 Chevy Colorado!"): 205 | st.chat_message("user").write(user_query) 206 | 207 | with st.chat_message("assistant"): 208 | response = generate_response(use_cache, llmcache, user_query, agent) 209 | st.markdown(response) 210 | if use_cache: 211 | # TODO - should we cache responses that were used from the cache? 
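            # One consideration for the TODO above: as written, a cache *hit* also
            # falls through to this store() call, re-inserting an answer that was
            # just read from the cache. A hypothetical fix (not implemented here)
            # would have generate_response() also return a "from_cache" flag and
            # only call llmcache.store() when the response was freshly generated.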
212 | llmcache.store(user_query, response) 213 | 214 | 215 | if __name__ == "__main__": 216 | render() 217 | -------------------------------------------------------------------------------- /examples/chat-your-pdf/app/pdfs/2022-chevrolet-colorado-ebrochure.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/redis-developer/gcp-redis-llm-stack/d61051e79d5a409e85549eb91cd892a5c4bb7618/examples/chat-your-pdf/app/pdfs/2022-chevrolet-colorado-ebrochure.pdf -------------------------------------------------------------------------------- /examples/chat-your-pdf/app/requirements.txt: -------------------------------------------------------------------------------- 1 | redisvl 2 | langchain 3 | openai>=1.13.0 4 | python-dotenv==0.21.0 5 | streamlit==1.24.1 6 | streamlit-chat 7 | google-cloud-aiplatform>=1.26.0 8 | sentence-transformers==2.2.2 9 | altair==4.0 10 | pypdf 11 | -------------------------------------------------------------------------------- /examples/chat-your-pdf/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | redis: 5 | image: redis/redis-stack:latest 6 | ports: 7 | - "6379:6379" 8 | - "8001:8001" 9 | volumes: 10 | - redis_data:/data 11 | healthcheck: 12 | test: ["CMD", "redis-cli", "-h", "localhost", "-p", "6379", "ping"] 13 | interval: 2s 14 | timeout: 1m30s 15 | retries: 5 16 | start_period: 5s 17 | chatbot: 18 | container_name: streamlit 19 | build: 20 | context: ./ 21 | volumes: 22 | - ./app/:/app 23 | ports: 24 | - "8080:8080" 25 | env_file: 26 | - .env 27 | environment: 28 | - GOOGLE_APPLICATION_CREDENTIALS=/app/gcp_credentials.json 29 | depends_on: 30 | redis: 31 | condition: service_healthy 32 | # jupyter: 33 | # image: jupyter/minimal-notebook:latest 34 | # container_name: jupyter 35 | # volumes: 36 | # - ./:/home/jovyan/ 37 | # ports: 38 | # - "8888:8888" 39 | # depends_on: 40 | # - "redis" 41 | # environment: 42 | # JUPYTER_ENABLE_LAB: "yes" 43 | 44 | volumes: 45 | redis_data: --------------------------------------------------------------------------------