├── Chat_with_CSV_&_Excel_using_LangChain_and_OpenAI.ipynb
└── README.md
/Chat_with_CSV_&_Excel_using_LangChain_and_OpenAI.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |   "nbformat": 4,
3 |   "nbformat_minor": 0,
4 |   "metadata": {
5 |     "colab": {
6 |       "provenance": []
7 |     },
8 |     "kernelspec": {
9 |       "name": "python3",
10 |       "display_name": "Python 3"
11 |     },
12 |     "language_info": {
13 |       "name": "python"
14 |     }
15 |   },
16 |   "cells": [
17 |     {
18 |       "cell_type": "markdown",
19 |       "source": [
20 |         "# Chat with a CSV file using LangChain and OpenAI\n",
21 |         "\n",
22 |         "Loads a CSV file, indexes its contents in a vector store, and answers natural-language questions about it with an OpenAI model. Only CSV loading is demonstrated here.\n",
23 |         "\n",
24 |         "Code Credit - https://twitter.com/pwang_szn/status/1642104548109201410/photo/1\n",
25 |         "\n",
26 |         "Outputs are cleared so the notebook can be re-run top to bottom from a fresh kernel. The previously saved run (download log dated 2023-04-02) answered the sample question with: *No, we do not have a column called age.*\n",
27 |         "\n",
28 |         "## Setup\n"
29 |       ],
30 |       "metadata": {
31 |         "id": "r0alwu2qZzGU"
32 |       }
33 |     },
34 |     {
35 |       "cell_type": "code",
36 |       "source": [
37 |         "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
38 |         "# NOTE(review): versions are unpinned; the saved run dates from 2023-04-02, so pin\n",
39 |         "# the releases you validate against if newer ones fail on the imports below.\n",
40 |         "%pip install -q langchain openai chromadb\n"
41 |       ],
42 |       "metadata": {
43 |         "colab": {
44 |           "base_uri": "https://localhost:8080/"
45 |         },
46 |         "id": "JGXR0-MsaE1T"
47 |       },
48 |       "execution_count": null,
49 |       "outputs": []
50 |     },
51 |     {
52 |       "cell_type": "code",
53 |       "source": [
54 |         "import os\n",
55 |         "import urllib.request\n",
56 |         "from getpass import getpass\n",
57 |         "\n",
58 |         "from langchain.chains import RetrievalQA\n",
59 |         "from langchain.document_loaders import CSVLoader\n",
60 |         "from langchain.indexes import VectorstoreIndexCreator\n",
61 |         "from langchain.llms import OpenAI"
62 |       ],
63 |       "metadata": {
64 |         "id": "0X1EX8eRcRCk"
65 |       },
66 |       "execution_count": null,
67 |       "outputs": []
68 |     },
69 |     {
70 |       "cell_type": "code",
71 |       "source": [
72 |         "# Remote location of the sample data set and the local file it is saved to.\n",
73 |         "CSV_URL = \"https://gist.githubusercontent.com/armgilles/194bcff35001e7eb53a2a8b441e8b2c6/raw/92200bc0a673d5ce2110aaad4544ed6c4010f687/pokemon.csv\"\n",
74 |         "CSV_PATH = \"pokemon.csv\""
75 |       ],
76 |       "metadata": {},
77 |       "execution_count": null,
78 |       "outputs": []
79 |     },
80 |     {
81 |       "cell_type": "markdown",
82 |       "source": [
83 |         "## OpenAI credentials\n",
84 |         "\n",
85 |         "Get your OpenAI Key from here - https://platform.openai.com/account/api-keys\n",
86 |         "\n",
87 |         "The key is read from the `OPENAI_API_KEY` environment variable when it is already set; otherwise you are prompted for it, so the key is never stored in the notebook."
88 |       ],
89 |       "metadata": {
90 |         "id": "aJOrUeJedzMF"
91 |       }
92 |     },
93 |     {
94 |       "cell_type": "code",
95 |       "source": [
96 |         "# Prompt only when the key is not already set; getpass does not echo the input.\n",
97 |         "if not os.environ.get(\"OPENAI_API_KEY\"):\n",
98 |         "    os.environ[\"OPENAI_API_KEY\"] = getpass(\"OpenAI API key: \")"
99 |       ],
100 |       "metadata": {
101 |         "id": "Zmk3KQMqZvsO"
102 |       },
103 |       "execution_count": null,
104 |       "outputs": []
105 |     },
106 |     {
107 |       "cell_type": "markdown",
108 |       "source": [
109 |         "## Load the data"
110 |       ],
111 |       "metadata": {}
112 |     },
113 |     {
114 |       "cell_type": "code",
115 |       "source": [
116 |         "# Download the sample CSV once; re-running the cell reuses the existing file.\n",
117 |         "if not os.path.exists(CSV_PATH):\n",
118 |         "    urllib.request.urlretrieve(CSV_URL, CSV_PATH)"
119 |       ],
120 |       "metadata": {
121 |         "colab": {
122 |           "base_uri": "https://localhost:8080/"
123 |         },
124 |         "id": "odUgyhg_c4Co"
125 |       },
126 |       "execution_count": null,
127 |       "outputs": []
128 |     },
129 |     {
130 |       "cell_type": "code",
131 |       "source": [
132 |         "# Load the documents\n",
133 |         "loader = CSVLoader(file_path=CSV_PATH)"
134 |       ],
135 |       "metadata": {
136 |         "id": "TR8dXxxHbY_b"
137 |       },
138 |       "execution_count": null,
139 |       "outputs": []
140 |     },
141 |     {
142 |       "cell_type": "markdown",
143 |       "source": [
144 |         "## Build the index and the question-answering chain\n",
145 |         "\n",
146 |         "The saved run logged this chromadb warning while building the index: `Using embedded DuckDB without persistence: data will be transient`. The index is rebuilt every time the cell below runs."
147 |       ],
148 |       "metadata": {}
149 |     },
150 |     {
151 |       "cell_type": "code",
152 |       "source": [
153 |         "# Create an index using the loaded documents\n",
154 |         "index_creator = VectorstoreIndexCreator()\n",
155 |         "docsearch = index_creator.from_loaders([loader])"
156 |       ],
157 |       "metadata": {
158 |         "colab": {
159 |           "base_uri": "https://localhost:8080/"
160 |         },
161 |         "id": "3P4s3IKXaar8"
162 |       },
163 |       "execution_count": null,
164 |       "outputs": []
165 |     },
166 |     {
167 |       "cell_type": "code",
168 |       "source": [
169 |         "# Create a question-answering chain using the index\n",
170 |         "# input_key is the dict key the query is passed under when the chain is called below.\n",
171 |         "chain = RetrievalQA.from_chain_type(\n",
172 |         "    llm=OpenAI(),\n",
173 |         "    chain_type=\"stuff\",\n",
174 |         "    retriever=docsearch.vectorstore.as_retriever(),\n",
175 |         "    input_key=\"question\",\n",
176 |         ")\n"
177 |       ],
178 |       "metadata": {
179 |         "id": "-vruq68YZnmL"
180 |       },
181 |       "execution_count": null,
182 |       "outputs": []
183 |     },
184 |     {
185 |       "cell_type": "markdown",
186 |       "source": [
187 |         "## Ask a question"
188 |       ],
189 |       "metadata": {}
190 |     },
191 |     {
192 |       "cell_type": "code",
193 |       "source": [
194 |         "# Pass a query to the chain\n",
195 |         "query = \"Do you have a column called age?\"\n",
196 |         "response = chain({\"question\": query})"
197 |       ],
198 |       "metadata": {
199 |         "id": "Jwy7gjr0aXBr"
200 |       },
201 |       "execution_count": null,
202 |       "outputs": []
203 |     },
204 |     {
205 |       "cell_type": "code",
206 |       "source": [
207 |         "print(response['result'])"
208 |       ],
209 |       "metadata": {
210 |         "colab": {
211 |           "base_uri": "https://localhost:8080/"
212 |         },
213 |         "id": "9D8ajM74eC_c"
214 |       },
215 |       "execution_count": null,
216 |       "outputs": []
217 |     }
218 |   ]
219 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # csvchat-langchain
2 | CSV Chat with LangChain and OpenAI
3 | 
--------------------------------------------------------------------------------