├── .env.example
├── .gitignore
├── Dockerfile
├── README.md
├── app
│   ├── __init__.py
│   ├── chain.py
│   └── server.py
├── poetry.lock
├── pyproject.toml
└── test_app.ipynb
/.env.example:
--------------------------------------------------------------------------------
PINECONE_API_KEY=
PINECONE_ENVIRONMENT=
PINECONE_INDEX_NAME=
COHERE_API_KEY=
OPENAI_API_KEY=
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.env
__pycache__
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM python:3.11-slim

RUN pip install poetry==1.6.1

RUN poetry config virtualenvs.create false

WORKDIR /code

COPY ./pyproject.toml ./README.md ./poetry.lock* ./

RUN poetry install --no-interaction --no-ansi --no-root

COPY ./app ./app

RUN poetry install --no-interaction --no-ansi

EXPOSE 8080

CMD exec uvicorn app.server:app --host 0.0.0.0 --port 8080
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Pinecone-Serverless

We see demand for tools that bridge the gap between prototyping and production. With usage-based pricing and support for unlimited scaling, Pinecone Serverless helps to address pain points with vectorstore productionization that we've seen from the community. This repo builds a RAG chain that connects to a Pinecone Serverless index using LCEL, turns it into a web service with LangServe, uses Hosted LangServe to deploy it, and uses LangSmith to monitor the inputs / outputs.
![chain](https://github.com/langchain-ai/pinecone-serverless/assets/122662504/454266ba-727c-4ce0-ae56-7d004c0fb5d4)

### Index

Follow [instructions from Pinecone](https://www.pinecone.io/blog/serverless/) on setting up your serverless index.

### API keys

Ensure these are set:

* PINECONE_API_KEY
* PINECONE_ENVIRONMENT
* PINECONE_INDEX_NAME
* OPENAI_API_KEY

Note: the choice of embedding model may require additional API keys, such as:
* COHERE_API_KEY

### Notebook

For prototyping:
```
poetry run jupyter notebook
```

### Deployment

This repo was created by following these steps:

**(1) Create a LangChain app.**

Run:
```
langchain app new .
```

This creates two folders:
```
app: This is where LangServe code will live
packages: This is where your chains or agents will live
```

It also creates:
```
Dockerfile: App configurations
pyproject.toml: Project configurations
```

Add your app dependencies to `pyproject.toml` and `poetry.lock` to support Pinecone serverless:
```
poetry add pinecone-client==3.0.0.dev8
poetry add langchain-community==0.0.12
poetry add cohere
poetry add openai
poetry add jupyter
```

Update the environment based on the updated lock file:
```
poetry install
```

**(2) Add your runnable (RAG app)**

Create a file, `chain.py`, with a runnable named `chain` that you want to execute.

This is our RAG logic (e.g., that we prototyped in our notebook).

Add `chain.py` to the `app` directory.
74 | 75 | Import the LCEL object in `server.py`: 76 | ``` 77 | from app.chain import chain as pinecone_wiki_chain 78 | add_routes(app, pinecone_wiki_chain, path="/pinecone-wikipedia") 79 | ``` 80 | 81 | Run locally 82 | ``` 83 | poetry run langchain serve 84 | ``` 85 | 86 | **(3) Deploy it with hosted LangServe** 87 | 88 | Go to your LangSmith console. 89 | 90 | Select `New Deployment`. 91 | 92 | Specify this Github url. 93 | 94 | Add the abovementioned API keys as secrets. 95 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/pinecone-serverless/9fe6b5c1b394e97d79573bf1f3970e399af881a0/app/__init__.py -------------------------------------------------------------------------------- /app/chain.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from langchain_community.chat_models import ChatOpenAI 5 | from langchain_community.embeddings import CohereEmbeddings 6 | from langchain_community.vectorstores import Pinecone 7 | from langchain_core.output_parsers import StrOutputParser 8 | from langchain_core.prompts import ChatPromptTemplate 9 | from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda 10 | from pinecone import Pinecone as PineconeClient 11 | import requests 12 | load_dotenv() 13 | 14 | # Keys 15 | PINECONE_API_KEY = os.environ["PINECONE_API_KEY"] 16 | PINECONE_ENVIRONMENT = os.environ["PINECONE_ENVIRONMENT"] 17 | PINECONE_INDEX_NAME = os.environ["PINECONE_INDEX_NAME"] 18 | 19 | pinecone = PineconeClient(api_key=PINECONE_API_KEY, 20 | environment=PINECONE_ENVIRONMENT) 21 | 22 | embeddings = CohereEmbeddings(model="multilingual-22-12") 23 | vectorstore = Pinecone.from_existing_index(index_name=PINECONE_INDEX_NAME, 24 | embedding=embeddings) 25 | 26 | retriever 
= vectorstore.as_retriever() 27 | 28 | def fetch_wikipedia_page(id): 29 | url = f"https://en.wikipedia.org/w/api.php?action=query&prop=extracts&format=json&pageids={id}" 30 | response = requests.get(url) 31 | data = response.json() 32 | page_content = list(data['query']['pages'].values())[0]['extract'] 33 | return page_content 34 | 35 | def fetch_url(x): 36 | urls = [doc.metadata['url'] for doc in x['context']] 37 | ids = [url.split('=')[-1] for url in urls] 38 | contents = [fetch_wikipedia_page(id)[:32000] for id in ids] 39 | return {"context": contents, "question": x["question"]} 40 | 41 | 42 | # RAG prompt 43 | template = """Answer the question based only on the following context: 44 | {context} 45 | Question: {question} 46 | """ 47 | prompt = ChatPromptTemplate.from_template(template) 48 | 49 | # RAG 50 | model = ChatOpenAI(temperature=0, model="gpt-4-1106-preview") 51 | 52 | chain = ( 53 | RunnableParallel({"context": retriever, "question": RunnablePassthrough()}) 54 | | RunnableLambda(fetch_url) # Add this line 55 | | prompt 56 | | model 57 | | StrOutputParser() 58 | ) 59 | 60 | 61 | -------------------------------------------------------------------------------- /app/server.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from fastapi.responses import RedirectResponse 3 | from langserve import add_routes 4 | from app.chain import chain as pinecone_wiki_chain 5 | 6 | app = FastAPI() 7 | 8 | 9 | @app.get("/") 10 | async def redirect_root_to_docs(): 11 | return RedirectResponse("/docs") 12 | 13 | 14 | # Edit this to add the chain you want to add 15 | add_routes(app, pinecone_wiki_chain, path="/pinecone-wikipedia") 16 | 17 | if __name__ == "__main__": 18 | import uvicorn 19 | 20 | uvicorn.run(app, host="0.0.0.0", port=8000) 21 | -------------------------------------------------------------------------------- /pyproject.toml: 
-------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "pinecone-wikipedia" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Lance Martin "] 6 | readme = "README.md" 7 | packages = [ 8 | { include = "app" }, 9 | ] 10 | 11 | [tool.poetry.dependencies] 12 | python = ">=3.11,<3.13" 13 | uvicorn = "^0.23.2" 14 | langserve = {extras = ["server"], version = ">=0.0.30"} 15 | pydantic = "<2" 16 | pinecone-client = "3.0.0" 17 | cohere = "^4.40" 18 | openai = "^1.6.1" 19 | python-dotenv = "^1.0.0" 20 | langchain-community = ">=0.0.13,<0.1" 21 | jupyter = "^1.0.0" 22 | 23 | [tool.poetry.group.dev.dependencies] 24 | langchain-cli = ">=0.0.15" 25 | 26 | [build-system] 27 | requires = ["poetry-core"] 28 | build-backend = "poetry.core.masonry.api" 29 | -------------------------------------------------------------------------------- /test_app.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 7, 6 | "id": "9e545fe7-e7ec-4b16-89ae-6820bb534454", 7 | "metadata": { 8 | "scrolled": true 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "import os\n", 13 | "from langchain_community.chat_models import ChatOpenAI\n", 14 | "from langchain_community.embeddings import CohereEmbeddings\n", 15 | "from langchain_community.vectorstores import Pinecone\n", 16 | "from langchain_core.output_parsers import StrOutputParser\n", 17 | "from langchain_core.prompts import ChatPromptTemplate\n", 18 | "from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda\n", 19 | "from pinecone import Pinecone as PineconeClient\n", 20 | "import requests" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 8, 26 | "id": "6eed122c-aa7e-4856-ba01-66d6362d9758", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# Keys\n", 31 | "PINECONE_API_KEY = os.environ[\"PINECONE_API_KEY\"]\n", 
32 | "PINECONE_ENVIRONMENT = os.environ[\"PINECONE_ENVIRONMENT\"]\n", 33 | "PINECONE_INDEX_NAME = os.environ[\"PINECONE_INDEX_NAME\"]" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "id": "5621b9f9-f277-4d06-8bb0-72644792f2f8", 39 | "metadata": {}, 40 | "source": [ 41 | "Serverless index from [this dataset](https://huggingface.co/datasets/Cohere/wikipedia-22-12)." 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 9, 47 | "id": "3de2de5e-aad0-4ca3-b816-f2cb07b2d251", 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# Init\n", 52 | "pinecone = PineconeClient(api_key=PINECONE_API_KEY,\n", 53 | " environment=PINECONE_ENVIRONMENT)\n", 54 | "\n", 55 | "embeddings = CohereEmbeddings(model=\"multilingual-22-12\")\n", 56 | "vectorstore = Pinecone.from_existing_index(index_name=PINECONE_INDEX_NAME, embedding=embeddings)\n", 57 | "retriever = vectorstore.as_retriever()" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 10, 63 | "id": "9586b0fa-8555-4bcc-8cfc-ffea62fddc67", 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "# RAG prompt\n", 68 | "template = \"\"\"Answer the question based only on the following context:\n", 69 | "{context}\n", 70 | "Question: {question}\n", 71 | "\"\"\"\n", 72 | "prompt = ChatPromptTemplate.from_template(template)\n", 73 | "\n", 74 | "# RAG\n", 75 | "model = ChatOpenAI(temperature=0, \n", 76 | " model=\"gpt-4-1106-preview\")\n", 77 | "\n", 78 | "chain = (\n", 79 | " RunnableParallel({\"context\": retriever, \"question\": RunnablePassthrough()})\n", 80 | " | prompt\n", 81 | " | model\n", 82 | " | StrOutputParser()\n", 83 | ")" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 11, 89 | "id": "81c13ad6-cebb-4789-a9e6-816bca4e66bb", 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "'Film noir is a genre of film that is characterized by a set of attributes that provoke ongoing debate regarding its definition. 
According to French critics, including Étienne Chaumeton, in their 1955 book \"Panorama du film noir américain 1941–1953\" (\"A Panorama of American Film Noir\"), film noir can be described as oneiric (dreamlike), strange, erotic, ambivalent, and cruel, although not every film noir necessarily embodies all these attributes to the same degree. The genre is known for its complexity, with films that may feature crime and violence, complex characters and plot-lines, mystery, and moral ambivalence.\\n\\nFilm noir is also recognized for its visual style, which may include the use of chiaroscuro lighting techniques, and is often associated with a bleak societal perspective, offering a critique on global capitalism and consumerism. This is particularly evident in the neon-noir sub-genre, which emphasizes the socio-critical aspects of film noir and often includes long shots or montages of dark and menacing cityscapes.\\n\\nThe defining characteristics of film noir are a source of controversy among critics, with some focusing on the genre\\'s tragic or bleak conclusions, distinctive visual style, plot and character types, mood, and attitude. Despite the many attempts to define film noir, it remains an elusive phenomenon that is difficult to pin down with a definitive set of identifying characteristics.'" 96 | ] 97 | }, 98 | "execution_count": 11, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "chain.invoke(\"what is film noir?\")" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "id": "4568a49d-91f7-4d5f-8514-9f5a447f5027", 110 | "metadata": {}, 111 | "source": [ 112 | "Extract full wiki page." 
113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 12, 118 | "id": "ea74ba28-668c-4bc0-b262-568298e13533", 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "def fetch_wikipedia_page(id):\n", 123 | " url = f\"https://en.wikipedia.org/w/api.php?action=query&prop=extracts&format=json&pageids={id}\"\n", 124 | " response = requests.get(url)\n", 125 | " data = response.json()\n", 126 | " page_content = list(data['query']['pages'].values())[0]['extract']\n", 127 | " return page_content\n", 128 | "\n", 129 | "def fetch_url(x):\n", 130 | " urls = [doc.metadata['url'] for doc in x['context']]\n", 131 | " ids = [url.split('=')[-1] for url in urls]\n", 132 | " # First 32k tokens\n", 133 | " contents = [fetch_wikipedia_page(id)[:32000] for id in ids] \n", 134 | " return {\"context\": contents, \"question\": x[\"question\"]}\n", 135 | "\n", 136 | "# RAG\n", 137 | "model = ChatOpenAI(temperature=0, \n", 138 | " model=\"gpt-4-1106-preview\")\n", 139 | "\n", 140 | "chain = (\n", 141 | " RunnableParallel({\"context\": retriever, \"question\": RunnablePassthrough()})\n", 142 | " | RunnableLambda(fetch_url) \n", 143 | " | prompt\n", 144 | " | model\n", 145 | " | StrOutputParser()\n", 146 | ")" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 13, 152 | "id": "10d7eb82-c9ad-4ddd-a543-72b7fa40d009", 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "data": { 157 | "text/plain": [ 158 | "'Film noir is a cinematic term used primarily to describe stylized Hollywood crime dramas, particularly those that emphasize cynical attitudes and motivations. The classic period of American film noir is generally regarded as the 1940s and 1950s. Film noir of this era is associated with a low-key, black-and-white visual style that has roots in German Expressionist cinematography. 
It encompasses a range of plots and central figures, including private investigators, plainclothes police officers, aging boxers, hapless grifters, law-abiding citizens lured into a life of crime, femme fatales, or victims of circumstance.'" 159 | ] 160 | }, 161 | "execution_count": 13, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "chain.invoke(\"what is film noir?\")" 168 | ] 169 | } 170 | ], 171 | "metadata": { 172 | "kernelspec": { 173 | "display_name": "Python 3 (ipykernel)", 174 | "language": "python", 175 | "name": "python3" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": { 179 | "name": "ipython", 180 | "version": 3 181 | }, 182 | "file_extension": ".py", 183 | "mimetype": "text/x-python", 184 | "name": "python", 185 | "nbconvert_exporter": "python", 186 | "pygments_lexer": "ipython3", 187 | "version": "3.11.4" 188 | } 189 | }, 190 | "nbformat": 4, 191 | "nbformat_minor": 5 192 | } 193 | --------------------------------------------------------------------------------