├── .gitignore
├── README.md
├── localbot.py
├── localbot_adapted.py
├── requirements.txt
├── slack_manifest.json
└── slackbot.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.venv

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Connector dev bot

This is the code for the [tutorial published on the Airbyte blog](https://airbyte.com/tutorials/chat-with-your-data-using-openai-pinecone-airbyte-and-langchain).

It implements a chatbot that uses Pinecone to store contextual information, LangChain to orchestrate an LLM, and the Slack SDK to provide a Slack bot that can answer Airbyte connector builder-related questions on Slack.

If you like this project, leave us a star ⭐ on the main [Airbyte repo](https://github.com/airbytehq/airbyte/blob/master/README.md)!

## How to run

You need a local Python installation.

* Follow the tutorial to create a Pinecone index and populate it with data via Airbyte
* Run `python -m venv .venv` to create a virtual environment (`.venv` is already excluded by the `.gitignore`)
* Run `source .venv/bin/activate` to activate the virtual environment
* Run `pip install -r requirements.txt` to install the dependencies

### Run the bot locally in your terminal

* Run `export PINECONE_API_KEY=` to set the Pinecone API key
* Run `export PINECONE_INDEX_NAME=` to set the Pinecone index name
* Run `export PINECONE_ENV=` to set the Pinecone environment
* Run `export OPENAI_API_KEY=` to set the OpenAI API key
* Run `python localbot.py` to start the bot (`localbot_adapted.py` uses improved prompts for better results)

### Run the bot on Slack

* Use the `slack_manifest.json` file to create a Slack app and install it in your workspace.
* Run `export PINECONE_API_KEY=` to set the Pinecone API key
* Run `export PINECONE_INDEX_NAME=` to set the Pinecone index name
* Run `export PINECONE_ENV=` to set the Pinecone environment
* Run `export OPENAI_API_KEY=` to set the OpenAI API key
* Run `export SLACK_APP_TOKEN=` to set the Slack app token
* Run `export SLACK_BOT_TOKEN=` to set the Slack bot token
* Run `python slackbot.py` to start the bot (a quick way to verify these variables are set is sketched right after this README)

Again, leave us a star ⭐ on the main [Airbyte repo](https://github.com/airbytehq/airbyte/blob/master/README.md)!
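Since both run modes depend on a handful of environment variables, a small pre-flight check can save a confusing stack trace later. Here is a minimal sketch, assuming the variable names from the README above; the script itself (`check_env.py`) is hypothetical and not part of the repo:

```python
# check_env.py (hypothetical helper): fail fast if required variables are unset.
import os
import sys

REQUIRED = ["PINECONE_API_KEY", "PINECONE_INDEX_NAME", "PINECONE_ENV", "OPENAI_API_KEY"]
SLACK = ["SLACK_APP_TOKEN", "SLACK_BOT_TOKEN"]  # only needed for slackbot.py

def check(names):
    missing = [name for name in names if not os.environ.get(name)]
    if missing:
        sys.exit(f"Missing environment variables: {', '.join(missing)}")

check(REQUIRED)
if "--slack" in sys.argv:  # pass --slack when you intend to run slackbot.py
    check(SLACK)
print("Environment looks complete.")
```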
--------------------------------------------------------------------------------
/localbot.py:
--------------------------------------------------------------------------------
# This script runs the most basic bot locally in your terminal

import os

import pinecone
from langchain.chains import RetrievalQA
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Pinecone

embeddings = OpenAIEmbeddings()
pinecone.init(api_key=os.environ["PINECONE_API_KEY"], environment=os.environ["PINECONE_ENV"])
index = pinecone.Index(os.environ["PINECONE_INDEX_NAME"])
vector_store = Pinecone(index, embeddings.embed_query, "text")

qa = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0), chain_type="stuff", retriever=vector_store.as_retriever())

print("Connector development help bot. What do you want to know?")
while True:
    query = input("")
    answer = qa.run(query)
    print(answer)
    print("\nWhat else can I help you with?")

--------------------------------------------------------------------------------
/localbot_adapted.py:
--------------------------------------------------------------------------------
# This script runs the bot with improved prompts locally in your terminal

import os
from typing import List

import pinecone
from langchain.chains import RetrievalQA
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.schema.document import Document
from langchain.vectorstores import Pinecone
from langchain.vectorstores.base import VectorStoreRetriever

embeddings = OpenAIEmbeddings()
pinecone.init(api_key=os.environ["PINECONE_API_KEY"], environment=os.environ["PINECONE_ENV"])
index = pinecone.Index(os.environ["PINECONE_INDEX_NAME"])
vector_store = Pinecone(index, embeddings.embed_query, "text")

prompt_template = """You are a question-answering bot operating on Github issues and documentation pages for a product called connector builder. The documentation pages document what can be done, the issues document future plans and bugs. Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. State where you got this information from (and the Github issue number if applicable), but only do so if you used the information in your answer.

{context}

Question: {question}
Helpful Answer:"""
prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)


class ContextualRetriever(VectorStoreRetriever):
    """Retriever that prefixes each retrieved chunk with its source type."""

    def _get_relevant_documents(self, query: str, *, run_manager) -> List[Document]:
        docs = super()._get_relevant_documents(query, run_manager=run_manager)
        return [self.format_doc(doc) for doc in docs]

    def format_doc(self, doc: Document) -> Document:
        if doc.metadata["_airbyte_stream"] == "item_collection":
            doc.page_content = f"Excerpt from documentation page: {doc.page_content}"
        elif doc.metadata["_airbyte_stream"] == "issues":
            doc.page_content = f"Excerpt from Github issue: {doc.page_content}, issue number: {int(doc.metadata['number']):d}, issue state: {doc.metadata['state']}"
        elif doc.metadata["_airbyte_stream"] == "threads" or doc.metadata["_airbyte_stream"] == "channel_messages":
            doc.page_content = f"Excerpt from Slack thread: {doc.page_content}"
        return doc


qa = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0), chain_type="stuff", retriever=ContextualRetriever(vectorstore=vector_store), chain_type_kwargs={"prompt": prompt})

print("Connector development help bot. What do you want to know?")
while True:
    query = input("")
    answer = qa.run(query)
    print(answer)
    print("\nWhat else can I help you with?")
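To see what `ContextualRetriever` actually feeds the LLM, here is a hypothetical usage sketch (not part of the repo). The `Document` is hand-built with the metadata fields the code expects from the Airbyte-synced records (`_airbyte_stream`, `number`, `state`), and the issue number is made up for illustration:

```python
# Assumes the definitions from localbot_adapted.py above are in scope.
from langchain.schema.document import Document

doc = Document(
    page_content="The builder UI does not support OAuth yet.",
    metadata={"_airbyte_stream": "issues", "number": 12345, "state": "open"},
)
retriever = ContextualRetriever(vectorstore=vector_store)
print(retriever.format_doc(doc).page_content)
# Excerpt from Github issue: The builder UI does not support OAuth yet., issue number: 12345, issue state: open
```

The prefix tells the model whether a chunk describes current behavior (documentation) or plans and bugs (issues), which is exactly the distinction the prompt template asks it to reason about.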
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
pinecone-client
langchain
openai
slack_sdk
tiktoken

--------------------------------------------------------------------------------
/slack_manifest.json:
--------------------------------------------------------------------------------
{
    "display_information": {
        "name": "Connector Development Helper Bot",
        "description": "Helpful bot to answer questions around connector development",
        "background_color": "#303133"
    },
    "features": {
        "bot_user": {
            "display_name": "Airbyte Connector Dev Help",
            "always_online": false
        }
    },
    "oauth_config": {
        "scopes": {
            "user": [
                "channels:history"
            ],
            "bot": [
                "channels:history",
                "channels:join",
                "channels:read",
                "files:read",
                "groups:read",
                "links:read",
                "reactions:read",
                "remote_files:read",
                "team:read",
                "usergroups:read",
                "users.profile:read",
                "users:read",
                "app_mentions:read",
                "chat:write"
            ]
        }
    },
    "settings": {
        "event_subscriptions": {
            "bot_events": [
                "app_mention"
            ]
        },
        "interactivity": {
            "is_enabled": true
        },
        "org_deploy_enabled": false,
        "socket_mode_enabled": true,
        "token_rotation_enabled": false
    }
}
--------------------------------------------------------------------------------
/slackbot.py:
--------------------------------------------------------------------------------
# This script runs the bot on Slack

import os
from threading import Event
from typing import List

import pinecone
from langchain.chains import RetrievalQA
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.schema.document import Document
from langchain.vectorstores import Pinecone
from langchain.vectorstores.base import VectorStoreRetriever
from slack_sdk import WebClient
from slack_sdk.socket_mode import SocketModeClient
from slack_sdk.socket_mode.request import SocketModeRequest
from slack_sdk.socket_mode.response import SocketModeResponse

# Initialize Pinecone
embeddings = OpenAIEmbeddings()
pinecone.init(api_key=os.environ["PINECONE_API_KEY"], environment=os.environ["PINECONE_ENV"])
index = pinecone.Index(os.environ["PINECONE_INDEX_NAME"])
vector_store = Pinecone(index, embeddings.embed_query, "text")


# Define prompts
prompt_template = """You are a question-answering bot operating on Github issues and documentation pages for a product called connector builder. The documentation pages document what can be done, the issues document future plans and bugs. Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. State where you got this information from (and the Github issue number if applicable), but only do so if you used the information in your answer.

{context}

Question: {question}
Helpful Answer:"""
prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)


class ContextualRetriever(VectorStoreRetriever):
    """Retriever that prefixes each retrieved chunk with its source type."""

    def _get_relevant_documents(self, query: str, *, run_manager) -> List[Document]:
        docs = super()._get_relevant_documents(query, run_manager=run_manager)
        return [self.format_doc(doc) for doc in docs]

    def format_doc(self, doc: Document) -> Document:
        if doc.metadata["_airbyte_stream"] == "item_collection":
            doc.page_content = f"Excerpt from documentation page: {doc.page_content}"
        elif doc.metadata["_airbyte_stream"] == "issues":
            doc.page_content = f"Excerpt from Github issue: {doc.page_content}, issue number: {int(doc.metadata['number']):d}, issue state: {doc.metadata['state']}"
        elif doc.metadata["_airbyte_stream"] == "threads" or doc.metadata["_airbyte_stream"] == "channel_messages":
            doc.page_content = f"Excerpt from Slack thread: {doc.page_content}"
        return doc


# Initialize the QA system
qa = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0), chain_type="stuff", retriever=ContextualRetriever(vectorstore=vector_store), chain_type_kwargs={"prompt": prompt})


# Wire it up to Slack
slack_web_client = WebClient(token=os.environ["SLACK_BOT_TOKEN"])

# Remember already-answered messages so duplicate deliveries don't trigger duplicate replies
# (a bounded variant is sketched after this file)
handled_messages = {}

def process(client: SocketModeClient, socket_mode_request: SocketModeRequest):
    # Acknowledge the request right away: slack_sdk ignores listener return values,
    # so the ack has to be sent explicitly or Slack keeps redelivering the event.
    client.send_socket_mode_response(SocketModeResponse(envelope_id=socket_mode_request.envelope_id))
    if socket_mode_request.type == "events_api":
        event = socket_mode_request.payload.get("event", {})
        client_msg_id = event.get("client_msg_id")
        if event.get("type") == "app_mention" and not handled_messages.get(client_msg_id):
            handled_messages[client_msg_id] = True
            channel_id = event.get("channel")
            text = event.get("text")
            result = qa.run(text)
            slack_web_client.chat_postMessage(channel=channel_id, text=result)

socket_mode_client = SocketModeClient(
    app_token=os.environ["SLACK_APP_TOKEN"],
    web_client=slack_web_client
)
socket_mode_client.socket_mode_request_listeners.append(process)

socket_mode_client.connect()
print("listening")
Event().wait()

--------------------------------------------------------------------------------
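The `handled_messages` dict deduplicates redelivered events, but it grows for as long as the process runs. A possible hardening, sketched here as an assumption rather than part of the tutorial, is to cap it with an `OrderedDict` that evicts the oldest entries:

```python
# Hypothetical bounded replacement for the handled_messages dict in slackbot.py.
from collections import OrderedDict

class BoundedSet:
    def __init__(self, max_size: int = 1000):
        self._items: OrderedDict = OrderedDict()
        self._max_size = max_size

    def add(self, key) -> None:
        self._items[key] = True
        if len(self._items) > self._max_size:
            self._items.popitem(last=False)  # drop the oldest entry

    def __contains__(self, key) -> bool:
        return key in self._items

# usage in process():
#     if client_msg_id not in handled_messages:
#         handled_messages.add(client_msg_id)
#         ...
```

A cap of a thousand entries is generous here: Slack only redelivers an event a few times over a short window, so a duplicate's id will still be in the cache when it arrives.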