├── .env.copy ├── vercel.json ├── prompt.py ├── .gitignore ├── requirements.txt ├── app.py ├── README.md └── walkthrough.ipynb /.env.copy: -------------------------------------------------------------------------------- 1 | COHERE_API_KEY="" 2 | OPENAI_API_KEY="" 3 | weaviate_api_key="76320a90-53d8-42bc-b41d-678647c6672e" 4 | weaviate_url="https://cohere-demo.weaviate.network/" -------------------------------------------------------------------------------- /vercel.json: -------------------------------------------------------------------------------- 1 | { 2 | "devCommand": "gunicorn app:app --host 0.0.0.0 --port 3000", 3 | "builds": [ 4 | { 5 | "src": "app.py", 6 | "use": "@vercel/python" 7 | } 8 | ], 9 | "routes": [ 10 | { 11 | "src": "/(.*)", 12 | "dest": "/" 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /prompt.py: -------------------------------------------------------------------------------- 1 | from langchain.prompts import PromptTemplate 2 | # QA prompt: answer from the supplied {context}, say so when the answer is unknown, and write the answer in {language} 3 | prompt_template = """Use the following pieces of context to answer the question at the end. 4 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 
5 | 6 | {context} 7 | 8 | Question: {question} 9 | Helpful Answer in {language}:""" 10 | PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question","language"]) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Environments 2 | .env 3 | .venv 4 | env/ 5 | venv/ 6 | ENV/ 7 | env.bak/ 8 | venv.bak/ 9 | 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | pip-wheel-metadata/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Flask stuff: 51 | instance/ 52 | .webassets-cache -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.4 2 | aiosignal==1.3.1 3 | appnope==0.1.3 4 | asttokens==2.2.1 5 | async-timeout==4.0.2 6 | attrs==23.1.0 7 | Authlib==1.2.0 8 | backcall==0.2.0 9 | backoff==2.2.1 10 | blinker==1.6.2 11 | certifi==2023.5.7 12 | cffi==1.15.1 13 | charset-normalizer==3.1.0 14 | click==8.1.3 15 | cohere==4.4.1 16 | comm==0.1.3 17 | cryptography==40.0.2 18 | dataclasses-json==0.5.7 19 | debugpy==1.6.7 20 | decorator==5.1.1 21 | executing==1.2.0 22 | Flask==2.3.2 23 | Flask-Cors==3.0.10 24 | frozenlist==1.3.3 25 | greenlet==2.0.2 26 | gunicorn==20.1.0 27 | idna==3.4 28 | ipykernel==6.23.0 29 | ipython==8.13.2 30 | itsdangerous==2.1.2 31 | jedi==0.18.2 32 | Jinja2==3.1.2 33 | jupyter_client==8.2.0 34 | jupyter_core==5.3.0 35 | langchain @ git+https://github.com/hwchase17/langchain.git@3637d6da6e51cd8b96a9a4f91eb71e8db21a8b20 36 | MarkupSafe==2.1.2 37 | marshmallow==3.19.0 38 | marshmallow-enum==1.5.1 39 | matplotlib-inline==0.1.6 40 | multidict==6.0.4 41 | mypy-extensions==1.0.0 42 | nest-asyncio==1.5.6 43 | numexpr==2.8.4 44 | numpy==1.24.3 45 | openai==0.27.6 46 | openapi-schema-pydantic==1.2.4 47 | packaging==23.1 48 | parso==0.8.3 49 | pexpect==4.8.0 50 | pickleshare==0.7.5 51 | pip-autoremove==0.10.0 52 | platformdirs==3.5.0 53 | prompt-toolkit==3.0.38 54 | psutil==5.9.5 55 | ptyprocess==0.7.0 56 | pure-eval==0.2.2 57 | pycparser==2.21 58 | pydantic==1.10.7 59 | Pygments==2.15.1 60 | python-dateutil==2.8.2 61 | python-dotenv==1.0.0 62 | PyYAML==6.0 63 | pyzmq==25.0.2 64 | requests==2.28.2 65 | six==1.16.0 66 | SQLAlchemy==2.0.12 67 | stack-data==0.6.2 68 | tenacity==8.2.2 69 | 
tornado==6.3.1 70 | tqdm==4.65.0 71 | traitlets==5.9.0 72 | typing-inspect==0.8.0 73 | typing_extensions==4.5.0 74 | urllib3==1.26.15 75 | validators==0.20.0 76 | wcwidth==0.2.6 77 | weaviate-client==3.18.0 78 | Werkzeug==2.3.4 79 | yarl==1.9.2 80 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | # Import Flask and its CORS extension 2 | from flask import Flask, request 3 | from flask_cors import CORS 4 | import os 5 | 6 | # Create the Flask app and enable CORS on it 7 | app = Flask(__name__) 8 | CORS(app) 9 | 10 | # Default route: returns a fixed JSON payload, useful to test that the app is properly deployed 11 | @app.route("/") 12 | def read_root(): 13 | return {"LangChainApp": "Working"} 14 | # Read the API keys and the Weaviate URL from the environment, after load_dotenv() has run 15 | from dotenv import load_dotenv 16 | load_dotenv() 17 | openai_api_key = os.getenv('OPENAI_API_KEY') 18 | cohere_api_key = os.getenv('COHERE_API_KEY') 19 | weaviate_api_key = os.getenv('weaviate_api_key') 20 | weaviate_url = os.getenv('weaviate_url') 21 | 22 | # Retrieval code using LangChain 23 | import weaviate 24 | from langchain.embeddings import CohereEmbeddings 25 | from langchain.vectorstores import Weaviate 26 | from langchain.llms import OpenAI 27 | from langchain.chains import RetrievalQA 28 | from prompt import PROMPT 29 | 30 | # Connect to the Weaviate demo database containing 10M Wikipedia vectors 31 | # This uses a public READ-ONLY Weaviate API key 32 | auth_config = weaviate.auth.AuthApiKey(api_key=weaviate_api_key) 33 | 34 | client = weaviate.Client( url=weaviate_url, auth_client_secret=auth_config, 35 | additional_headers={ "X-Cohere-Api-Key": cohere_api_key}) 36 | 37 | ## Define the vectorstore (index "Articles", text key "text"), the embedding model, and the LLM 38 | vectorstore = Weaviate(client, index_name="Articles", text_key="text") 39 | vectorstore._query_attrs = ["text", "title", "url", "views", "lang", "_additional {distance}"] 40 | vectorstore.embedding =CohereEmbeddings(model="embed-multilingual-v2.0", 
cohere_api_key=cohere_api_key) 41 | llm =OpenAI(temperature=0, openai_api_key=openai_api_key) 42 | 43 | # This route answers the JSON field "query" with a RetrievalQA chain; "language" (default "english") is filled into the prompt as the answer language 44 | @app.route("/retrieve", methods=['POST']) 45 | def retrieve_resp(): 46 | query = request.json.get("query") 47 | language = request.json.get("language", "english") 48 | qa = RetrievalQA.from_chain_type(llm, retriever=vectorstore.as_retriever(), chain_type_kwargs={"prompt": PROMPT.partial(language=language)}) 49 | search_result = qa({"query": query}) 50 | return {"search_result":search_result} 51 | 52 | # This route runs a similarity search for "query" with "k" (default 4) and returns the resulting documents serialized with str() 53 | @app.route("/retrieve-list", methods=['POST']) 54 | def retrieve_list(): 55 | query = request.json.get("query") 56 | k = request.json.get("k", 4) 57 | docs_list = vectorstore.similarity_search(query, k) 58 | return {"docs_list": str(docs_list)} 59 | 60 | 61 | # Contextual compression implementation 62 | from langchain.retrievers.contextual_compression import ContextualCompressionRetriever 63 | from langchain.retrievers.document_compressors import CohereRerank 64 | # Build a ContextualCompressionRetriever: the base retriever is created with search_kwargs k, the Cohere rerank compressor with top_n 65 | def compression(k, top_n): 66 | retriever = vectorstore.as_retriever(search_kwargs={"k": k}) 67 | compressor = CohereRerank(model='rerank-multilingual-v2.0', top_n=top_n ) 68 | compression_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=retriever) 69 | return compression_retriever 70 | 71 | # This route generates an answer like /retrieve, but retrieves through compression(k, top_n) (defaults: k=9, top_n=3) 72 | @app.route("/retrieve-compr", methods=['POST']) 73 | def retrieve_compressed_resp(): 74 | query = request.json.get("query") 75 | k = request.json.get("k", 9) 76 | top_n = request.json.get("top_n", 3) 77 | language = request.json.get("language", "english") 78 | compression_retriever = compression(k, top_n) 79 | qa = RetrievalQA.from_chain_type(llm, retriever=compression_retriever, chain_type_kwargs={"prompt": 
PROMPT.partial(language=language)}) 80 | search_result = qa({"query": query}) 81 | return {"search_result":search_result} 82 | 83 | # This route returns the documents the compression retriever yields for "query", serialized with str() (defaults: k=9, top_n=3) 84 | @app.route("/retrieve-compr-list", methods=['POST']) 85 | def retrieve_compressed_list(): 86 | query = request.json.get("query") 87 | k = request.json.get("k", 9) 88 | top_n = request.json.get("top_n", 3) 89 | compression_retriever = compression(k, top_n) 90 | compressed_docs_list = compression_retriever.get_relevant_documents(query) 91 | return {"compressed_docs_list":str(compressed_docs_list)} 92 | 93 | # Parse free-form user input into a question and a response language 94 | from langchain.prompts import PromptTemplate 95 | from langchain.chains import ConversationalRetrievalChain 96 | from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory 97 | from langchain.output_parsers import PydanticOutputParser 98 | from pydantic import BaseModel, Field 99 | # Schema of the parsed user input 100 | class UserInput(BaseModel): 101 | question: str = Field(description="question asked by a user") 102 | language: str = Field(description="language requested by the user to respond in") 103 | 104 | # Set up a parser + inject instructions into the prompt template. 105 | parser = PydanticOutputParser(pydantic_object=UserInput) 106 | 107 | prompt = PromptTemplate( 108 | template="Take the user input which contains a question and a language to return results in, and extract the question and language. 
Extracted question should not have language in it.\n{format_instructions}\n{query}\n", 109 | input_variables=["query"], 110 | partial_variables={"format_instructions": parser.get_format_instructions()} 111 | ) 112 | memory = ConversationBufferWindowMemory( k=3, memory_key="chat_history", return_messages=True) 113 | # NOTE(review): no route in this file reads the module-level `memory` above; chat_history() assigns its own local `memory` 114 | # This route chats without history: the LLM first extracts the question and language from "query", then a RetrievalQA chain answers the extracted question 115 | @app.route("/chat-no-history", methods=['POST']) 116 | def chat_no_history(): 117 | query = request.json.get("query") 118 | k = request.json.get("k", 9) 119 | top_n = request.json.get("top_n", 3) 120 | compression_retriever = compression(k, top_n) 121 | _input = prompt.format_prompt(query=query) 122 | output = llm(_input.to_string()) 123 | parsed_results = parser.parse(output) 124 | language = parsed_results.language or "english" 125 | qa = RetrievalQA.from_chain_type(llm, retriever=compression_retriever, chain_type_kwargs={"prompt": PROMPT.partial(language=language)}) 126 | search_result = qa({"query": parsed_results.question}) 127 | return {"search_result":search_result['result']} 128 | 129 | # This route chats through a ConversationalRetrievalChain with memory. NOTE(review): a new ConversationBufferMemory is created inside the handler on every request, so it starts empty each time - confirm whether history was meant to persist across requests 130 | @app.route("/chat-with-history", methods=['POST']) 131 | def chat_history(): 132 | query = request.json.get("query") 133 | k = request.json.get("k", 9) 134 | top_n = request.json.get("top_n", 3) 135 | compression_retriever = compression(k, top_n) 136 | _input = prompt.format_prompt(query=query) 137 | output = llm(_input.to_string()) 138 | parsed_results = parser.parse(output) 139 | language = parsed_results.language or "english" 140 | memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True) 141 | qa = ConversationalRetrievalChain.from_llm(llm, retriever=compression_retriever,memory=memory, combine_docs_chain_kwargs={"prompt": PROMPT.partial(language=language)}) 142 | search_result = qa({"question": parsed_results.question}) 143 | return {"search_result":search_result['answer']} 
144 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Project Name 2 | 3 | Cohere Weaviate Wikipedia Retrieval using LangChain 4 | 5 | ## Description 6 | 7 | A backend API to perform search over Wikipedia using LangChain, Cohere and Weaviate 8 | 9 | ## Getting Started 10 | 11 | ### Prerequisites 12 | 13 | To use this project, you will need to have the following installed on your machine: 14 | 15 | - Python 3.8 or above 16 | - pip 17 | - virtualenv 18 | 19 | ### Installing 20 | 21 | To install and run this project on your local machine, follow these steps: 22 | 23 | 1. Clone the repository onto your machine using the following command: 24 | 25 | ``` 26 | git clone https://github.com/menloparklab/cohere-weviate-wikipedia-retrieval 27 | ``` 28 | 29 | 2. Create a virtual environment for the project using the following command: 30 | 31 | ``` 32 | python3 -m venv venv 33 | ``` 34 | 35 | 3. Activate the virtual environment using the following command: 36 | 37 | ``` 38 | source venv/bin/activate 39 | ``` 40 | 41 | 4. Install the project dependencies using the following command: 42 | 43 | ``` 44 | pip install -r requirements.txt 45 | ``` 46 | 47 | 5. Create a `.env` file in the root directory of the project and add your API keys. You can use the `.env.copy` file as a template. 48 | 49 | The Weaviate API key and URL are intentionally left in `.env.copy`. They are read-only credentials provided by Weaviate for demo purposes. 50 | 51 | 6. To test your output and results, use the provided Jupyter notebook. You can easily run this in Colab as well. 52 | 53 | 54 | 55 | 7. To start the API routes using Flask, run the following command: 56 | 57 | ``` 58 | gunicorn app:app 59 | ``` 60 | 61 | ### Below are the endpoints and examples to call them 62 | 63 | 1. `/retrieve` 64 | 65 | This endpoint generates an answer to a query using retrieval-based QA. 
To use this endpoint, send a POST request to `http://<host>/retrieve` with the following JSON payload: 66 | 67 | ``` 68 | { 69 | "query": "<query>", 70 | "language": "<language>" 71 | } 72 | ``` 73 | 74 | The `query` field should contain the query for which you want to generate an answer. The `language` field is optional and should be set to the language in which the answer should be written. If the `language` field is not set, the default language is English. 75 | 76 | Example JSON: 77 | 78 | ``` 79 | { 80 | "query": "What is the capital of France?", 81 | "language": "english" 82 | } 83 | ``` 84 | 85 | 2. `/retrieve-list` 86 | 87 | This endpoint returns a list of the documents most similar to the query using the vectorstore. To use this endpoint, send a POST request to `http://<host>/retrieve-list` with the following JSON payload: 88 | 89 | ``` 90 | { 91 | "query": "<query>", 92 | "k": <k> 93 | } 94 | ``` 95 | 96 | The `query` field should contain the query for which you want to retrieve similar documents. The `k` field is optional and should be set to the number of most similar documents you want to retrieve. If the `k` field is not set, the default value is 4. 97 | 98 | Example JSON: 99 | 100 | ``` 101 | { 102 | "query": "What is the capital of France?", 103 | "k": 4 104 | } 105 | ``` 106 | 107 | 3. `/retrieve-compr` 108 | 109 | This endpoint generates an answer to a query using Contextual Compression. To use this endpoint, send a POST request to `http://<host>/retrieve-compr` with the following JSON payload: 110 | 111 | ``` 112 | { 113 | "query": "<query>", 114 | "k": <k>, 115 | "top_n": <top_n>, 116 | "language": "<language>" 117 | } 118 | ``` 119 | 120 | The `query` field should contain the query for which you want to generate an answer. The `k` and `top_n` fields are optional and should be set to the number of most similar documents you want to retrieve and the number of compressed documents you want to consider, respectively. If the `k` and `top_n` fields are not set, the default values are 9 and 3, respectively. 
The `language` field is optional and should be set to the language in which the answer should be written. If the `language` field is not set, the default language is English. 121 | 122 | Example JSON: 123 | 124 | ``` 125 | { 126 | "query": "What is the capital of France?", 127 | "k": 9, 128 | "top_n": 3, 129 | "language": "english" 130 | } 131 | ``` 132 | 133 | 4. `/retrieve-compr-list` 134 | 135 | This endpoint returns a list of the documents most similar to the query using Contextual Compression. To use this endpoint, send a POST request to `http://<host>/retrieve-compr-list` with the following JSON payload: 136 | 137 | ``` 138 | { 139 | "query": "<query>", 140 | "k": <k>, 141 | "top_n": <top_n> 142 | } 143 | ``` 144 | 145 | The `query` field should contain the query for which you want to retrieve similar documents. The `k` and `top_n` fields are optional and should be set to the number of most similar documents you want to retrieve and the number of compressed documents you want to consider, respectively. If the `k` and `top_n` fields are not set, the default values are 9 and 3, respectively. 146 | 147 | 148 | 5. `/chat-no-history` 149 | 150 | This route allows the user to chat with the application without any historical chat context. It accepts the following parameters in a JSON request body: 151 | - `query`: The user's query. Required. 152 | - `k`: An integer value for the number of results to retrieve from the vectorstore. Optional, defaults to 9. 153 | - `top_n`: An integer value for the number of top search results to consider for generating an answer. Optional, defaults to 3. 154 | 155 | The route then uses the `compression` function to build a retriever for the top `k` results, and constructs a prompt using the user's query. The prompt is passed to the language model, and the output is parsed using a `parser` object. If a language is detected in the output, it is used as the language of the answer; otherwise the default is English. 
The `RetrievalQA` class is used to generate a response using the `qa` object, and the search result is returned as a JSON response. 156 | 157 | Example JSON 158 | 159 | ```json 160 | { 161 | "query": "What is the capital of France?", 162 | "k": 5, 163 | "top_n": 2 164 | } 165 | ``` 166 | 167 | Example Response 168 | 169 | ```json 170 | { 171 | "search_result": "Paris is the capital of France." 172 | } 173 | ``` 174 | 175 | 6. `/chat-with-history` 176 | 177 | This route allows the user to chat with the application using historical chat context. It accepts the same parameters as the previous route: 178 | - `query`: The user's query. Required. 179 | - `k`: An integer value for the number of results to retrieve from the model. Optional, defaults to 9. 180 | - `top_n`: An integer value for the number of top search results to consider for generating an answer. Optional, defaults to 3. 181 | 182 | In addition, this route maintains a memory of past conversations using the `ConversationBufferMemory` class, and generates responses using the `ConversationalRetrievalChain` class. The memory key for this route is set to `"chat_history"`. The search result is returned as a JSON response. 183 | 184 | Example Json 185 | 186 | ```json 187 | { 188 | "query": "What is the capital of Spain?", 189 | "k": 3, 190 | "top_n": 1 191 | } 192 | ``` 193 | 194 | Example Response 195 | 196 | ```json 197 | { 198 | "search_result": "The capital of Spain is Madrid." 
199 | } 200 | ``` 201 | 202 | 203 | -------------------------------------------------------------------------------- /walkthrough.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Building a LangChain-based app to retrieve Wikipedia Embeddings\n", 9 | "\n", 10 | "Cohere [recently embedded](https://txt.cohere.com/embedding-archives-wikipedia/) the entire Wikipedia and made it available via [HuggingFace](https://huggingface.co/Cohere?ref=txt.cohere.com). Weaviate hosted the embeddings and made them available for [free](https://weaviate.io/developers/weaviate/more-resources/example-datasets#semantic-search-through-wikipedia)\n", 11 | "\n", 12 | "Below is the code to retrieve from the Weaviate Vector Database" 13 | ] 14 | }, 15 | { 16 | "attachments": {}, 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "### Code to retrieve using semantic search" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 32, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import os\n", 30 | "import weaviate\n", 31 | "from langchain.embeddings import CohereEmbeddings\n", 32 | "from langchain.vectorstores import Weaviate\n", 33 | "from langchain.llms import OpenAI\n", 34 | "from langchain.chains import RetrievalQA\n", 35 | "\n", 36 | "from dotenv import load_dotenv\n", 37 | "load_dotenv()\n", 38 | "openai_api_key = os.getenv('OPENAI_API_KEY')\n", 39 | "cohere_api_key = os.getenv('COHERE_API_KEY')\n", 40 | "weaviate_api_key = os.getenv('weaviate_api_key')\n", 41 | "weaviate_url = os.getenv('weaviate_url')\n", 42 | "\n", 43 | "\n", 44 | "# Connect to the Weaviate demo databse containing 10M wikipedia vectors\n", 45 | "# This uses a public READ-ONLY Weaviate API key\n", 46 | "auth_config = weaviate.auth.AuthApiKey(api_key=weaviate_api_key) \n", 47 | "\n", 48 | "client = 
weaviate.Client( url=weaviate_url, auth_client_secret=auth_config, \n", 49 | " additional_headers={ \"X-Cohere-Api-Key\": cohere_api_key})\n", 50 | "\n", 51 | "\n", 52 | "vectorstore = Weaviate(client, index_name=\"Articles\", text_key=\"text\")\n", 53 | "vectorstore._query_attrs = [\"text\", \"title\", \"url\", \"views\", \"lang\", \"_additional {distance}\"]\n", 54 | "vectorstore.embedding =CohereEmbeddings(model=\"embed-multilingual-v2.0\", cohere_api_key=cohere_api_key)\n", 55 | "llm =OpenAI(temperature=0, openai_api_key=openai_api_key)\n", 56 | "qa = RetrievalQA.from_chain_type(llm, retriever=vectorstore.as_retriever())\n", 57 | "query = \"Why is the theory of everything significant?\"\n", 58 | "result = qa({\"query\": query})" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 20, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/plain": [ 69 | "' La teoría del todo es significativa porque podría unificar todas las interacciones fundamentales de la naturaleza, que son consideradas como cuatro: gravitación, la fuerza nuclear fuerte, la fuerza nuclear débil y la electromagnética. Esto permitiría una comprensión profunda de varios tipos diferentes de partículas, así como de diferentes fuerzas.'" 70 | ] 71 | }, 72 | "execution_count": 20, 73 | "metadata": {}, 74 | "output_type": "execute_result" 75 | } 76 | ], 77 | "source": [ 78 | "result['result']" 79 | ] 80 | }, 81 | { 82 | "attachments": {}, 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "Great, the retrieval works, but do we know whether the most relevant results are being sent for answer generation? Also, the answer is not in English; can we fix that? \n", 87 | "\n", 88 | "Let's see what's happening under the hood." 
89 | ] 90 | }, 91 | { 92 | "attachments": {}, 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "#### Both methods below help retrive Documents based on similarity search" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 37, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/plain": [ 107 | "[Document(page_content='सर्वतत्व सिद्धांत या हर चीज़ का सिद्धांत या सब कुछ का सिद्धांत () सैद्धांतिक भौतिकी का एक कल्पित सिद्धांत है जो हमारे भौतिक ब्रह्माण्ड में घट सकने वाली हर चीज़ को वैज्ञानिक दृष्टि से समझाने की क्षमता रखता होगा। अगर यह सिद्धांत स्पष्ट हो जाता है तो ऐसा कोई भी प्रयोग नहीं होगा जिसके नतीजे के बारे में पहले से ही सही भविष्यवाणी करनी सम्भव न हो। यह सिद्धांत अन्य सभी ज्ञात सिद्धांतो को एक-दूसरे से जोड़ने का भी काम करेगा। वैज्ञानिकों द्वारा सर्वतत्व सिद्धांत ढूंढने का एक मुख्य कारण प्रमात्रा यान्त्रिकी (क्वान्टम मकैनिक्स) और सामान्य सापेक्षता (थीओरी ऑफ़ रॅलॅटिविटि) के बीच में तालमेल बनाना है। बिना इस सिद्धांत के मिले यह दोनों मूल सिद्धांत कुछ पहलूओं में एक-दूसरे का खंडन करते हैं।', metadata={'_additional': {'distance': -150.66002}, 'lang': 'hi', 'title': 'सर्वतत्व सिद्धांत', 'url': 'https://hi.wikipedia.org/wiki?curid=514328', 'views': 40}),\n", 108 | " Document(page_content='Una teoría del todo (o ToE por sus siglas en inglés, \"Theory of Everything\") es una teoría hipotética de la física teórica que explicaría y conectaría en un esquema teórico unificado las interacciones físicas fundamentales. Inicialmente, el término se usó con una connotación irónica, para referirse a varias teorías sobregeneralizadas. Después se popularizó en la física cuántica al describir varias propuestas teóricas que podrían unificar o explicar a través de un modelo consistente todas las interacciones fundamentales encontradas en teoría cuántica de campos. 
Otros términos, no del todo sinónimos, empleados para referirse al mismo concepto son teoría unificada, gran teoría unificada, teoría de campos unificada y teoría del campo unificado.', metadata={'_additional': {'distance': -150.61458}, 'lang': 'es', 'title': 'Teoría del todo', 'url': 'https://es.wikipedia.org/wiki?curid=42764', 'views': 900}),\n", 109 | " Document(page_content='En la corriente principal de la física actual, la Teoría del Todo podría unificar todas las interacciones fundamentales de la naturaleza, que son consideradas como cuatro: gravitación, la fuerza nuclear fuerte, la fuerza nuclear débil y la electromagnética. Dado que la fuerza débil puede transformar partículas elementales de una clase a otra, la teoría del todo debería producir una comprensión profunda de varios tipos diferentes de partículas, así como de diferentes fuerzas. El patrón previsible de las teorías es el siguiente:', metadata={'_additional': {'distance': -150.42622}, 'lang': 'es', 'title': 'Teoría del todo', 'url': 'https://es.wikipedia.org/wiki?curid=42764', 'views': 900}),\n", 110 | " Document(page_content='In fisica la teoria del tutto, conosciuta anche come TOE (acronimo dell\\'inglese \"theory of everything\"), è un\\'ipotetica teoria fisica in grado di spiegare e riunire in un unico quadro tutti i fenomeni fisici conosciuti. Presupposto minimo di tale teoria è l\\'unificazione di tutte le interazioni fondamentali.', metadata={'_additional': {'distance': -150.09216}, 'lang': 'it', 'title': 'Teoria del tutto', 'url': 'https://it.wikipedia.org/wiki?curid=207970', 'views': 600}),\n", 111 | " Document(page_content=\"Nella corrente principale dell'attuale fisica, una Teoria del Tutto unificherebbe tutte le interazioni fondamentali della natura, che sono solitamente considerate essere quattro in numero: gravità, forza nucleare forte, forza nucleare debole e forza elettromagnetica. 
Siccome la forza debole può trasformare le particelle elementari da un tipo a un altro, la Teoria del Tutto dovrebbe dare una profonda comprensione dei vari tipi di particelle e delle diverse forze.\", metadata={'_additional': {'distance': -149.89221}, 'lang': 'it', 'title': 'Teoria del tutto', 'url': 'https://it.wikipedia.org/wiki?curid=207970', 'views': 600}),\n", 112 | " Document(page_content='El concepto de una \"teoría del todo\" está arraigado en el principio de causalidad y su descubrimiento es la empresa de acercarnos a ver a través de los ojos del demonio de Laplace. Aunque dicha posibilidad puede considerarse como determinista, en una \"simple fórmula\" puede todavía sobrevivir la física fundamentalmente probabilista, como proponen algunas posturas actuales de la mecánica cuántica. Esto se debe a que aun si los mecanismos que gobiernan las partículas son intrínsecamente azarosos, podemos conocer las reglas que gobiernan dicho azar y calcular las probabilidades de ocurrencia para cada evento posible. Sin embargo, otras interpretaciones de la ecuación de Schrödinger conceden poca importancia al azar: este solo se tendría importancia dentro del átomo y se diluiría en el mundo macroscópico. Otras no obstante la niegan completamente y la consideran una interpretación equivocada de las leyes cuánticas. 
En consecuencia, la mayor dificultad de descubrir una teoría unificada ha sido armonizar correctamente leyes que gobiernan solo un reducido ámbito de la naturaleza y transformarlas en una única teoría que la explique en su totalidad, tanto en su mundo micro como macroscópico y explique la existencia de todas las interacciones fundamentales: las fuerzas gravitatoria, electromagnética, nuclear fuerte y nuclear débil.', metadata={'_additional': {'distance': -149.70148}, 'lang': 'es', 'title': 'Teoría del todo', 'url': 'https://es.wikipedia.org/wiki?curid=42764', 'views': 900}),\n", 113 | " Document(page_content='في الفيزياء الحالية السائدة، نظرية كل شيء هي محاولة لتوحيد القوى الأساسية الأربعة الموجودة في الطبيعة: (أي الثقالة, و القوة النووية القوية, و القوة النووية الضعيفة, و القوة الكهرومغناطيسية). بما أن القوة الضعيفة لها القدرة على تغيير الجسيمات الأولية من شكل لآخر، سينبغي على نظرية كل شيء بأن تعطينا فهماً عميقا للعلاقات الموجودة بين جميع الجسيمات المختلفة،', metadata={'_additional': {'distance': -149.68802}, 'lang': 'ar', 'title': 'نظرية كل شيء', 'url': 'https://ar.wikipedia.org/wiki?curid=10751', 'views': 200}),\n", 114 | " Document(page_content=\"La principale teoria del tutto è al momento la Teoria delle superstringhe / M-teoria; l'attuale ricerca sulla gravità quantistica a loop potrebbe eventualmente giocare un ruolo fondamentale in una teoria del tutto, ma non è il suo obiettivo principale. Queste teorie cercano di affrontare il problema della rinormalizzazione fissando alcuni limiti inferiori sulle scale di lunghezza possibili. 
La teoria delle stringhe e la supergravità (entrambe ritenute casi limite della ancora non ben definita M-teoria) suppongono che l'universo abbia effettivamente un numero di dimensioni superiore alle tre dello spazio e una del tempo, che sono intuitive.\", metadata={'_additional': {'distance': -149.24983}, 'lang': 'it', 'title': 'Teoria del tutto', 'url': 'https://it.wikipedia.org/wiki?curid=207970', 'views': 600}),\n", 115 | " Document(page_content='نظرية كل شيء أو اختصاراً TOE أو معادلة الكون تشكل وصفاً شمولياً للمادة في الفيزياء النظرية، من المفترض أنها قادرة على تفسير جميع الظواهر الفيزيائية بشكل كامل وتفسر جميع المؤثرات الفيزيائية (أي كل شيء) ولا يزال البحث جارياً لمحاولة صياغتها. ومن المفترض أنها سوف تربط بين القوى الأربعة المعروفة التي تتحكم في تبادل القوى بين جميع الجسيمات المعروفة وغير المعروفة (مثل المادة المظلمة). القوى الأربعة المعروفة حتى الآن هي: القوة النووية الشديدة، التآثر الكهرومغناطيسي، القوة الضعيفة، وقوة الجاذبية.', metadata={'_additional': {'distance': -148.91898}, 'lang': 'ar', 'title': 'نظرية كل شيء', 'url': 'https://ar.wikipedia.org/wiki?curid=10751', 'views': 200}),\n", 116 | " Document(page_content='[자연의 움직임에 담겨있는 힘들에 대해 확실하게 알게 되는 순간과 모든 자연을 구성하고 있는 물체들의 위치에 대한 이해력, 만약 이 이해력이 또한 이 자료들을 분석하고 제출할 수 있을 정도로 광대하게 충분하다면, 그 이해력은 가장 위대한 우주의 본체와 그것들의 가장 작은 핵들을 하나의 공식 안으로 통합할 수 있을 것이다. 
이러한 이유로 지성은 조금도 불확실해지려 하지 않고 미래는 마치 눈에 띄기 전의 현재가 되려는 과거와 같다.', metadata={'_additional': {'distance': -148.68285}, 'lang': 'ko', 'title': '모든 것의 이론', 'url': 'https://ko.wikipedia.org/wiki?curid=142233', 'views': 60})]" 117 | ] 118 | }, 119 | "execution_count": 37, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "query = \"Why is the theory of everything significant?\"\n", 126 | "docs = vectorstore.similarity_search(query, 10)\n", 127 | "docs" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 38, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "[Document(page_content='सर्वतत्व सिद्धांत या हर चीज़ का सिद्धांत या सब कुछ का सिद्धांत () सैद्धांतिक भौतिकी का एक कल्पित सिद्धांत है जो हमारे भौतिक ब्रह्माण्ड में घट सकने वाली हर चीज़ को वैज्ञानिक दृष्टि से समझाने की क्षमता रखता होगा। अगर यह सिद्धांत स्पष्ट हो जाता है तो ऐसा कोई भी प्रयोग नहीं होगा जिसके नतीजे के बारे में पहले से ही सही भविष्यवाणी करनी सम्भव न हो। यह सिद्धांत अन्य सभी ज्ञात सिद्धांतो को एक-दूसरे से जोड़ने का भी काम करेगा। वैज्ञानिकों द्वारा सर्वतत्व सिद्धांत ढूंढने का एक मुख्य कारण प्रमात्रा यान्त्रिकी (क्वान्टम मकैनिक्स) और सामान्य सापेक्षता (थीओरी ऑफ़ रॅलॅटिविटि) के बीच में तालमेल बनाना है। बिना इस सिद्धांत के मिले यह दोनों मूल सिद्धांत कुछ पहलूओं में एक-दूसरे का खंडन करते हैं।', metadata={'_additional': {'distance': -150.66002}, 'lang': 'hi', 'title': 'सर्वतत्व सिद्धांत', 'url': 'https://hi.wikipedia.org/wiki?curid=514328', 'views': 40}),\n", 139 | " Document(page_content='Una teoría del todo (o ToE por sus siglas en inglés, \"Theory of Everything\") es una teoría hipotética de la física teórica que explicaría y conectaría en un esquema teórico unificado las interacciones físicas fundamentales. Inicialmente, el término se usó con una connotación irónica, para referirse a varias teorías sobregeneralizadas. 
Después se popularizó en la física cuántica al describir varias propuestas teóricas que podrían unificar o explicar a través de un modelo consistente todas las interacciones fundamentales encontradas en teoría cuántica de campos. Otros términos, no del todo sinónimos, empleados para referirse al mismo concepto son teoría unificada, gran teoría unificada, teoría de campos unificada y teoría del campo unificado.', metadata={'_additional': {'distance': -150.61458}, 'lang': 'es', 'title': 'Teoría del todo', 'url': 'https://es.wikipedia.org/wiki?curid=42764', 'views': 900}),\n", 140 | " Document(page_content='En la corriente principal de la física actual, la Teoría del Todo podría unificar todas las interacciones fundamentales de la naturaleza, que son consideradas como cuatro: gravitación, la fuerza nuclear fuerte, la fuerza nuclear débil y la electromagnética. Dado que la fuerza débil puede transformar partículas elementales de una clase a otra, la teoría del todo debería producir una comprensión profunda de varios tipos diferentes de partículas, así como de diferentes fuerzas. El patrón previsible de las teorías es el siguiente:', metadata={'_additional': {'distance': -150.42622}, 'lang': 'es', 'title': 'Teoría del todo', 'url': 'https://es.wikipedia.org/wiki?curid=42764', 'views': 900}),\n", 141 | " Document(page_content='In fisica la teoria del tutto, conosciuta anche come TOE (acronimo dell\\'inglese \"theory of everything\"), è un\\'ipotetica teoria fisica in grado di spiegare e riunire in un unico quadro tutti i fenomeni fisici conosciuti. 
Presupposto minimo di tale teoria è l\\'unificazione di tutte le interazioni fondamentali.', metadata={'_additional': {'distance': -150.09216}, 'lang': 'it', 'title': 'Teoria del tutto', 'url': 'https://it.wikipedia.org/wiki?curid=207970', 'views': 600}),\n", 142 | " Document(page_content=\"Nella corrente principale dell'attuale fisica, una Teoria del Tutto unificherebbe tutte le interazioni fondamentali della natura, che sono solitamente considerate essere quattro in numero: gravità, forza nucleare forte, forza nucleare debole e forza elettromagnetica. Siccome la forza debole può trasformare le particelle elementari da un tipo a un altro, la Teoria del Tutto dovrebbe dare una profonda comprensione dei vari tipi di particelle e delle diverse forze.\", metadata={'_additional': {'distance': -149.89221}, 'lang': 'it', 'title': 'Teoria del tutto', 'url': 'https://it.wikipedia.org/wiki?curid=207970', 'views': 600}),\n", 143 | " Document(page_content='El concepto de una \"teoría del todo\" está arraigado en el principio de causalidad y su descubrimiento es la empresa de acercarnos a ver a través de los ojos del demonio de Laplace. Aunque dicha posibilidad puede considerarse como determinista, en una \"simple fórmula\" puede todavía sobrevivir la física fundamentalmente probabilista, como proponen algunas posturas actuales de la mecánica cuántica. Esto se debe a que aun si los mecanismos que gobiernan las partículas son intrínsecamente azarosos, podemos conocer las reglas que gobiernan dicho azar y calcular las probabilidades de ocurrencia para cada evento posible. Sin embargo, otras interpretaciones de la ecuación de Schrödinger conceden poca importancia al azar: este solo se tendría importancia dentro del átomo y se diluiría en el mundo macroscópico. Otras no obstante la niegan completamente y la consideran una interpretación equivocada de las leyes cuánticas. 
En consecuencia, la mayor dificultad de descubrir una teoría unificada ha sido armonizar correctamente leyes que gobiernan solo un reducido ámbito de la naturaleza y transformarlas en una única teoría que la explique en su totalidad, tanto en su mundo micro como macroscópico y explique la existencia de todas las interacciones fundamentales: las fuerzas gravitatoria, electromagnética, nuclear fuerte y nuclear débil.', metadata={'_additional': {'distance': -149.70148}, 'lang': 'es', 'title': 'Teoría del todo', 'url': 'https://es.wikipedia.org/wiki?curid=42764', 'views': 900}),\n", 144 | " Document(page_content='في الفيزياء الحالية السائدة، نظرية كل شيء هي محاولة لتوحيد القوى الأساسية الأربعة الموجودة في الطبيعة: (أي الثقالة, و القوة النووية القوية, و القوة النووية الضعيفة, و القوة الكهرومغناطيسية). بما أن القوة الضعيفة لها القدرة على تغيير الجسيمات الأولية من شكل لآخر، سينبغي على نظرية كل شيء بأن تعطينا فهماً عميقا للعلاقات الموجودة بين جميع الجسيمات المختلفة،', metadata={'_additional': {'distance': -149.68802}, 'lang': 'ar', 'title': 'نظرية كل شيء', 'url': 'https://ar.wikipedia.org/wiki?curid=10751', 'views': 200}),\n", 145 | " Document(page_content=\"La principale teoria del tutto è al momento la Teoria delle superstringhe / M-teoria; l'attuale ricerca sulla gravità quantistica a loop potrebbe eventualmente giocare un ruolo fondamentale in una teoria del tutto, ma non è il suo obiettivo principale. Queste teorie cercano di affrontare il problema della rinormalizzazione fissando alcuni limiti inferiori sulle scale di lunghezza possibili. 
La teoria delle stringhe e la supergravità (entrambe ritenute casi limite della ancora non ben definita M-teoria) suppongono che l'universo abbia effettivamente un numero di dimensioni superiore alle tre dello spazio e una del tempo, che sono intuitive.\", metadata={'_additional': {'distance': -149.24983}, 'lang': 'it', 'title': 'Teoria del tutto', 'url': 'https://it.wikipedia.org/wiki?curid=207970', 'views': 600}),\n", 146 | " Document(page_content='نظرية كل شيء أو اختصاراً TOE أو معادلة الكون تشكل وصفاً شمولياً للمادة في الفيزياء النظرية، من المفترض أنها قادرة على تفسير جميع الظواهر الفيزيائية بشكل كامل وتفسر جميع المؤثرات الفيزيائية (أي كل شيء) ولا يزال البحث جارياً لمحاولة صياغتها. ومن المفترض أنها سوف تربط بين القوى الأربعة المعروفة التي تتحكم في تبادل القوى بين جميع الجسيمات المعروفة وغير المعروفة (مثل المادة المظلمة). القوى الأربعة المعروفة حتى الآن هي: القوة النووية الشديدة، التآثر الكهرومغناطيسي، القوة الضعيفة، وقوة الجاذبية.', metadata={'_additional': {'distance': -148.91898}, 'lang': 'ar', 'title': 'نظرية كل شيء', 'url': 'https://ar.wikipedia.org/wiki?curid=10751', 'views': 200}),\n", 147 | " Document(page_content='[자연의 움직임에 담겨있는 힘들에 대해 확실하게 알게 되는 순간과 모든 자연을 구성하고 있는 물체들의 위치에 대한 이해력, 만약 이 이해력이 또한 이 자료들을 분석하고 제출할 수 있을 정도로 광대하게 충분하다면, 그 이해력은 가장 위대한 우주의 본체와 그것들의 가장 작은 핵들을 하나의 공식 안으로 통합할 수 있을 것이다. 
이러한 이유로 지성은 조금도 불확실해지려 하지 않고 미래는 마치 눈에 띄기 전의 현재가 되려는 과거와 같다.', metadata={'_additional': {'distance': -148.68285}, 'lang': 'ko', 'title': '모든 것의 이론', 'url': 'https://ko.wikipedia.org/wiki?curid=142233', 'views': 60})]" 148 | ] 149 | }, 150 | "execution_count": 38, 151 | "metadata": {}, 152 | "output_type": "execute_result" 153 | } 154 | ], 155 | "source": [ 156 | "retriever = vectorstore.as_retriever(search_kwargs={\"k\": 10})\n", 157 | "docs1 = retriever.get_relevant_documents(query)\n", 158 | "docs1" 159 | ] 160 | }, 161 | { 162 | "attachments": {}, 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "These are quite a few results, drawn from Wikipedia articles in different languages. Can we find out which answers are most relevant?" 167 | ] 168 | }, 169 | { 170 | "attachments": {}, 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "### Contextual Compression helps pass only the most relevant answers to the LLM for answer completion" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 36, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": [ 185 | "[Document(page_content='في الفيزياء الحالية السائدة، نظرية كل شيء هي محاولة لتوحيد القوى الأساسية الأربعة الموجودة في الطبيعة: (أي الثقالة, و القوة النووية القوية, و القوة النووية الضعيفة, و القوة الكهرومغناطيسية). 
بما أن القوة الضعيفة لها القدرة على تغيير الجسيمات الأولية من شكل لآخر، سينبغي على نظرية كل شيء بأن تعطينا فهماً عميقا للعلاقات الموجودة بين جميع الجسيمات المختلفة،', metadata={'_additional': {'distance': -149.68802}, 'lang': 'ar', 'title': 'نظرية كل شيء', 'url': 'https://ar.wikipedia.org/wiki?curid=10751', 'views': 200, 'relevance_score': 0.9994207}),\n", 186 | " Document(page_content=\"Nella corrente principale dell'attuale fisica, una Teoria del Tutto unificherebbe tutte le interazioni fondamentali della natura, che sono solitamente considerate essere quattro in numero: gravità, forza nucleare forte, forza nucleare debole e forza elettromagnetica. Siccome la forza debole può trasformare le particelle elementari da un tipo a un altro, la Teoria del Tutto dovrebbe dare una profonda comprensione dei vari tipi di particelle e delle diverse forze.\", metadata={'_additional': {'distance': -149.89221}, 'lang': 'it', 'title': 'Teoria del tutto', 'url': 'https://it.wikipedia.org/wiki?curid=207970', 'views': 600, 'relevance_score': 0.99890983}),\n", 187 | " Document(page_content='Una teoría del todo (o ToE por sus siglas en inglés, \"Theory of Everything\") es una teoría hipotética de la física teórica que explicaría y conectaría en un esquema teórico unificado las interacciones físicas fundamentales. Inicialmente, el término se usó con una connotación irónica, para referirse a varias teorías sobregeneralizadas. Después se popularizó en la física cuántica al describir varias propuestas teóricas que podrían unificar o explicar a través de un modelo consistente todas las interacciones fundamentales encontradas en teoría cuántica de campos. 
Otros términos, no del todo sinónimos, empleados para referirse al mismo concepto son teoría unificada, gran teoría unificada, teoría de campos unificada y teoría del campo unificado.', metadata={'_additional': {'distance': -150.61458}, 'lang': 'es', 'title': 'Teoría del todo', 'url': 'https://es.wikipedia.org/wiki?curid=42764', 'views': 900, 'relevance_score': 0.99858963}),\n", 188 | " Document(page_content='In fisica la teoria del tutto, conosciuta anche come TOE (acronimo dell\\'inglese \"theory of everything\"), è un\\'ipotetica teoria fisica in grado di spiegare e riunire in un unico quadro tutti i fenomeni fisici conosciuti. Presupposto minimo di tale teoria è l\\'unificazione di tutte le interazioni fondamentali.', metadata={'_additional': {'distance': -150.09216}, 'lang': 'it', 'title': 'Teoria del tutto', 'url': 'https://it.wikipedia.org/wiki?curid=207970', 'views': 600, 'relevance_score': 0.9980957})]" 189 | ] 190 | }, 191 | "execution_count": 36, 192 | "metadata": {}, 193 | "output_type": "execute_result" 194 | } 195 | ], 196 | "source": [ 197 | "from langchain.retrievers.contextual_compression import ContextualCompressionRetriever\n", 198 | "from langchain.retrievers.document_compressors import CohereRerank\n", 199 | "\n", 200 | "retriever = vectorstore.as_retriever(search_kwargs={\"k\": 15})\n", 201 | "compressor = CohereRerank(model='rerank-multilingual-v2.0', top_n=4 )\n", 202 | "compression_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=retriever)\n", 203 | "compressed_docs = compression_retriever.get_relevant_documents(\"Why is the theory of everything significant?\")\n", 204 | "compressed_docs" 205 | ] 206 | }, 207 | { 208 | "attachments": {}, 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "As you can see, a 'relevance_score' is now added to the metadata of each Document retrieved; this ensures that the most relevant Documents are used for the next step of answer generation 
via an LLM call. " 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 24, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "qa = RetrievalQA.from_chain_type(llm, retriever=compression_retriever)\n", 222 | "result = qa({\"query\": query})" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 25, 228 | "metadata": {}, 229 | "outputs": [ 230 | { 231 | "data": { 232 | "text/plain": [ 233 | "' La teoria del tutto è significativa perché cerca di unificare tutte le interazioni fondamentali della natura in un unico quadro teorico.'" 234 | ] 235 | }, 236 | "execution_count": 25, 237 | "metadata": {}, 238 | "output_type": "execute_result" 239 | } 240 | ], 241 | "source": [ 242 | "result['result']" 243 | ] 244 | }, 245 | { 246 | "attachments": {}, 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "Now the answer is still in a language different than English. There are many ways to fix this. An easy method is to edit the prompt_template to add a language parameter" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 26, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "from langchain.prompts import PromptTemplate\n", 260 | "\n", 261 | "prompt_template = \"\"\"Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.\n", 262 | "\n", 263 | "{context}\n", 264 | "\n", 265 | "Question: {question}\n", 266 | "Helpful Answer in {language}:\"\"\"\n", 267 | "PROMPT = PromptTemplate(template=prompt_template, input_variables=[\"context\", \"question\",\"language\"])" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 28, 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "data": { 277 | "text/plain": [ 278 | "' The theory of everything is significant because it would attempt to unify the four fundamental forces of nature: gravity, strong nuclear force, weak nuclear force, and electromagnetism. It would also give us a deeper understanding of the relationships between different particles and forces.'" 279 | ] 280 | }, 281 | "execution_count": 28, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "qa = RetrievalQA.from_chain_type(llm, retriever=compression_retriever, chain_type_kwargs={\"prompt\": PROMPT.partial(language=\"english\")})\n", 288 | "result = qa({\"query\": query})\n", 289 | "result['result']" 290 | ] 291 | }, 292 | { 293 | "attachments": {}, 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "Great! Now we have a response in English that also draws on the most relevant sources from global Wikipedia articles. 
" 298 | ] 299 | } 300 | ], 301 | "metadata": { 302 | "kernelspec": { 303 | "display_name": "venv", 304 | "language": "python", 305 | "name": "python3" 306 | }, 307 | "language_info": { 308 | "codemirror_mode": { 309 | "name": "ipython", 310 | "version": 3 311 | }, 312 | "file_extension": ".py", 313 | "mimetype": "text/x-python", 314 | "name": "python", 315 | "nbconvert_exporter": "python", 316 | "pygments_lexer": "ipython3", 317 | "version": "3.11.1" 318 | }, 319 | "orig_nbformat": 4 320 | }, 321 | "nbformat": 4, 322 | "nbformat_minor": 2 323 | } 324 | --------------------------------------------------------------------------------