├── pages ├── __init__.py ├── 3_query_tranformations.py ├── 1_LLM.py ├── 4_rerankers.py └── 2_embeddings.py ├── docs └── constitution.pdf ├── utils ├── __init__.py └── query_translation.py ├── requirements.txt ├── LICENSE ├── .gitignore ├── README.md └── Home.py /pages/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/constitution.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/misbahsy/RAGTune/HEAD/docs/constitution.pdf -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .query_translation import * 2 | 3 | __all__ = [ 4 | "run_hyde", 5 | "run_multi_query", 6 | "run_rag_fusion", 7 | "run_recursive_decomposition", 8 | "run_individual_decomposition", 9 | "run_step_back_rag", 10 | ] -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ragas 2 | langchain 3 | langchain-community 4 | langchain-openai 5 | python-dotenv 6 | chromadb 7 | cohere 8 | langchainhub 9 | streamlit 10 | tqdm 11 | unstructured[docx,pptx,csv,pdf] 12 | st-pages 13 | plotly 14 | git+https://github.com/misbahsy/rerankers.git#egg=rerankers[all] 15 | langchain-anthropic 16 | langchain-google-genai -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Misbah Syed 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RAGTune 2 | 3 | RAGTune is an automated tuning and optimization tool for the RAG (Retrieval-Augmented Generation) pipeline. This tool allows you to evaluate different LLMs (Large Language Models), embedding models, query transformations, and rerankers. 4 | 5 | ## Getting Started 6 | 7 | ### Prerequisites 8 | 9 | Before you begin, ensure you have met the following requirements: 10 | - You have installed Python 3.10 or higher. 11 | - Some evaluations make a large number of API calls; make sure you are willing to spend on them. 12 | 13 | ### Cloning the Repository 14 | 15 | To clone the RAGTune repository, run the following command: 16 | 17 | ```bash 18 | git clone https://github.com/misbahsy/RAGTune.git 19 | cd RAGTune 20 | ``` 21 | 22 | ### Installing Dependencies 23 | 24 | Install the required dependencies by running: 25 | 26 | ```bash 27 | pip install -r requirements.txt 28 | ``` 29 | 30 | [Unstructured](https://www.unstructured.io/) is used as the document loader; make sure to install the system dependencies listed by Unstructured. 31 | ```plaintext 32 | libmagic-dev (filetype detection) 33 | poppler-utils (images and PDFs) 34 | tesseract-ocr (images and PDFs, install tesseract-lang for additional language support) 35 | libreoffice (MS Office docs) 36 | pandoc (EPUBs, RTFs and Open Office docs). Please note that to handle RTF files, you need version 2.14.2 or newer. Running either make install-pandoc or ./scripts/install-pandoc.sh will install the correct version for you. 37 | ``` 38 | 39 | ### Setting Up Environment Variables 40 | 41 | Create a `.env` file in the root directory of the project and add your API keys: 42 | 43 | ```plaintext 44 | OPENAI_API_KEY=sk-xyz 45 | COHERE_API_KEY=cohere-xyz 46 | ANTHROPIC_API_KEY=anthropic-xyz 47 | ``` 48 | 49 | Replace `xyz` with your actual API keys. 50 | 51 | ### Running the Streamlit App 52 | 53 | To run the Streamlit app, execute the following command: 54 | 55 | ```bash 56 | streamlit run Home.py 57 | ``` 58 | 59 | This will start the Streamlit server and open the app in your default web browser. 60 | 61 | ### Dataset 62 | 63 | Make sure to first add your files and the corresponding Questions and Ground Truth answers for the evaluations. 
The app will throw errors if this step is not completed. 64 | 65 | ### Step-wise Approach 66 | 67 | Each section is divided into steps to help you run the app in a step-wise manner. Skipping a step will throw errors. 68 | 69 | Additionally, if you notice an error box on a new tab, you most likely need to run the prior steps before the visual results appear. 70 | 71 | ### Evaluation Metrics 72 | 73 | This repo uses metrics defined by [Ragas](https://docs.ragas.io/en/stable/concepts/metrics/index.html#). Please refer to the Ragas docs for a deeper understanding of the evaluation metrics. 74 | 75 | | Metric Name | Metric Definition | 76 | |----------------------------------|-----------------------------------------------------------------------| 77 | | `ragas.metrics.answer_relevancy` | Scores the relevancy of the answer according to the given question. | 78 | | `ragas.metrics.answer_similarity`| Scores the semantic similarity of the ground truth with the generated answer. | 79 | | `ragas.metrics.answer_correctness`| Measures answer correctness compared to the ground truth as a combination of factuality and semantic similarity. | 80 | | `ragas.metrics.context_precision` | Evaluates whether the ground-truth relevant items in the retrieved contexts are ranked near the top (average precision). | 81 | | `ragas.metrics.context_recall` | Estimates context recall by estimating TP and FN using the annotated answer and retrieved context. | 82 | | `ragas.metrics.context_entity_recall`| Calculates recall based on entities present in the ground truth and context. | 83 | 84 | ### Rerankers 85 | 86 | Rerankers are imported from the [Rerankers](https://github.com/AnswerDotAI/rerankers) library. Please make sure to add your API keys for the providers you intend to use. For some HF rerankers, you might have to accept the terms of use on the HF website. 87 | 88 | ## Customization 89 | 90 | ### Adding LLMs 91 | 92 | A few LLMs are added as a starting point; feel free to modify the list or add your own LLM provider of choice to the list below: 93 | 94 | ```python 95 | llm_options = { 96 | "Cohere - command-light": lambda: ChatCohere(model_name="command-light", temperature=temperature, max_tokens=max_tokens), 97 | "Cohere - command": lambda: ChatCohere(model_name="command", temperature=temperature, max_tokens=max_tokens), 98 | "Cohere - command-r": lambda: ChatCohere(model_name="command-r", temperature=temperature, max_tokens=max_tokens), 99 | "OpenAI - gpt-3.5-turbo": lambda: ChatOpenAI(model_name="gpt-3.5-turbo", temperature=temperature, max_tokens=max_tokens), 100 | "OpenAI - gpt-4-turbo-preview": lambda: ChatOpenAI(model_name="gpt-4-turbo-preview", temperature=temperature, max_tokens=max_tokens), 101 | "OpenAI - gpt-4": lambda: ChatOpenAI(model_name="gpt-4", temperature=temperature, max_tokens=max_tokens), 102 | "Anthropic - claude-3-opus-20240229": lambda: ChatAnthropic(model_name="claude-3-opus-20240229", temperature=temperature, max_tokens=max_tokens), 103 | "Anthropic - claude-3-sonnet-20240229": lambda: ChatAnthropic(model_name="claude-3-sonnet-20240229", temperature=temperature, max_tokens=max_tokens), 104 | "Anthropic - claude-3-haiku-20240307": lambda: ChatAnthropic(model_name="claude-3-haiku-20240307", temperature=temperature, max_tokens=max_tokens), 105 | # "Ollama - Gemma": lambda: ChatOllama(model_name="gemma", temperature=temperature, max_tokens=max_tokens), 106 | } 107 | ``` 108 | Make sure to install any LangChain dependencies for the provider and load any necessary API keys in the Home.py file. 
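For example, since `langchain-google-genai` is already listed in `requirements.txt`, a Gemini model could be wired in roughly as follows. This is only a sketch and not part of the current codebase: the entry name is arbitrary, it assumes a `GOOGLE_API_KEY` is present in your `.env` (and loaded in `Home.py` like the other keys), and the parameter names should be double-checked against the `langchain-google-genai` documentation for your installed version.

```python
# Hypothetical example: add a Gemini option alongside the existing providers.
from langchain_google_genai import ChatGoogleGenerativeAI

llm_options["Google - gemini-pro"] = lambda: ChatGoogleGenerativeAI(
    model="gemini-pro",            # Gemini model identifier
    temperature=temperature,       # reuse the temperature selected in the UI
    max_output_tokens=max_tokens,  # Gemini uses max_output_tokens rather than max_tokens
)
```

The same pattern applies to the `llm_options` dictionaries defined in the individual pages under `pages/`.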
109 | 110 | You can also add any embedding models, rerankers, query transformation techniques, etc. 111 | 112 | ## Tips 113 | 114 | - Feel free to experiment by adding and varying different RAG parameters. 115 | - Use virtual environments to manage dependencies and avoid conflicts. 116 | - Keep your API keys confidential and do not commit them to the repository. 117 | 118 | ## Contributing 119 | 120 | Contributions are welcome! Feel free to open an issue or submit a pull request. 121 | 122 | ## License 123 | 124 | This project is open-sourced under the MIT License. See the `LICENSE` file for more information. 125 | -------------------------------------------------------------------------------- /Home.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | import streamlit as st 4 | import pandas as pd 5 | # Load environment variables from .env file 6 | load_dotenv() 7 | # IMPORTANT: Remember to create a .env variable containing: OPENAI_API_KEY=sk-xyz where xyz is your key 8 | 9 | # Access the API key from the environment variable 10 | os.environ['OPENAI_API_KEY'] = os.environ.get("OPENAI_API_KEY") 11 | os.environ['COHERE_API_KEY'] = os.environ.get("COHERE_API_KEY") 12 | os.environ['ANTHROPIC_API_KEY'] = os.environ.get("ANTHROPIC_API_KEY") 13 | 14 | 15 | from st_pages import Page, show_pages, add_page_title 16 | st.sidebar.header("RAGTune") 17 | 18 | # # Optional -- adds the title and icon to the current page 19 | add_page_title() 20 | 21 | # Specify what pages should be shown in the sidebar, and what their titles and icons 22 | show_pages( 23 | [ 24 | Page("Home.py", "Upload Document and Assign Dataset"), 25 | Page("pages/1_LLM.py", "Evaluate LLM Models"), 26 | Page("pages/2_embeddings.py", "Evaluate Embeddings"), 27 | Page("pages/3_query_tranformations.py", "Evaluate Query Transformations"), 28 | Page("pages/4_rerankers.py", "Evaluate Rerankers"), 29 | # Page("pages/5_prompt_optimizer.py", "Prompt Optimization using DSPy"), coming soon 30 | 31 | ] 32 | ) 33 | 34 | # Initialize doc_path with a default value 35 | doc_path = "docs/constitution.pdf" 36 | 37 | # Initialize session state keys if they don't exist 38 | if 'eval_questions' not in st.session_state: 39 | st.session_state['eval_questions'] = [] 40 | if 'eval_answers' not in st.session_state: 41 | st.session_state['eval_answers'] = [] 42 | 43 | 44 | # Check if the user wants to use the default document or upload their own 45 | st.header('Document Selection') 46 | document_option = st.radio("Choose your document source", ('Upload a file', 'Use default test document')) 47 | 48 | if document_option == 'Upload a file': 49 | st.session_state['eval_questions'] = [""] 50 | st.session_state['eval_answers'] = [""] 51 | # Allow multiple files to be uploaded including pdf, csv, doc, docx, ppt, pptx 52 | uploaded_files = st.file_uploader("Choose files", type=['pdf', 'csv', 'docx', 'pptx'], accept_multiple_files=True) 53 | if uploaded_files: 54 | # Ensure the 'uploaded_docs' directory exists before saving the files 55 | upload_dir = "uploaded_docs" 56 | if not os.path.exists(upload_dir): 57 | os.makedirs(upload_dir) 58 | # Save the uploaded files and collect their paths 59 | for uploaded_file in uploaded_files: 60 | with open(os.path.join(upload_dir, uploaded_file.name), "wb") as f: 61 | f.write(uploaded_file.getbuffer()) 62 | 63 | # Update session state with the directory name of uploaded documents 64 | st.session_state['doc_path'] = upload_dir 65 | 66 | # User 
input for eval_questions and eval_answers 67 | st.subheader('Provide Evaluation Questions and Answers') 68 | 69 | data = { 70 | 'Questions': st.session_state['eval_questions'], 71 | 'Ground Truth': st.session_state['eval_answers'] 72 | } 73 | qa_df = pd.DataFrame(data) 74 | edited_qa_df = st.data_editor(data, num_rows="dynamic", use_container_width=True, hide_index=True) 75 | 76 | eval_questions_list = edited_qa_df['Questions'] 77 | eval_answers_list = edited_qa_df['Ground Truth'] 78 | 79 | if st.button("Save eval Q&As"): 80 | # Check if the number of questions matches the number of answers 81 | st.session_state['eval_questions'] = eval_questions_list 82 | st.session_state['eval_answers'] = eval_answers_list 83 | st.success("Evaluation questions and answers saved successfully!") 84 | 85 | else: 86 | # Use the default document 87 | doc_path = "docs" 88 | st.write("Using the default document: Constitution.pdf") 89 | 90 | # Default eval_questions and eval_answers 91 | eval_questions = [ 92 | "what is article I of the constitution of the US about?", 93 | "How many sections does ARTICLE. IV have?", 94 | "Who is elegible to be the President of the US?", 95 | "What majority is needed to amend the constitution", 96 | "How many states are sufficient for ratification of the constitution?", 97 | ] 98 | eval_answers = [ 99 | "Article I of the United States Constitution establishes the legislative branch of the federal government, known as the United States Congress. It outlines that all legislative powers are vested in Congress, which is divided into two parts: the House of Representatives and the Senate. The bicameral Congress was created as a compromise between large and small states, with representation based on population and equal representation for states. Article I grants Congress enumerated powers and the authority to pass laws necessary for carrying out those powers. It also sets out procedures for passing bills and imposes limits on Congress's authority. 
Additionally, Article I's Vesting Clause ensures that all federal legislative power belongs to Congress, emphasizing the separation of powers among the three branches of government", 100 | "4 Sections", 101 | "No Person except a natural born Citizen, or a Citizen of the United States, at the time of the Adoption of this Constitution, shall be eligible to the Office of President; neither shall any Per- son be eligible to that Office who shall not have attained to the Age of thirty five Years, and been fourteen Years a Resident within the United States.", 102 | "The Congress, whenever two thirds of both Houses shall deem it necessary, shall propose Amendments to this Constitution, or, on the Ap- plication of the Legislatures of two thirds of the several States, shall call a Convention for pro- posing Amendments", 103 | "The Ratification of the Conventions of nine States, shall be sufficient for the Establishment of this Constitution between the States so rati- fying the Same.", 104 | ] 105 | 106 | # Assign the default questions and answers to the state 107 | st.session_state['eval_questions'] = eval_questions 108 | st.session_state['eval_answers'] = eval_answers 109 | st.session_state['doc_path'] = doc_path 110 | 111 | # Display eval questions and answers if available 112 | if st.session_state.get('eval_questions') and st.session_state.get('eval_answers'): 113 | st.subheader('Saved Evaluation Questions and Answers') 114 | # Convert eval_questions and eval_answers to a DataFrame and display it 115 | eval_qa_df = pd.DataFrame({ 116 | 'Questions': st.session_state['eval_questions'], 117 | 'Ground Truth': st.session_state['eval_answers'] 118 | }) 119 | st.dataframe(eval_qa_df, use_container_width=True , hide_index=True) 120 | if len(eval_qa_df["Questions"]) >= 4: 121 | st.subheader('Proceed to one of the tabs on the left to perform Evaluations') 122 | st.page_link("pages/1_LLM.py", label="LLM") 123 | st.page_link("pages/2_embeddings.py", label="Embeddings") 124 | st.page_link("pages/3_query_tranformations.py", label="Query Tranformations") 125 | st.page_link("pages/4_rerankers.py", label="Rerankers") 126 | 127 | else: 128 | st.warning('Please add at least 4 rows of data for evaluation') 129 | 130 | else: 131 | st.header('No evaluation questions and answers provided.') 132 | -------------------------------------------------------------------------------- /pages/3_query_tranformations.py: -------------------------------------------------------------------------------- 1 | from langchain.indexes import VectorstoreIndexCreator 2 | from langchain.chains import RetrievalQA 3 | from langchain_openai import ChatOpenAI 4 | from langchain_community.chat_models import ChatCohere 5 | from langchain_community.embeddings import CohereEmbeddings 6 | from langchain import hub 7 | from langchain_community.vectorstores import Chroma 8 | from langchain_core.output_parsers import StrOutputParser 9 | from langchain_core.runnables import RunnablePassthrough 10 | from langchain_openai import ChatOpenAI, OpenAIEmbeddings 11 | from langchain_text_splitters import RecursiveCharacterTextSplitter 12 | import streamlit as st 13 | from langchain_anthropic import ChatAnthropic 14 | from ragas.metrics import ( 15 | answer_relevancy, 16 | faithfulness, 17 | context_recall, 18 | context_precision, 19 | context_relevancy, 20 | answer_similarity, 21 | answer_correctness, 22 | ) 23 | from langchain_community.document_loaders import DirectoryLoader 24 | import pandas as pd 25 | import random 26 | import string 27 | import streamlit 
as st 28 | import pandas as pd 29 | from utils import * 30 | from st_pages import add_page_title 31 | 32 | add_page_title() 33 | 34 | st.sidebar.header("RAGTune") 35 | 36 | if 'eval_questions' not in st.session_state or 'eval_answers' not in st.session_state or 'doc_path' not in st.session_state: 37 | st.warning("Please upload a document and save eval questions and answers") 38 | 39 | # Generating random string to be used as collection name in chroma to avoid embedding dimensions error 40 | def generate_random_string(length=10): 41 | """Generate a random string of alphanumeric characters""" 42 | characters = string.ascii_letters + string.digits 43 | random_string = ''.join(random.choice(characters) for _ in range(length)) 44 | return random_string 45 | 46 | # First, we need to import Streamlit to access the session state. 47 | eval_questions = st.session_state['eval_questions'] 48 | eval_answers = st.session_state['eval_answers'] 49 | doc_path = st.session_state['doc_path'] 50 | 51 | # Now, we use the 'doc_path' from the session state to load the document. 52 | # We assume that 'doc_path' has been set in the session state in the Home.py file. 53 | loader = DirectoryLoader(doc_path, show_progress=True, use_multithreading=True) 54 | 55 | 56 | # Ask the user for input values for chunk_size, chunk_overlap, number_of_source_documents, search_type, temperature, and embeddings 57 | st.subheader("Step 1: Generate embeddings") 58 | chunk_size = st.number_input('Enter the chunk size for text splitting:', min_value=1, value=1000) 59 | chunk_overlap = st.number_input('Enter the chunk overlap for text splitting:', min_value=0, value=200) 60 | # number_of_source_documents = st.slider('Select the number of source documents for retrieval:', min_value=2, max_value=10, value=4) 61 | # search_type = st.selectbox('Select the search type:', ('similarity', 'mmr')) 62 | embeddings_option = st.selectbox('Select the embeddings to use:', ('CohereEmbeddings', 'OpenAIEmbeddings')) 63 | 64 | # Based on the user's choice of embeddings, instantiate the appropriate embeddings class 65 | if embeddings_option == 'CohereEmbeddings': 66 | embeddings = CohereEmbeddings() 67 | elif embeddings_option == 'OpenAIEmbeddings': 68 | embeddings = OpenAIEmbeddings() 69 | 70 | # Create a text splitter with the user-defined chunk_size and chunk_overlap 71 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) 72 | 73 | 74 | 75 | if st.button('Generate Embeddings'): 76 | docs = loader.load() 77 | splits = text_splitter.split_documents(docs) 78 | # Create a vectorstore with the user-defined embeddings 79 | vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings, persist_directory="./chroma_db", collection_name=generate_random_string()) 80 | 81 | # _retriever = vectorstore.as_retriever(search_type=search_type, search_kwargs={"k": number_of_source_documents}) 82 | _retriever = vectorstore.as_retriever() 83 | st.session_state['_retriever'] = _retriever 84 | 85 | st.subheader("Step 2: Select LLM settings") 86 | llm_options = { 87 | "Cohere - command-light": lambda: ChatCohere(temperature=temperature, max_tokens=max_tokens), 88 | "Cohere - command": lambda: ChatCohere(temperature=temperature, max_tokens=max_tokens), 89 | "Cohere - command-r": lambda: ChatCohere(temperature=temperature, max_tokens=max_tokens), 90 | "OpenAI - gpt-3.5-turbo": lambda: ChatOpenAI(model_name="gpt-3.5-turbo", temperature=temperature, max_tokens=max_tokens), 91 | "OpenAI - gpt-4-turbo-preview": lambda: 
ChatOpenAI(model_name="gpt-4-turbo-preview", temperature=temperature, max_tokens=max_tokens), 92 | "OpenAI - gpt-4": lambda: ChatOpenAI(model_name="gpt-4", temperature=temperature, max_tokens=max_tokens), 93 | "Anthropic - claude-3-opus-20240229": lambda: ChatAnthropic(model_name="claude-3-opus-20240229", temperature=temperature, max_tokens=max_tokens), 94 | "Anthropic - claude-3-sonnet-20240229": lambda: ChatAnthropic(model_name="claude-3-sonnet-20240229", temperature=temperature, max_tokens=max_tokens), 95 | "Anthropic - claude-3-haiku-20240307": lambda: ChatAnthropic(model_name="claude-3-haiku-20240307", temperature=temperature, max_tokens=max_tokens), 96 | } 97 | default_llm = st.selectbox('Select the default LLM model:', options=list(llm_options.keys())) 98 | temperature = st.slider('Select the temperature for the language model:', min_value=0.0, max_value=2.0, value=0.7) 99 | max_tokens = st.number_input('Enter the max tokens for the output:', min_value=0, max_value=4096, value=400) 100 | 101 | llm = llm_options[default_llm]() 102 | 103 | # st.subheader("Step 3: Select Query ") 104 | 105 | # Define a dictionary mapping query transformation names to their respective functions 106 | query_transformation_options = { 107 | 'Multi Query': run_multi_query, 108 | 'RAG Fusion': run_rag_fusion, 109 | 'Decomposition Recursive': run_recursive_decomposition, 110 | 'Decomposition Individual': run_individual_decomposition, 111 | 'Step Back': run_step_back_rag, 112 | 'HyDE': run_hyde, 113 | } 114 | 115 | st.subheader("Step 3: Select query transformations") 116 | selected_transformations = st.multiselect('Select the query transformations to evaluate:', options=list(query_transformation_options.keys())) 117 | 118 | # Add an "Evaluate" button 119 | if st.button('Evaluate'): 120 | results = {} 121 | _retriever = st.session_state['_retriever'] 122 | for transformation_name in selected_transformations: 123 | transformation_func = query_transformation_options[transformation_name] 124 | answers, contexts = [], [] 125 | for question in eval_questions: 126 | # queries = transformation_func.invoke({"question": question}) 127 | # retrieved_docs = _retriever.get_relevant_documents(queries) 128 | # answer = generate_answer(question, retrieved_docs) 129 | answer, retrieved_docs = transformation_func(llm, _retriever, question) 130 | # print("answer", answer) 131 | # print(transformation_name,retrieved_docs) 132 | answers.append(answer) 133 | contexts.append([doc.page_content for doc in retrieved_docs]) 134 | 135 | from datasets import Dataset 136 | response_dataset = Dataset.from_dict({ 137 | "question": eval_questions, 138 | "answer": answers, 139 | "contexts": contexts, 140 | "ground_truth": eval_answers 141 | }) 142 | 143 | from ragas import evaluate 144 | result = evaluate( 145 | response_dataset, 146 | metrics=[ 147 | faithfulness, 148 | answer_relevancy, 149 | answer_similarity, 150 | answer_correctness, 151 | context_recall, 152 | context_precision, 153 | context_relevancy, 154 | ], 155 | ) 156 | 157 | df_results = result.to_pandas() 158 | results[transformation_name] = { 159 | "overview": result, 160 | "details": df_results 161 | } 162 | 163 | st.session_state['query_transformation_results'] = results 164 | 165 | if 'query_transformation_results' in st.session_state and st.session_state['query_transformation_results']: 166 | with st.expander("See Evaluation Results"): 167 | for transformation_name, results in st.session_state['query_transformation_results'].items(): 168 | st.write(f"Overview of Results for 
{transformation_name}:") 169 | st.write(results["overview"]) 170 | st.write(f"Details of Results for {transformation_name}:") 171 | st.dataframe(results["details"], use_container_width=True) 172 | else: 173 | st.warning("No results available. Please run the evaluation first.") 174 | 175 | st.subheader("Step 4: Visualize Data") 176 | 177 | if st.button('Prepare Charts'): 178 | if 'query_transformation_results' in st.session_state: 179 | results = st.session_state['query_transformation_results'] 180 | 181 | data = [] 182 | for transformation_name, transformation_result in results.items(): 183 | result = transformation_result["overview"] 184 | for metric_name, metric_value in result.items(): 185 | data.append({ 186 | "Transformation": transformation_name, 187 | "Metric": metric_name, 188 | "Value": metric_value 189 | }) 190 | 191 | visual_df = pd.DataFrame(data) 192 | st.session_state["visual_df"] = visual_df 193 | else: 194 | st.warning("No results available. Please run the evaluation first.") 195 | 196 | if 'visual_df' in st.session_state and not st.session_state['visual_df'].empty: 197 | with st.expander("See Visualization Results"): 198 | import plotly.express as px 199 | st.subheader("Side-by-Side Bar Charts") 200 | fig = px.bar(st.session_state["visual_df"], x="Metric", y="Value", color="Transformation", barmode='group', height=400) 201 | st.plotly_chart(fig) 202 | 203 | st.subheader("Overlaid Line Charts") 204 | line_chart = st.line_chart(st.session_state["visual_df"].pivot(index='Metric', columns='Transformation', values='Value')) 205 | 206 | st.write("Dataframe for download") 207 | st.session_state["visual_df"] 208 | else: 209 | st.warning("No plots available. Please run the 'Prepare Charts' step first.") -------------------------------------------------------------------------------- /pages/1_LLM.py: -------------------------------------------------------------------------------- 1 | from langchain_openai import ChatOpenAI 2 | from langchain_community.chat_models import ChatCohere 3 | from langchain_community.embeddings import CohereEmbeddings 4 | from langchain import hub 5 | from langchain_community.vectorstores import Chroma 6 | from langchain_core.output_parsers import StrOutputParser 7 | from langchain_core.runnables import RunnablePassthrough 8 | from langchain_openai import ChatOpenAI, OpenAIEmbeddings 9 | from langchain_text_splitters import RecursiveCharacterTextSplitter 10 | import streamlit as st 11 | from langchain_anthropic import ChatAnthropic 12 | from ragas.metrics import ( 13 | answer_relevancy, 14 | faithfulness, 15 | context_recall, 16 | context_precision, 17 | context_relevancy, 18 | answer_similarity, 19 | answer_correctness, 20 | ) 21 | from langchain_community.document_loaders import DirectoryLoader 22 | import pandas as pd 23 | import random 24 | import string 25 | from langchain_community.chat_models import ChatOllama 26 | from st_pages import add_page_title 27 | 28 | add_page_title() 29 | 30 | st.sidebar.header("RAGTune") 31 | 32 | if 'eval_questions' not in st.session_state or 'eval_answers' not in st.session_state or 'doc_path' not in st.session_state: 33 | st.warning("Please upload a document and save eval questions and answers") 34 | 35 | # Generating random string to be used as collection name in chroma to avoid embedding dimensions error 36 | def generate_random_string(length=10): 37 | """Generate a random string of alphanumeric characters""" 38 | characters = string.ascii_letters + string.digits 39 | random_string = ''.join(random.choice(characters) 
for _ in range(length)) 40 | return random_string 41 | 42 | # First, we need to import Streamlit to access the session state. 43 | eval_questions = st.session_state['eval_questions'] 44 | eval_answers = st.session_state['eval_answers'] 45 | doc_path = st.session_state['doc_path'] 46 | 47 | # Now, we use the 'doc_path' from the session state to load the document. 48 | # We assume that 'doc_path' has been set in the session state in the Home.py file. 49 | loader = DirectoryLoader(doc_path, show_progress=True, use_multithreading=True) 50 | 51 | 52 | # Ask the user for input values for chunk_size, chunk_overlap, number_of_source_documents, search_type, temperature, and embeddings 53 | st.subheader("Step 1: Generate embeddings") 54 | chunk_size = st.number_input('Enter the chunk size for text splitting:', min_value=1, value=1000) 55 | chunk_overlap = st.number_input('Enter the chunk overlap for text splitting:', min_value=0, value=200) 56 | number_of_source_documents = st.slider('Select the number of source documents for retrieval:', min_value=2, max_value=10, value=4) 57 | search_type = st.selectbox('Select the search type:', ('similarity', 'mmr')) 58 | embeddings_option = st.selectbox('Select the embeddings to use:', ('CohereEmbeddings', 'OpenAIEmbeddings')) 59 | 60 | # Based on the user's choice of embeddings, instantiate the appropriate embeddings class 61 | if embeddings_option == 'CohereEmbeddings': 62 | embeddings = CohereEmbeddings() 63 | elif embeddings_option == 'OpenAIEmbeddings': 64 | embeddings = OpenAIEmbeddings() 65 | 66 | # Create a text splitter with the user-defined chunk_size and chunk_overlap 67 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) 68 | 69 | 70 | 71 | if st.button('Generate Embeddings'): 72 | docs = loader.load() 73 | splits = text_splitter.split_documents(docs) 74 | # Create a vectorstore with the user-defined embeddings 75 | vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings, persist_directory="./chroma_db", collection_name=generate_random_string()) 76 | # Create a retriever with the user-defined search_type and number_of_source_documents 77 | _retriever = vectorstore.as_retriever(search_type=search_type, search_kwargs={"k": number_of_source_documents}) 78 | st.session_state['_retriever'] = _retriever 79 | 80 | 81 | st.subheader("Step 2: Select default LLM settings") 82 | 83 | temperature = st.slider('Select the temperature for the language model:', min_value=0.0, max_value=2.0, value=0.7) 84 | max_tokens = st.number_input('Enter the max tokens for the output:', min_value=0, max_value=4096, value=400) 85 | 86 | # Retrieve the prompt from the hub 87 | _prompt = hub.pull("rlm/rag-prompt") 88 | 89 | st.write( 90 | """ 91 | Prompt: You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. 92 | If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise. 93 | Question: {question} 94 | Context: {context} 95 | Answer: 96 | """ 97 | ) 98 | 99 | # Let's allow the user to select multiple LLMs to evaluate. 
100 | llm_options = { 101 | "Cohere - command-light": lambda: ChatCohere(model_name="command-light", temperature=temperature, max_tokens=max_tokens), 102 | "Cohere - command": lambda: ChatCohere(model_name="command", temperature=temperature, max_tokens=max_tokens), 103 | "Cohere - command-r": lambda: ChatCohere(model_name="command-r", temperature=temperature, max_tokens=max_tokens), 104 | "OpenAI - gpt-3.5-turbo": lambda: ChatOpenAI(model_name="gpt-3.5-turbo", temperature=temperature, max_tokens=max_tokens), 105 | "OpenAI - gpt-4-turbo-preview": lambda: ChatOpenAI(model_name="gpt-4-turbo-preview", temperature=temperature, max_tokens=max_tokens), 106 | "OpenAI - gpt-4": lambda: ChatOpenAI(model_name="gpt-4", temperature=temperature, max_tokens=max_tokens), 107 | "Anthropic - claude-3-opus-20240229": lambda: ChatAnthropic(model_name="claude-3-opus-20240229", temperature=temperature, max_tokens=max_tokens), 108 | "Anthropic - claude-3-sonnet-20240229": lambda: ChatAnthropic(model_name="claude-3-sonnet-20240229", temperature=temperature, max_tokens=max_tokens), 109 | "Anthropic - claude-3-haiku-20240307": lambda: ChatAnthropic(model_name="claude-3-haiku-20240307", temperature=temperature, max_tokens=max_tokens), 110 | # "Ollama - Gemma": lambda: ChatOllama(model_name="gemma", temperature=temperature, max_tokens=max_tokens), 111 | 112 | } 113 | 114 | # Ask the user to select the LLMs they want to evaluate. 115 | st.subheader("Step 3: Select the LLM models to be evaluated") 116 | selected_llms = st.multiselect('Select the LLMs to evaluate:', options=list(llm_options.keys())) 117 | 118 | @st.cache_data 119 | def evaluate_llms(selected_llms, eval_questions, eval_answers, _retriever, _prompt): 120 | # We will store the results in a dictionary for visualization later. 121 | llm_results = {} 122 | 123 | # Iterate over each selected LLM and perform the evaluation. 124 | for llm_name in selected_llms: 125 | llm = llm_options[llm_name]() 126 | rag_chain = ( 127 | {"context": _retriever | format_docs, "question": RunnablePassthrough()} 128 | | _prompt 129 | | llm 130 | | StrOutputParser() 131 | ) 132 | 133 | # Run the RAG chain for each question and collect answers and contexts. 134 | answers = [] 135 | contexts = [] 136 | for question in eval_questions: 137 | response = rag_chain.invoke(question) 138 | answers.append(response) 139 | retrieved_docs = _retriever.invoke(question) 140 | contexts.append([context.page_content for context in retrieved_docs]) 141 | 142 | # Create a Hugging Face dataset from the responses. 143 | from datasets import Dataset 144 | response_dataset = Dataset.from_dict({ 145 | "question": eval_questions, 146 | "answer": answers, 147 | "contexts": contexts, 148 | "ground_truth": eval_answers 149 | }) 150 | 151 | # Evaluate the dataset using the specified metrics. 152 | from ragas import evaluate 153 | result = evaluate( 154 | response_dataset, 155 | metrics=[ 156 | faithfulness, 157 | answer_relevancy, 158 | answer_similarity, 159 | answer_correctness, 160 | ], 161 | ) 162 | 163 | # Convert the results to a pandas DataFrame for easier visualization. 164 | df_results = result.to_pandas() 165 | llm_results[llm_name] = { 166 | "overview": result, 167 | "details": df_results 168 | } 169 | 170 | return llm_results 171 | 172 | # Add an "Evaluate" button that the user must press to run the evaluation. 173 | if st.button('Evaluate'): 174 | # Function to format the documents for the RAG chain. 
175 | def format_docs(docs): 176 | return "\n\n".join(doc.page_content for doc in docs) 177 | 178 | if 'llm_results' not in st.session_state: 179 | st.session_state['llm_results'] = {} 180 | st.session_state['llm_results'] = evaluate_llms(selected_llms, st.session_state['eval_questions'], st.session_state['eval_answers'], st.session_state['_retriever'], _prompt) 181 | 182 | if 'llm_results' in st.session_state and st.session_state['llm_results']: 183 | # Now, let's visualize the results for each LLM. 184 | with st.expander("See Evaluation Results"): 185 | for llm_name, results in st.session_state['llm_results'].items(): 186 | st.write(f"Overview of Results for {llm_name}:") 187 | st.write(results["overview"]) 188 | st.write(f"Details of Results for {llm_name}:") 189 | st.dataframe(results["details"], use_container_width=True) 190 | else: 191 | st.warning("No results available. Please run the evaluation first.") 192 | 193 | st.subheader("Step 4: Visualize Data") 194 | 195 | if st.button('Prepare Charts'): 196 | 197 | if 'llm_results' in st.session_state: 198 | results = st.session_state['llm_results'] 199 | 200 | # Convert the results dictionary to a pandas DataFrame 201 | data = [] 202 | for model_name, model_result in results.items(): 203 | result = model_result["overview"] 204 | print(result) 205 | for metric_name, metric_value in result.items(): 206 | data.append({ 207 | "Model": model_name, 208 | "Metric": metric_name, 209 | "Value": metric_value 210 | }) 211 | 212 | visual_df = pd.DataFrame(data) 213 | st.session_state["visual_df"] = visual_df 214 | 215 | else: 216 | st.warning("No results available. Please run the evaluation first.") 217 | 218 | 219 | if 'visual_df' in st.session_state and not st.session_state['visual_df'].empty: 220 | # Now, let's visualize the results for each LLM. 221 | with st.expander("See Visualization Results"): 222 | # Side-by-Side Bar Charts 223 | st.subheader("Side-by-Side Bar Charts") 224 | 225 | import plotly.express as px 226 | # Create the grouped bar chart with Plotly 227 | fig = px.bar(st.session_state["visual_df"], x="Metric", y="Value", color="Model", barmode='group', height=400) 228 | # Display the figure in the Streamlit app 229 | st.plotly_chart(fig) 230 | 231 | # Overlaid Line Charts 232 | st.subheader("Overlaid Line Charts") 233 | line_chart = st.line_chart(st.session_state["visual_df"].pivot(index='Metric', columns='Model', values='Value')) 234 | 235 | st.write("Dataframe for download") 236 | st.session_state["visual_df"] 237 | 238 | else: 239 | st.warning("No plots available. 
Please run the 'Prepare Charts' step first.") -------------------------------------------------------------------------------- /pages/4_rerankers.py: -------------------------------------------------------------------------------- 1 | # from langchain.document_loaders import PyPDFLoader 2 | from langchain.indexes import VectorstoreIndexCreator 3 | from langchain.chains import RetrievalQA 4 | from langchain_openai import ChatOpenAI 5 | from langchain_community.chat_models import ChatCohere 6 | from langchain_community.embeddings import CohereEmbeddings 7 | from langchain import hub 8 | from langchain_community.vectorstores import Chroma 9 | from langchain_core.output_parsers import StrOutputParser 10 | from langchain_core.runnables import RunnablePassthrough 11 | from langchain_openai import ChatOpenAI, OpenAIEmbeddings 12 | from langchain_text_splitters import RecursiveCharacterTextSplitter 13 | import streamlit as st 14 | from langchain_anthropic import ChatAnthropic 15 | from ragas.metrics import ( 16 | answer_relevancy, 17 | faithfulness, 18 | context_recall, 19 | context_precision, 20 | context_relevancy, 21 | answer_similarity, 22 | answer_correctness, 23 | ) 24 | from langchain_community.document_loaders import DirectoryLoader 25 | import pandas as pd 26 | import random 27 | import string 28 | from rerankers import Reranker 29 | from langchain.retrievers import ContextualCompressionRetriever 30 | import os 31 | from dotenv import load_dotenv 32 | from st_pages import add_page_title 33 | 34 | add_page_title() 35 | 36 | # Load environment variables from .env file 37 | load_dotenv() 38 | # IMPORTANT: Remember to create a .env variable containing: OPENAI_API_KEY=sk-xyz where xyz is your key 39 | # Access the API key from the environment variable 40 | st.sidebar.header("RAGTune") 41 | os.environ['COHERE_API_KEY'] = os.environ.get("COHERE_API_KEY") 42 | 43 | if 'eval_questions' not in st.session_state or 'eval_answers' not in st.session_state or 'doc_path' not in st.session_state: 44 | st.warning("Please upload a document and save eval questions and answers") 45 | 46 | # Generating random string to be used as collection name in chroma to avoid embedding dimensions error 47 | def generate_random_string(length=10): 48 | """Generate a random string of alphanumeric characters""" 49 | characters = string.ascii_letters + string.digits 50 | random_string = ''.join(random.choice(characters) for _ in range(length)) 51 | return random_string 52 | 53 | # First, we need to import Streamlit to access the session state. 54 | eval_questions = st.session_state['eval_questions'] 55 | eval_answers = st.session_state['eval_answers'] 56 | doc_path = st.session_state['doc_path'] 57 | 58 | # Now, we use the 'doc_path' from the session state to load the document. 59 | # We assume that 'doc_path' has been set in the session state in the Home.py file. 
60 | loader = DirectoryLoader(doc_path, show_progress=True, use_multithreading=True) 61 | 62 | 63 | # Ask the user for input values for chunk_size, chunk_overlap, number_of_source_documents, search_type, temperature, and embeddings 64 | st.subheader("Step 1: Generate embeddings") 65 | chunk_size = st.number_input('Enter the chunk size for text splitting:', min_value=1, value=1000) 66 | chunk_overlap = st.number_input('Enter the chunk overlap for text splitting:', min_value=0, value=200) 67 | number_of_source_documents = st.slider('Select the number of source documents for retrieval:', min_value=2, max_value=10, value=4) 68 | search_type = st.selectbox('Select the search type:', ('similarity', 'mmr')) 69 | embeddings_option = st.selectbox('Select the embeddings to use:', ('CohereEmbeddings', 'OpenAIEmbeddings')) 70 | 71 | # Based on the user's choice of embeddings, instantiate the appropriate embeddings class 72 | if embeddings_option == 'CohereEmbeddings': 73 | embeddings = CohereEmbeddings() 74 | elif embeddings_option == 'OpenAIEmbeddings': 75 | embeddings = OpenAIEmbeddings() 76 | 77 | # Create a text splitter with the user-defined chunk_size and chunk_overlap 78 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) 79 | 80 | if st.button('Generate Embeddings'): 81 | docs = loader.load() 82 | splits = text_splitter.split_documents(docs) 83 | # Create a vectorstore with the user-defined embeddings 84 | vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings, persist_directory="./chroma_db", collection_name=generate_random_string()) 85 | # Create a retriever with the user-defined search_type and number_of_source_documents 86 | _retriever = vectorstore.as_retriever(search_type=search_type, search_kwargs={"k": number_of_source_documents}) 87 | st.session_state['_retriever'] = _retriever 88 | 89 | st.subheader("Step 2: Select default LLM settings") 90 | 91 | temperature = st.slider('Select the temperature for the language model:', min_value=0.0, max_value=2.0, value=0.7) 92 | max_tokens = st.number_input('Enter the max tokens for the output:', min_value=0, max_value=4096, value=400) 93 | 94 | # Retrieve the prompt from the hub 95 | _prompt = hub.pull("rlm/rag-prompt") 96 | 97 | st.write( 98 | """ 99 | Prompt: You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. 100 | If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise. 101 | Question: {question} 102 | Context: {context} 103 | Answer: 104 | """ 105 | ) 106 | 107 | # Let's allow the user to select a single LLM and multiple rerankers to evaluate. 
108 | llm_options = { 109 | "Cohere - command-light": lambda: ChatCohere(model_name="command-light", temperature=temperature, max_tokens=max_tokens), 110 | "Cohere - command": lambda: ChatCohere(model_name="command", temperature=temperature, max_tokens=max_tokens), 111 | "Cohere - command-r": lambda: ChatCohere(model_name="command-r", temperature=temperature, max_tokens=max_tokens), 112 | "OpenAI - gpt-3.5-turbo": lambda: ChatOpenAI(model_name="gpt-3.5-turbo", temperature=temperature, max_tokens=max_tokens), 113 | "OpenAI - gpt-4-turbo-preview": lambda: ChatOpenAI(model_name="gpt-4-turbo-preview", temperature=temperature, max_tokens=max_tokens), 114 | "OpenAI - gpt-4": lambda: ChatOpenAI(model_name="gpt-4", temperature=temperature, max_tokens=max_tokens), 115 | "Anthropic - claude-3-opus-20240229": lambda: ChatAnthropic(model_name="claude-3-opus-20240229", temperature=temperature, max_tokens=max_tokens), 116 | "Anthropic - claude-3-sonnet-20240229": lambda: ChatAnthropic(model_name="claude-3-sonnet-20240229", temperature=temperature, max_tokens=max_tokens), 117 | "Anthropic - claude-3-haiku-20240307": lambda: ChatAnthropic(model_name="claude-3-haiku-20240307", temperature=temperature, max_tokens=max_tokens), 118 | } 119 | 120 | reranker_options = { 121 | "Cross-encoder default": lambda: Reranker('cross-encoder'), 122 | "MixedBread-AI Cross-encoder": lambda: Reranker('mixedbread-ai/mxbai-rerank-xlarge-v1', model_type='cross-encoder'), 123 | "Default T5 Seq2Seq reranker": lambda: Reranker("t5"), 124 | "InRanker-base T5 Seq2Seq reranker": lambda: Reranker("unicamp-dl/InRanker-base", model_type="t5"), 125 | "Cohere API reranker": lambda: Reranker("cohere", lang='en', api_key=os.environ['COHERE_API_KEY']), 126 | # "Jina API reranker": lambda: Reranker("jina", api_key=st.secrets["JINA_API_KEY"]), 127 | # "RankGPT4-turbo": lambda: Reranker("rankgpt", api_key=st.secrets["OPENAI_API_KEY"]), 128 | # "RankGPT3-turbo": lambda: Reranker("rankgpt3", api_key=st.secrets["OPENAI_API_KEY"]), 129 | "ColBERTv2 reranker": lambda: Reranker("colbert"), 130 | } 131 | 132 | # Ask the user to select the LLM and rerankers they want to evaluate. 133 | st.subheader("Step 3: Select the LLM and rerankers to be evaluated") 134 | selected_llm = st.selectbox('Select the LLM to evaluate:', options=list(llm_options.keys())) 135 | selected_rerankers = st.multiselect('Select the rerankers to evaluate:', options=list(reranker_options.keys())) 136 | 137 | @st.cache_data 138 | def evaluate_rerankers(selected_llm, selected_rerankers, eval_questions, eval_answers, _retriever, _prompt): 139 | # We will store the results in a dictionary for visualization later. 140 | reranker_results = {} 141 | 142 | # Instantiate the selected LLM 143 | llm = llm_options[selected_llm]() 144 | 145 | # Iterate over each selected reranker and perform the evaluation. 146 | for reranker_name in selected_rerankers: 147 | ranker = reranker_options[reranker_name]() 148 | compressor = ranker.as_langchain_compressor(k=3) 149 | compression_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=_retriever) 150 | 151 | rag_chain = ( 152 | {"context": compression_retriever | format_docs, "question": RunnablePassthrough()} 153 | | _prompt 154 | | llm 155 | | StrOutputParser() 156 | ) 157 | 158 | # Run the RAG chain for each question and collect answers and contexts. 
159 | answers = [] 160 | contexts = [] 161 | for question in eval_questions: 162 | response = rag_chain.invoke(question) 163 | answers.append(response) 164 | retrieved_docs = compression_retriever.invoke(question) 165 | contexts.append([context.page_content for context in retrieved_docs]) 166 | 167 | # Create a Hugging Face dataset from the responses. 168 | from datasets import Dataset 169 | response_dataset = Dataset.from_dict({ 170 | "question": eval_questions, 171 | "answer": answers, 172 | "contexts": contexts, 173 | "ground_truth": eval_answers 174 | }) 175 | 176 | # Evaluate the dataset using the specified metrics. 177 | from ragas import evaluate 178 | result = evaluate( 179 | response_dataset, 180 | metrics=[ 181 | faithfulness, 182 | answer_relevancy, 183 | answer_similarity, 184 | answer_correctness, 185 | context_recall, 186 | context_precision, 187 | context_relevancy, 188 | ], 189 | ) 190 | 191 | # Convert the results to a pandas DataFrame for easier visualization. 192 | df_results = result.to_pandas() 193 | reranker_results[reranker_name] = { 194 | "overview": result, 195 | "details": df_results 196 | } 197 | 198 | return reranker_results 199 | 200 | # Add an "Evaluate" button that the user must press to run the evaluation. 201 | if st.button('Evaluate'): 202 | # Function to format the documents for the RAG chain. 203 | def format_docs(docs): 204 | return "\n\n".join(doc.page_content for doc in docs) 205 | 206 | if 'reranker_results' not in st.session_state: 207 | st.session_state['reranker_results'] = {} 208 | st.session_state['reranker_results'] = evaluate_rerankers(selected_llm, selected_rerankers, st.session_state['eval_questions'], st.session_state['eval_answers'], st.session_state['_retriever'], _prompt) 209 | 210 | if 'reranker_results' in st.session_state and st.session_state['reranker_results']: 211 | # Now, let's visualize the results for each reranker. 212 | with st.expander("See Evaluation Results"): 213 | for reranker_name, results in st.session_state['reranker_results'].items(): 214 | st.write(f"Overview of Results for {reranker_name}:") 215 | st.write(results["overview"]) 216 | st.write(f"Details of Results for {reranker_name}:") 217 | st.dataframe(results["details"], use_container_width=True) 218 | else: 219 | st.warning("No results available. Please run the evaluation first.") 220 | 221 | st.subheader("Step 4: Visualize Data") 222 | 223 | if st.button('Prepare Charts'): 224 | 225 | if 'reranker_results' in st.session_state: 226 | results = st.session_state['reranker_results'] 227 | 228 | # Convert the results dictionary to a pandas DataFrame 229 | data = [] 230 | for reranker_name, reranker_result in results.items(): 231 | result = reranker_result["overview"] 232 | for metric_name, metric_value in result.items(): 233 | data.append({ 234 | "Reranker": reranker_name, 235 | "Metric": metric_name, 236 | "Value": metric_value 237 | }) 238 | 239 | visual_df = pd.DataFrame(data) 240 | st.session_state["visual_df"] = visual_df 241 | 242 | else: 243 | st.warning("No results available. Please run the evaluation first.") 244 | 245 | 246 | if 'visual_df' in st.session_state and not st.session_state['visual_df'].empty: 247 | # Now, let's visualize the results for each reranker. 
248 | with st.expander("See Visualization Results"): 249 | # Side-by-Side Bar Charts 250 | st.subheader("Side-by-Side Bar Charts") 251 | 252 | import plotly.express as px 253 | # Create the grouped bar chart with Plotly 254 | fig = px.bar(st.session_state["visual_df"], x="Metric", y="Value", color="Reranker", barmode='group', height=400) 255 | # Display the figure in the Streamlit app 256 | st.plotly_chart(fig) 257 | 258 | # Overlaid Line Charts 259 | st.subheader("Overlaid Line Charts") 260 | line_chart = st.line_chart(st.session_state["visual_df"].pivot(index='Metric', columns='Reranker', values='Value')) 261 | 262 | st.write("Dataframe for download") 263 | st.session_state["visual_df"] 264 | 265 | else: 266 | st.warning("No plots available. Please run the 'Prepare Charts' step first.") -------------------------------------------------------------------------------- /utils/query_translation.py: -------------------------------------------------------------------------------- 1 | from operator import itemgetter 2 | from langchain_openai import ChatOpenAI 3 | from langchain_core.runnables import RunnablePassthrough 4 | from langchain.prompts import ChatPromptTemplate 5 | from langchain_core.output_parsers import StrOutputParser 6 | from langchain_openai import ChatOpenAI 7 | from langchain.load import dumps, loads 8 | from operator import itemgetter 9 | from langchain_openai import ChatOpenAI 10 | from langchain_core.runnables import RunnablePassthrough, RunnableLambda 11 | from langchain.prompts import ChatPromptTemplate 12 | from langchain_core.output_parsers import StrOutputParser 13 | from langchain.load import dumps, loads 14 | from langchain import hub 15 | 16 | 17 | def generate_queries_multi(llm,retriever, question): 18 | # Multi Query: Different Perspectives 19 | template = """You are an AI language model assistant. Your task is to generate five 20 | different versions of the given user question to retrieve relevant documents from a vector 21 | database. By generating multiple perspectives on the user question, your goal is to help 22 | the user overcome some of the limitations of the distance-based similarity search. 23 | Provide these alternative questions separated by newlines. Original question: {question}""" 24 | prompt_perspectives = ChatPromptTemplate.from_template(template) 25 | 26 | generate_queries = ( 27 | prompt_perspectives 28 | | llm 29 | | StrOutputParser() 30 | | (lambda x: x.split("\n")) 31 | ) 32 | 33 | def get_unique_union(documents: list[list]): 34 | """ Unique union of retrieved docs """ 35 | # Flatten list of lists, and convert each Document to string 36 | flattened_docs = [dumps(doc) for sublist in documents for doc in sublist] 37 | # Get unique documents 38 | unique_docs = list(set(flattened_docs)) 39 | # Return 40 | return [loads(doc) for doc in unique_docs] 41 | 42 | # Retrieve 43 | # question = "What is task decomposition for LLM agents?" 
44 | retrieval_chain = generate_queries | retriever.map() | get_unique_union
45 | docs = retrieval_chain.invoke({"question":question})
46 | return docs, retrieval_chain
47 | 
48 | def vanilla_rag(question, llm, retrieval_chain):
49 | # RAG
50 | template = """Answer the following question based on this context:
51 | 
52 | {context}
53 | 
54 | Question: {question}
55 | """
56 | 
57 | prompt = ChatPromptTemplate.from_template(template)
58 | 
59 | # The LLM passed in by the caller is used here, so the user-selected model generates the final answer
60 | 
61 | final_rag_chain = (
62 | {"context": retrieval_chain, 
63 | "question": itemgetter("question")} 
64 | | prompt
65 | | llm
66 | | StrOutputParser()
67 | )
68 | 
69 | return final_rag_chain.invoke({"question":question})
70 | 
71 | def run_multi_query(llm, retriever, question):
72 | docs, retrieval_chain = generate_queries_multi(llm, retriever, question)
73 | answer = vanilla_rag(question, llm, retrieval_chain)
74 | return answer, docs
75 | 
76 | def generate_queries_rag_fusion(llm, retriever, question):
77 | # RAG-Fusion: Related
78 | template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
79 | Generate multiple search queries related to: {question} \n
80 | Output (4 queries):"""
81 | prompt_rag_fusion = ChatPromptTemplate.from_template(template)
82 | 
83 | generate_queries = (
84 | prompt_rag_fusion 
85 | | llm
86 | | StrOutputParser() 
87 | | (lambda x: x.split("\n"))
88 | )
89 | 
90 | def reciprocal_rank_fusion(results: list[list], k=60):
91 | """ Reciprocal_rank_fusion that takes multiple lists of ranked documents 
92 | and an optional parameter k used in the RRF formula """
93 | 
94 | # Initialize a dictionary to hold fused scores for each unique document
95 | fused_scores = {}
96 | 
97 | # Iterate through each list of ranked documents
98 | for docs in results:
99 | # Iterate through each document in the list, with its rank (position in the list)
100 | for rank, doc in enumerate(docs):
101 | # Convert the document to a string format to use as a key (assumes documents can be serialized to JSON)
102 | doc_str = dumps(doc)
103 | # If the document is not yet in the fused_scores dictionary, add it with an initial score of 0
104 | if doc_str not in fused_scores:
105 | fused_scores[doc_str] = 0
106 | # Retrieve the current score of the document, if any
107 | previous_score = fused_scores[doc_str]
108 | # Update the score of the document using the RRF formula: 1 / (rank + k)
109 | fused_scores[doc_str] += 1 / (rank + k)
110 | 
111 | # Sort the documents based on their fused scores in descending order to get the final reranked results
112 | reranked_results = [
113 | (loads(doc), score)
114 | for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
115 | ]
116 | 
117 | # Return the reranked results as a list of tuples, each containing the document and its fused score
118 | return reranked_results
119 | 
120 | retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
121 | docs = retrieval_chain_rag_fusion.invoke({"question": question})
122 | return docs, retrieval_chain_rag_fusion
123 | 
124 | def vanilla_rag_fusion(question, llm, retrieval_chain_rag_fusion):
125 | # RAG
126 | template = """Answer the following question based on this context:
127 | 
128 | {context}
129 | 
130 | Question: {question}
131 | """
132 | 
133 | prompt = ChatPromptTemplate.from_template(template)
134 | 
135 | final_rag_chain = (
136 | {"context": retrieval_chain_rag_fusion, 
137 | "question": itemgetter("question")} 
138 | | prompt
139 | | llm
140 | |
StrOutputParser() 141 | ) 142 | 143 | return final_rag_chain.invoke({"question":question}) 144 | 145 | def run_rag_fusion(llm, retriever, question): 146 | docs, retrieval_chain_rag_fusion = generate_queries_rag_fusion(llm, retriever, question) 147 | contexts = [doc[0] for doc in docs] 148 | answer = vanilla_rag_fusion(question, llm, retrieval_chain_rag_fusion) 149 | return answer, contexts 150 | 151 | def generate_queries_decomposition(llm, question): 152 | # Decomposition 153 | template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n 154 | The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n 155 | Generate multiple search queries related to: {question} \n 156 | Output (3 queries):""" 157 | prompt_decomposition = ChatPromptTemplate.from_template(template) 158 | 159 | # Chain 160 | generate_queries_decomposition_chain = (prompt_decomposition | llm | StrOutputParser() | (lambda x: x.split("\n"))) 161 | 162 | # Run 163 | questions = generate_queries_decomposition_chain.invoke({"question":question}) 164 | return questions, generate_queries_decomposition_chain 165 | 166 | def answer_recursively(llm, retriever, question, questions): 167 | # Prompt 168 | template = """Here is the question you need to answer: 169 | 170 | \n --- \n {question} \n --- \n 171 | 172 | Here is any available background question + answer pairs: 173 | 174 | \n --- \n {q_a_pairs} \n --- \n 175 | 176 | Here is additional context relevant to the question: 177 | 178 | \n --- \n {context} \n --- \n 179 | 180 | Use the above context and any background question + answer pairs to answer the question: \n {question} 181 | """ 182 | 183 | decomposition_prompt = ChatPromptTemplate.from_template(template) 184 | 185 | def format_qa_pair(question, answer): 186 | """Format Q and A pair""" 187 | 188 | formatted_string = "" 189 | formatted_string += f"Question: {question}\nAnswer: {answer}\n\n" 190 | return formatted_string.strip() 191 | 192 | # 193 | all_contexts = [] 194 | q_a_pairs = "" 195 | for q in questions: 196 | context = retriever.invoke(q) 197 | # Add the formatted context to the list 198 | all_contexts.append(context) 199 | 200 | rag_chain = ( 201 | {"context": itemgetter("question") | retriever, 202 | "question": itemgetter("question"), 203 | "q_a_pairs": itemgetter("q_a_pairs")} 204 | | decomposition_prompt 205 | | llm 206 | | StrOutputParser()) 207 | 208 | answer = rag_chain.invoke({"question":q,"q_a_pairs":q_a_pairs}) 209 | q_a_pair = format_qa_pair(q,answer) 210 | q_a_pairs = q_a_pairs + "\n---\n"+ q_a_pair 211 | all_context_flat_list = [context for contexts in all_contexts for context in contexts] 212 | return answer, all_context_flat_list 213 | 214 | def run_recursive_decomposition(llm, retriever, question): 215 | questions, _ = generate_queries_decomposition(llm, question) 216 | answer, contexts = answer_recursively(llm, retriever, question, questions) 217 | return answer, contexts 218 | 219 | def retrieve_and_rag(llm, retriever, question, sub_question_generator_chain): 220 | """RAG on each sub-question""" 221 | 222 | # Use our decomposition / 223 | sub_questions = sub_question_generator_chain.invoke({"question":question}) 224 | 225 | # Initialize a list to hold RAG chain results 226 | rag_results = [] 227 | 228 | retrieved_docs_list = [] 229 | 230 | # RAG prompt 231 | prompt_rag = hub.pull("rlm/rag-prompt") 232 | 233 | for sub_question in sub_questions: 234 | 235 | # Retrieve documents for each 
sub-question 236 | retrieved_docs = retriever.get_relevant_documents(sub_question) 237 | 238 | # Use retrieved documents and sub-question in RAG chain 239 | answer = (prompt_rag | llm | StrOutputParser()).invoke({"context": retrieved_docs, 240 | "question": sub_question}) 241 | rag_results.append(answer) 242 | retrieved_docs_list.append(retrieved_docs) 243 | 244 | retrieved_docs_flat_list = [context for contexts in retrieved_docs_list for context in contexts] 245 | return rag_results, sub_questions, retrieved_docs_flat_list 246 | 247 | def format_qa_pairs(questions, answers): 248 | """Format Q and A pairs""" 249 | 250 | formatted_string = "" 251 | for i, (question, answer) in enumerate(zip(questions, answers), start=1): 252 | formatted_string += f"Question {i}: {question}\nAnswer {i}: {answer}\n\n" 253 | return formatted_string.strip() 254 | 255 | def answer_from_qa_pairs(llm, question, context): 256 | # Prompt 257 | template = """Here is a set of Q+A pairs: 258 | 259 | {context} 260 | 261 | Use these to synthesize an answer to the question: {question} 262 | """ 263 | 264 | prompt = ChatPromptTemplate.from_template(template) 265 | 266 | final_rag_chain = ( 267 | prompt 268 | | llm 269 | | StrOutputParser() 270 | ) 271 | 272 | return final_rag_chain.invoke({"context":context,"question":question}) 273 | 274 | def run_individual_decomposition(llm, retriever, question): 275 | prompt_rag = hub.pull("rlm/rag-prompt") 276 | _, generate_queries_decomposition_chain = generate_queries_decomposition(llm, question) 277 | answers, questions, retrieved_docs_flat_list = retrieve_and_rag(llm, retriever, question, generate_queries_decomposition_chain) 278 | context = format_qa_pairs(questions, answers) 279 | answer = answer_from_qa_pairs(llm, question, context) 280 | return answer, retrieved_docs_flat_list 281 | 282 | from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate 283 | 284 | def generate_queries_step_back(llm, question): 285 | # Few Shot Examples 286 | examples = [ 287 | { 288 | "input": "Could the members of The Police perform lawful arrests?", 289 | "output": "what can the members of The Police do?", 290 | }, 291 | { 292 | "input": "Jan Sindel's was born in what country?", 293 | "output": "what is Jan Sindel's personal history?", 294 | }, 295 | ] 296 | # We now transform these to example messages 297 | example_prompt = ChatPromptTemplate.from_messages( 298 | [ 299 | ("human", "{input}"), 300 | ("ai", "{output}"), 301 | ] 302 | ) 303 | few_shot_prompt = FewShotChatMessagePromptTemplate( 304 | example_prompt=example_prompt, 305 | examples=examples, 306 | ) 307 | prompt = ChatPromptTemplate.from_messages( 308 | [ 309 | ( 310 | "system", 311 | """You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. Here are a few examples:""", 312 | ), 313 | # Few shot examples 314 | few_shot_prompt, 315 | # New question 316 | ("user", "{question}"), 317 | ] 318 | ) 319 | generate_queries_step_back_chain = prompt | llm | StrOutputParser() 320 | queries = generate_queries_step_back_chain.invoke({"question": question}) 321 | return queries, generate_queries_step_back_chain 322 | 323 | def step_back_rag(llm, retriever, question, queries, generate_queries_step_back_chain): 324 | # Response prompt 325 | response_prompt_template = """You are an expert of world knowledge. I am going to ask you a question. 
Your response should be comprehensive and should not contradict the following context if it is relevant. Otherwise, ignore the context if it is not relevant.
326 | 
327 | # {normal_context}
328 | # {step_back_context}
329 | 
330 | # Original Question: {question}
331 | # Answer:"""
332 | response_prompt = ChatPromptTemplate.from_template(response_prompt_template)
333 | normal_context = retriever.invoke(question)
334 | # print(normal_context)
335 | step_back_context = retriever.invoke(generate_queries_step_back_chain.invoke({"question": question}))
336 | # print(step_back_context)
337 | chain = (
338 | {
339 | # Retrieve context using the normal question
340 | "normal_context": RunnableLambda(lambda x: x["question"]) | retriever,
341 | # Retrieve context using the step-back question
342 | "step_back_context": generate_queries_step_back_chain | retriever,
343 | # Pass on the question
344 | "question": lambda x: x["question"],
345 | }
346 | | response_prompt
347 | | llm
348 | | StrOutputParser()
349 | )
350 | answer = chain.invoke({"question": question})
351 | combined_context = normal_context + step_back_context
352 | # print('normal', normal_context)
353 | # print('step', step_back_context)
354 | # print('combined', combined_context)
355 | return answer, combined_context
356 | 
357 | def run_step_back_rag(llm, retriever, question):
358 | queries, generate_queries_step_back_chain = generate_queries_step_back(llm, question)
359 | # print(queries)
360 | answer, contexts = step_back_rag(llm, retriever, question, queries, generate_queries_step_back_chain)
361 | # print(contexts)
362 | return answer, contexts
363 | 
364 | def generate_docs_for_retrieval(llm, question):
365 | # HyDE document generation
366 | template = """Please write a scientific paper passage to answer the question
367 | Question: {question}
368 | Passage:"""
369 | prompt_hyde = ChatPromptTemplate.from_template(template)
370 | 
371 | generate_docs_for_retrieval_chain = (
372 | prompt_hyde | llm | StrOutputParser() 
373 | )
374 | 
375 | # Run
376 | # return generate_docs_for_retrieval_chain.invoke({"question":question})
377 | return generate_docs_for_retrieval_chain
378 | 
379 | def retrieve_hyde(retriever, question, generate_docs_for_retrieval_chain):
380 | # Retrieve
381 | retrieval_chain = generate_docs_for_retrieval_chain | retriever
382 | retrieved_docs = retrieval_chain.invoke({"question":question})
383 | return retrieved_docs
384 | 
385 | def hyde_rag(llm, question, retrieved_docs):
386 | # RAG
387 | template = """Answer the following question based on this context:
388 | 
389 | {context}
390 | 
391 | Question: {question}
392 | """
393 | 
394 | prompt = ChatPromptTemplate.from_template(template)
395 | 
396 | final_rag_chain = (
397 | prompt
398 | | llm
399 | | StrOutputParser()
400 | )
401 | 
402 | return final_rag_chain.invoke({"context":retrieved_docs,"question":question})
403 | 
404 | def run_hyde(llm, retriever, question):
405 | generate_docs_for_retrieval_chain = generate_docs_for_retrieval(llm, question)
406 | retrieved_docs = retrieve_hyde(retriever, question, generate_docs_for_retrieval_chain)
407 | answer = hyde_rag(llm, question, retrieved_docs)
408 | return answer, retrieved_docs
--------------------------------------------------------------------------------
/pages/2_embeddings.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | from langchain_community.document_loaders import DirectoryLoader
3 | from langchain.chains import RetrievalQA
4 | from langchain_openai
import ChatOpenAI 5 | from langchain_community.chat_models import ChatCohere 6 | from langchain_community.embeddings import CohereEmbeddings 7 | from langchain import hub 8 | from langchain_community.vectorstores import Chroma 9 | from langchain_core.output_parsers import StrOutputParser 10 | from langchain_core.runnables import RunnablePassthrough 11 | from langchain_openai import OpenAIEmbeddings 12 | from langchain_text_splitters import RecursiveCharacterTextSplitter, CharacterTextSplitter, TokenTextSplitter 13 | from langchain_anthropic import ChatAnthropic 14 | from ragas.metrics import ( 15 | answer_relevancy, 16 | faithfulness, 17 | answer_similarity, 18 | answer_correctness, 19 | context_precision, 20 | context_recall, 21 | context_relevancy, 22 | ) 23 | import pandas as pd 24 | import random 25 | import string 26 | import plotly.express as px 27 | 28 | from st_pages import add_page_title 29 | 30 | add_page_title() 31 | 32 | st.sidebar.header("RAGTune") 33 | 34 | def generate_random_string(length=10): 35 | """Generate a random string of alphanumeric characters""" 36 | characters = string.ascii_letters + string.digits 37 | random_string = ''.join(random.choice(characters) for _ in range(length)) 38 | return random_string 39 | 40 | @st.cache_data 41 | def create_embedding_and_evaluate(parameter_name, chunk_size, chunk_overlap, number_of_source_documents, search_type, embeddings_option, splitter_type): 42 | if parameter_name == "Chunk Size": 43 | parameter_value = chunk_size 44 | elif parameter_name == "Chunk Overlap": 45 | parameter_value = chunk_overlap 46 | elif parameter_name == "Number of Source Documents": 47 | parameter_value = number_of_source_documents 48 | elif parameter_name == "Search Type": 49 | parameter_value = search_type 50 | elif parameter_name == "Embeddings Option": 51 | parameter_value = embeddings_option 52 | elif parameter_name == "Splitter": 53 | parameter_value = splitter_type 54 | 55 | if embeddings_option == 'CohereEmbeddings': 56 | embeddings = CohereEmbeddings() 57 | elif embeddings_option == 'OpenAIEmbeddings': 58 | embeddings = OpenAIEmbeddings() 59 | 60 | def format_docs(docs): 61 | return "\n\n".join(doc.page_content for doc in docs) 62 | 63 | if splitter_type == 'RecursiveCharacterTextSplitter': 64 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) 65 | elif splitter_type == 'CharacterTextSplitter': 66 | text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) 67 | elif splitter_type == 'TokenTextSplitter': 68 | text_splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) 69 | 70 | 71 | # text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) 72 | docs = loader.load() 73 | splits = text_splitter.split_documents(docs) 74 | vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings, persist_directory="./chroma_db", collection_name=generate_random_string()) 75 | _retriever = vectorstore.as_retriever(search_type=search_type, search_kwargs={"k": number_of_source_documents}) 76 | 77 | _prompt = hub.pull("rlm/rag-prompt") 78 | 79 | llm = llm_options[default_llm]() 80 | rag_chain = ( 81 | {"context": _retriever | format_docs, "question": RunnablePassthrough()} 82 | | _prompt 83 | | llm 84 | | StrOutputParser() 85 | ) 86 | 87 | answers = [] 88 | contexts = [] 89 | for question in eval_questions: 90 | response = rag_chain.invoke(question) 91 | answers.append(response) 92 | retrieved_docs = 
_retriever.invoke(question) 93 | contexts.append([context.page_content for context in retrieved_docs]) 94 | 95 | from datasets import Dataset 96 | response_dataset = Dataset.from_dict({ 97 | "question": eval_questions, 98 | "answer": answers, 99 | "contexts": contexts, 100 | "ground_truth": eval_answers 101 | }) 102 | 103 | from ragas import evaluate 104 | result = evaluate( 105 | response_dataset, 106 | metrics=[ 107 | faithfulness, 108 | answer_relevancy, 109 | answer_similarity, 110 | answer_correctness, 111 | context_precision, 112 | context_recall, 113 | context_relevancy, 114 | ], 115 | ) 116 | 117 | df_results = result.to_pandas() 118 | return { 119 | "parameter_name": parameter_name, 120 | "parameter_value": parameter_value, 121 | "overview": result, 122 | "details": df_results 123 | } 124 | 125 | # @st.cache_data 126 | def display_embedding_evaluation_results(current_parameter_name): 127 | if "embedding_results" in st.session_state and st.session_state["embedding_results"]: 128 | st.subheader("Step 3: View Embedding Evaluation Results") 129 | with st.expander("See Evaluation Results"): 130 | for result in st.session_state["embedding_results"]: 131 | # st.write(result) 132 | if result['parameter_name'] == current_parameter_name: 133 | st.write(f"Evaluation for {result['parameter_name']}: {result['parameter_value']}") 134 | st.write("Overview of Results:") 135 | st.write(result["overview"]) 136 | st.write("Details of Results:") 137 | st.dataframe(result["details"], use_container_width=True) 138 | # @st.cache_data 139 | def prepare_charts(current_parameter_name, is_barchart=False): 140 | st.subheader("Step 4: Visualize Data") 141 | data = [] 142 | for result in st.session_state["embedding_results"]: 143 | if result['parameter_name'] == current_parameter_name: 144 | overview = result["overview"] 145 | parameter_name = result["parameter_name"] 146 | parameter_value = result["parameter_value"] 147 | for metric_name, metric_value in overview.items(): 148 | data.append({ 149 | "Parameter": parameter_name, 150 | "Parameter Value": parameter_value, 151 | "Metric": metric_name, 152 | "Value": metric_value 153 | }) 154 | 155 | if data: 156 | visual_df = pd.DataFrame(data) 157 | st.session_state["visual_df"] = visual_df 158 | else: 159 | st.warning(f"No data available for parameter '{current_parameter_name}'. Please run the evaluation for this parameter.") 160 | 161 | if 'visual_df' in st.session_state and not st.session_state['visual_df'].empty: 162 | with st.expander("See Visualization Results"): 163 | if is_barchart: 164 | st.subheader("Grouped Bar Chart") 165 | fig = px.bar(visual_df, x="Metric", y="Value", color="Parameter Value", barmode='group', height=400) 166 | st.plotly_chart(fig) 167 | else: 168 | st.subheader("Scatter Charts") 169 | for metric in st.session_state["visual_df"]["Metric"].unique(): 170 | fig = px.scatter(st.session_state["visual_df"][st.session_state["visual_df"]["Metric"] == metric], 171 | x="Parameter Value", y="Value", title=f"{current_parameter_name} vs {metric}", 172 | labels={"Parameter Value": current_parameter_name, "Value": metric}) 173 | st.plotly_chart(fig) 174 | 175 | st.write("Dataframe for download") 176 | st.dataframe(st.session_state["visual_df"], use_container_width=True) 177 | else: 178 | st.warning("No plots available. 
Please run the 'Prepare Charts' step first.")
179 | 
180 | 
181 | 
182 | # Load data from session state
183 | if 'eval_questions' not in st.session_state or 'eval_answers' not in st.session_state or 'doc_path' not in st.session_state:
184 | st.warning("Please upload a document and save eval questions and answers")
185 | else:
186 | eval_questions = st.session_state['eval_questions']
187 | eval_answers = st.session_state['eval_answers']
188 | doc_path = st.session_state['doc_path']
189 | 
190 | loader = DirectoryLoader(doc_path, show_progress=True, use_multithreading=True)
191 | 
192 | # Step 1: Select default LLM model
193 | st.subheader("Step 1: Select default LLM settings")
194 | llm_options = {
195 | "Cohere - command-light": lambda: ChatCohere(model="command-light", temperature=temperature, max_tokens=max_tokens),
196 | "Cohere - command": lambda: ChatCohere(model="command", temperature=temperature, max_tokens=max_tokens),
197 | "Cohere - command-r": lambda: ChatCohere(model="command-r", temperature=temperature, max_tokens=max_tokens),
198 | "OpenAI - gpt-3.5-turbo": lambda: ChatOpenAI(model_name="gpt-3.5-turbo", temperature=temperature, max_tokens=max_tokens),
199 | "OpenAI - gpt-4-turbo-preview": lambda: ChatOpenAI(model_name="gpt-4-turbo-preview", temperature=temperature, max_tokens=max_tokens),
200 | "OpenAI - gpt-4": lambda: ChatOpenAI(model_name="gpt-4", temperature=temperature, max_tokens=max_tokens),
201 | "Anthropic - claude-3-opus-20240229": lambda: ChatAnthropic(model_name="claude-3-opus-20240229", temperature=temperature, max_tokens=max_tokens),
202 | "Anthropic - claude-3-sonnet-20240229": lambda: ChatAnthropic(model_name="claude-3-sonnet-20240229", temperature=temperature, max_tokens=max_tokens),
203 | "Anthropic - claude-3-haiku-20240307": lambda: ChatAnthropic(model_name="claude-3-haiku-20240307", temperature=temperature, max_tokens=max_tokens),
204 | }
205 | default_llm = st.selectbox('Select the default LLM model:', options=list(llm_options.keys()))
206 | temperature = st.slider('Select the temperature for the language model:', min_value=0.0, max_value=2.0, value=0.7)
207 | max_tokens = st.number_input('Enter the max tokens for the output:', min_value=0, max_value=4096, value=400)
208 | 
209 | # Step 2: Set ranges for embedding options
210 | st.subheader("Step 2: Select a tab below for evaluation")
211 | 
212 | 
213 | # Create tabs for each embedding option
214 | tab_chunk_size, tab_chunk_overlap, tab_number_of_source_documents, tab_search_type, tab_embeddings_option, tab_splitter = st.tabs(["Chunk Size", "Chunk Overlap", "Number of Source Documents", "Search Type", "Embeddings Option", "Splitter"])
215 | 
216 | # Chunk Size tab
217 | with tab_chunk_size:
218 | current_parameter_name = "Chunk Size"
219 | st.session_state["parameter_name"] = current_parameter_name
220 | 
221 | st.write("Configure the varying parameter")
222 | chunk_size = st.select_slider(
223 | 'Select the range for chunk size for text splitting:',
224 | options=list(range(50, 5001, 50)),
225 | value=(50, 5000),
226 | key = current_parameter_name
227 | )
228 | chunk_size_min, chunk_size_max = chunk_size
229 | chunk_size_data_points = st.slider('Number of data points to collect between chunk size range:', min_value=2, max_value=10, value=3, step=1)
230 | 
231 | st.divider()
232 | st.write("Set the constant parameters")
233 | chunk_overlap = st.number_input('Enter the chunk overlap for text splitting:', min_value=0, value=50, key = current_parameter_name+'_overlap')
234 | number_of_source_documents = st.slider('Select the number of source documents for retrieval:',
min_value=2, max_value=10, value=4, key = current_parameter_name+'_k') 235 | search_type = st.selectbox('Select the search type:', ('similarity', 'mmr'), key = current_parameter_name+'_search_type') 236 | embeddings_option = st.selectbox( 237 | 'Select the embeddings to use:', 238 | ('CohereEmbeddings', 'OpenAIEmbeddings'), 239 | key=current_parameter_name+'_embedding' 240 | ) 241 | splitter_type = st.selectbox( 242 | 'Select the text splitter type:', 243 | ('RecursiveCharacterTextSplitter', 'CharacterTextSplitter', 'TokenTextSplitter'), 244 | key=current_parameter_name+'_splitter' 245 | ) 246 | 247 | if st.button('Evaluate Chunk Size'): 248 | 249 | 250 | 251 | chunk_size_step = ( chunk_size_max - chunk_size_min + 1) // chunk_size_data_points 252 | chunk_size_step = max(chunk_size_step-1 , 1) # Adjust step size to ensure it's at least 1 253 | chunk_size_range = range(chunk_size_min, chunk_size_max + 1, chunk_size_step) 254 | embedding_results = [] 255 | st.session_state["embedding_results"] = embedding_results 256 | 257 | for chunk_size in chunk_size_range: 258 | result = create_embedding_and_evaluate(current_parameter_name, chunk_size, chunk_overlap, number_of_source_documents, search_type, embeddings_option, splitter_type) 259 | embedding_results.append(result) 260 | 261 | display_embedding_evaluation_results(current_parameter_name) 262 | prepare_charts(current_parameter_name) 263 | 264 | 265 | st.session_state["embedding_results"] = embedding_results 266 | 267 | # Chunk Overlap tab 268 | with tab_chunk_overlap: 269 | current_parameter_name = "Chunk Overlap" 270 | st.session_state["parameter_name"] = current_parameter_name 271 | 272 | st.write("Configure the varying parameter") 273 | chunk_overlap = st.select_slider( 274 | 'Select the range for chunk overlap for text splitting:', 275 | options=list(range(0, 501, 20)), 276 | value=(0, 500), 277 | key = current_parameter_name 278 | ) 279 | chunk_overlap_min, chunk_overlap_max = chunk_overlap 280 | chunk_overlap_data_points = st.slider('Number of data points to collect between chunk overlap range:', min_value=2, max_value=10, value=3, step=1) 281 | 282 | st.divider() 283 | st.write("Set the constant parameters") 284 | chunk_size = st.number_input('Enter the chunk size for text splitting:', min_value=50, value=1000, key = current_parameter_name+'_size') 285 | number_of_source_documents = st.slider('Select the number of source documents for retrieval:', min_value=2, max_value=10, value=4, key = current_parameter_name+'_k') 286 | search_type = st.selectbox('Select the search type:', ('similarity', 'mmr'), key = current_parameter_name+'_search_type') 287 | embeddings_option = st.selectbox('Select the embeddings to use:', ('CohereEmbeddings', 'OpenAIEmbeddings'), key = current_parameter_name+'_embedding') 288 | splitter_type = st.selectbox( 289 | 'Select the text splitter type:', 290 | ('RecursiveCharacterTextSplitter', 'CharacterTextSplitter', 'TokenTextSplitter'), 291 | key=current_parameter_name+'_splitter' 292 | ) 293 | 294 | 295 | if st.button('Evaluate Chunk Overlap'): 296 | chunk_overlap_step = (chunk_overlap_max - chunk_overlap_min + 1) // chunk_overlap_data_points 297 | chunk_overlap_step = max(chunk_overlap_step, 1) # Adjust step size to ensure it's at least 1 298 | chunk_overlap_range = range(chunk_overlap_min, chunk_overlap_max + 1, chunk_overlap_step) 299 | embedding_results = [] 300 | st.session_state["embedding_results"] = embedding_results 301 | 302 | for chunk_overlap in chunk_overlap_range: 303 | result = 
create_embedding_and_evaluate(current_parameter_name, chunk_size, chunk_overlap, number_of_source_documents, search_type, embeddings_option, splitter_type) 304 | embedding_results.append(result) 305 | 306 | display_embedding_evaluation_results(current_parameter_name) 307 | prepare_charts(current_parameter_name) 308 | 309 | st.session_state["embedding_results"] = embedding_results 310 | 311 | # Number of Source Documents tab 312 | with tab_number_of_source_documents: 313 | current_parameter_name = "Number of Source Documents" 314 | st.session_state["parameter_name"] = current_parameter_name 315 | 316 | st.write("Configure the varying parameter") 317 | number_of_source_documents = st.select_slider( 318 | 'Select the range for number of source documents for retrieval:', 319 | options=list(range(2, 11, 1)), 320 | value=(2, 10), 321 | key = current_parameter_name 322 | ) 323 | number_of_source_documents_min, number_of_source_documents_max = number_of_source_documents 324 | number_of_source_documents_data_points = st.slider('Number of data points to collect between number of source documents range:', min_value=2, max_value=10, value=3, step=1) 325 | 326 | st.divider() 327 | st.write("Set the constant parameters") 328 | chunk_size = st.number_input('Enter the chunk size for text splitting:', min_value=50, value=1000, key = current_parameter_name+'_size') 329 | chunk_overlap = st.number_input('Enter the chunk overlap for text splitting:', min_value=0, value=50, key = current_parameter_name+'_overlap') 330 | search_type = st.selectbox('Select the search type:', ('similarity', 'mmr'), key = current_parameter_name+'_search_type') 331 | embeddings_option = st.selectbox('Select the embeddings to use:', ('CohereEmbeddings', 'OpenAIEmbeddings'), key = current_parameter_name+'_embedding') 332 | splitter_type = st.selectbox( 333 | 'Select the text splitter type:', 334 | ('RecursiveCharacterTextSplitter', 'CharacterTextSplitter', 'TokenTextSplitter'), 335 | key=current_parameter_name+'_splitter' 336 | ) 337 | if st.button('Evaluate Number of Source Documents'): 338 | number_of_source_documents_step = (number_of_source_documents_max - number_of_source_documents_min + 1) // number_of_source_documents_data_points 339 | number_of_source_documents_step = max(number_of_source_documents_step, 1) # Adjust step size to ensure it's at least 1 340 | number_of_source_documents_range = range(number_of_source_documents_min, number_of_source_documents_max + 1, number_of_source_documents_step) 341 | embedding_results = [] 342 | st.session_state["embedding_results"] = embedding_results 343 | 344 | for number_of_source_documents in number_of_source_documents_range: 345 | result = create_embedding_and_evaluate(current_parameter_name, chunk_size, chunk_overlap, number_of_source_documents, search_type, embeddings_option, splitter_type) 346 | embedding_results.append(result) 347 | 348 | display_embedding_evaluation_results(current_parameter_name) 349 | prepare_charts(current_parameter_name) 350 | 351 | st.session_state["embedding_results"] = embedding_results 352 | 353 | # Search Type tab 354 | with tab_search_type: 355 | current_parameter_name = "Search Type" 356 | st.session_state["parameter_name"] = current_parameter_name 357 | 358 | st.write("Configure the varying parameter") 359 | search_types = st.multiselect('Select the search types:', ['similarity', 'mmr'],key = current_parameter_name) 360 | 361 | st.divider() 362 | st.write("Set the constant parameters") 363 | chunk_size = st.number_input('Enter the chunk size for text 
splitting:', min_value=50, value=1000, key = current_parameter_name+'_size') 364 | chunk_overlap = st.number_input('Enter the chunk overlap for text splitting:', min_value=0, value=50, key = current_parameter_name+'_overlap') 365 | number_of_source_documents = st.slider('Select the number of source documents for retrieval:', min_value=2, max_value=10, value=4, key = current_parameter_name+'_k') 366 | embeddings_option = st.selectbox('Select the embeddings to use:', ('CohereEmbeddings', 'OpenAIEmbeddings'), key = current_parameter_name+'_embedding') 367 | splitter_type = st.selectbox( 368 | 'Select the text splitter type:', 369 | ('RecursiveCharacterTextSplitter', 'CharacterTextSplitter', 'TokenTextSplitter'), 370 | key=current_parameter_name+'_splitter' 371 | ) 372 | if st.button('Evaluate Search Type'): 373 | embedding_results = [] 374 | st.session_state["embedding_results"] = embedding_results 375 | 376 | for search_type in search_types: 377 | result = create_embedding_and_evaluate(current_parameter_name, chunk_size, chunk_overlap, number_of_source_documents, search_type, embeddings_option, splitter_type) 378 | embedding_results.append(result) 379 | 380 | display_embedding_evaluation_results(current_parameter_name) 381 | prepare_charts(current_parameter_name) 382 | 383 | st.session_state["embedding_results"] = embedding_results 384 | 385 | # Embeddings Option tab 386 | with tab_embeddings_option: 387 | current_parameter_name = "Embeddings Option" 388 | st.session_state["parameter_name"] = current_parameter_name 389 | 390 | st.write("Configure the varying parameter") 391 | embeddings_options = st.multiselect('Select the embeddings to use:', ['CohereEmbeddings', 'OpenAIEmbeddings'],key = current_parameter_name) 392 | 393 | st.divider() 394 | st.write("Set the constant parameters") 395 | chunk_size = st.number_input('Enter the chunk size for text splitting:', min_value=50, value=1000, key = current_parameter_name+'_size') 396 | chunk_overlap = st.number_input('Enter the chunk overlap for text splitting:', min_value=0, value=50, key = current_parameter_name+'_overlap') 397 | number_of_source_documents = st.slider('Select the number of source documents for retrieval:', min_value=2, max_value=10, value=4, key = current_parameter_name+'_k') 398 | search_type = st.selectbox('Select the search type:', ('similarity', 'mmr'), key = current_parameter_name+'_search_type') 399 | splitter_type = st.selectbox( 400 | 'Select the text splitter type:', 401 | ('RecursiveCharacterTextSplitter', 'CharacterTextSplitter', 'TokenTextSplitter'), 402 | key=current_parameter_name+'_splitter' 403 | ) 404 | if st.button('Evaluate Embeddings Option'): 405 | embedding_results = [] 406 | st.session_state["embedding_results"] = embedding_results 407 | for embeddings_option in embeddings_options: 408 | result = create_embedding_and_evaluate(current_parameter_name,chunk_size , chunk_overlap,number_of_source_documents, search_type,embeddings_option , splitter_type ) 409 | embedding_results.append(result) 410 | 411 | display_embedding_evaluation_results(current_parameter_name) 412 | prepare_charts(current_parameter_name, is_barchart=True) 413 | 414 | st.session_state["embedding_results"] = embedding_results 415 | 416 | with tab_splitter: 417 | current_parameter_name = "Splitter" 418 | st.session_state["parameter_name"] = current_parameter_name 419 | 420 | st.write("Configure the varying parameter") 421 | splitter_options = st.multiselect('Select the text splitters:', ['RecursiveCharacterTextSplitter', 
'CharacterTextSplitter', 'TokenTextSplitter'], default=['RecursiveCharacterTextSplitter']) 422 | 423 | st.divider() 424 | st.write("Set the constant parameters") 425 | chunk_size = st.number_input('Enter the chunk size for text splitting:', min_value=50, value=200, key=current_parameter_name+'_chunk_size') 426 | chunk_overlap = st.number_input('Enter the chunk overlap for text splitting:', min_value=0, value=50, key=current_parameter_name+'_overlap') 427 | number_of_source_documents = st.slider('Select the number of source documents for retrieval:', min_value=2, max_value=10, value=4, key=current_parameter_name+'_k') 428 | search_type = st.selectbox('Select the search type:', ('similarity', 'mmr'), key=current_parameter_name+'_search_type') 429 | embeddings_option = st.selectbox('Select the embeddings to use:', ('CohereEmbeddings', 'OpenAIEmbeddings'), key=current_parameter_name+'_embedding') 430 | 431 | if st.button('Evaluate Splitters'): 432 | embedding_results = [] 433 | st.session_state["embedding_results"] = embedding_results 434 | 435 | for splitter in splitter_options: 436 | result = create_embedding_and_evaluate(current_parameter_name, chunk_size, chunk_overlap, number_of_source_documents, search_type, embeddings_option, splitter) 437 | embedding_results.append(result) 438 | 439 | display_embedding_evaluation_results(current_parameter_name) 440 | prepare_charts(current_parameter_name, is_barchart=True) 441 | 442 | st.session_state["embedding_results"] = embedding_results 443 | 444 | # Visualization code 445 | 446 | # current_parameter_name = st.session_state.get("parameter_name", None) 447 | 448 | # Call the function with the current parameter name 449 | # st.session_state["embedding_results"] --------------------------------------------------------------------------------
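# --- Illustrative closing sketch (hedged; not a file in the repository) ---
# How the "Chunk Size" tab in pages/2_embeddings.py turns the selected range and the
# number of data points into the chunk sizes it evaluates. The values below mirror the
# widget defaults and are placeholders:
#
#     chunk_size_min, chunk_size_max = 50, 5000
#     chunk_size_data_points = 3
#     step = max((chunk_size_max - chunk_size_min + 1) // chunk_size_data_points - 1, 1)
#     chunk_sizes = list(range(chunk_size_min, chunk_size_max + 1, step))
#     print(chunk_sizes)  # -> [50, 1699, 3348, 4997]
#     # each value triggers one create_embedding_and_evaluate run, and the cached results
#     # are reshaped into the long-format DataFrame used by prepare_charts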