├── .gitignore
├── LICENSE
├── src
│   ├── README.md
│   └── app.py
├── requirements.txt
└── README.md
/.gitignore:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 Aaron Jimenez

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/src/README.md:
--------------------------------------------------------------------------------
You can change and customize the code to test different models and configurations.

### Embedding models

For this project, I tried three different embedding models to compare their performance:

#### HuggingFaceHubEmbeddings()

Note that `HuggingFaceHubEmbeddings` calls the Hugging Face Inference API, so it needs a `HUGGINGFACEHUB_API_TOKEN` set in the environment.

```python
from langchain_community.embeddings import HuggingFaceHubEmbeddings

...
vector_store = Chroma.from_documents(document_chunks, HuggingFaceHubEmbeddings())
```

#### phi3 with ollama

```python
from langchain_community.embeddings import OllamaEmbeddings

...
embeddings = OllamaEmbeddings(model='phi3')
vector_store = Chroma.from_documents(document_chunks, embeddings)
```

#### nomic-embed-text with ollama

```bash
ollama pull nomic-embed-text
```

```python
from langchain_community.embeddings import OllamaEmbeddings

...
embeddings = OllamaEmbeddings(model='nomic-embed-text')
vector_store = Chroma.from_documents(document_chunks, embeddings)
```
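One simple way to compare the models is to inspect which chunks each store actually retrieves for a few test questions. Here is a minimal sketch of such a check; the `preview_retrieval` helper, the example URL, and the sample query are illustrative, not part of the app:

```python
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

def preview_retrieval(document_chunks, embeddings, query, k=2):
    # Build a throwaway store with the chosen embedding model and print
    # the chunks it would hand to the LLM for this query.
    vector_store = Chroma.from_documents(document_chunks, embeddings)
    for doc in vector_store.similarity_search(query, k=k):
        print(doc.page_content[:200], "\n---")

docs = WebBaseLoader("https://example.com").load()
chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0).split_documents(docs)
preview_retrieval(chunks, OllamaEmbeddings(model='nomic-embed-text'), "What is this page about?")
```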
### LLM

For generation I used the `phi-3` model with Ollama:

```bash
ollama run phi3
```
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
aiohttp==3.9.3
aiosignal==1.3.1
altair==5.2.0
annotated-types==0.6.0
anyio==4.3.0
asgiref==3.7.2
async-timeout==4.0.3
attrs==23.2.0
backoff==2.2.1
bcrypt==4.1.2
beautifulsoup4==4.12.3
blinker==1.7.0
build==1.0.3
cachetools==5.3.3
certifi==2024.2.2
charset-normalizer==3.3.2
chroma-hnswlib==0.7.3
chromadb==0.4.24
click==8.1.7
coloredlogs==15.0.1
dataclasses-json==0.6.4
Deprecated==1.2.14
exceptiongroup==1.2.0
fastapi==0.110.0
filelock==3.13.1
flatbuffers==23.5.26
frozenlist==1.4.1
fsspec==2024.2.0
gitdb==4.0.11
GitPython==3.1.42
google-auth==2.28.1
googleapis-common-protos==1.62.0
greenlet==3.0.3
grpcio==1.62.0
h11==0.14.0
httptools==0.6.1
huggingface-hub==0.21.3
humanfriendly==10.0
idna==3.6
importlib-metadata==6.11.0
importlib_resources==6.1.2
Jinja2==3.1.3
jsonpatch==1.33
jsonpointer==2.4
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
kubernetes==29.0.0
langchain==0.1.9
langchain-community==0.0.24
langchain-core==0.1.27
langsmith==0.1.10
markdown-it-py==3.0.0
MarkupSafe==2.1.5
marshmallow==3.21.0
mdurl==0.1.2
mmh3==4.1.0
monotonic==1.6
mpmath==1.3.0
multidict==6.0.5
mypy-extensions==1.0.0
numpy==1.26.4
oauthlib==3.2.2
onnxruntime==1.17.1
opentelemetry-api==1.23.0
opentelemetry-exporter-otlp-proto-common==1.23.0
opentelemetry-exporter-otlp-proto-grpc==1.23.0
opentelemetry-instrumentation==0.44b0
opentelemetry-instrumentation-asgi==0.44b0
opentelemetry-instrumentation-fastapi==0.44b0
opentelemetry-proto==1.23.0
opentelemetry-sdk==1.23.0
opentelemetry-semantic-conventions==0.44b0
opentelemetry-util-http==0.44b0
orjson==3.9.15
overrides==7.7.0
packaging==23.2
pandas==2.2.1
pillow==10.2.0
posthog==3.4.2
protobuf==4.25.3
pulsar-client==3.4.0
pyarrow==15.0.0
pyasn1==0.5.1
pyasn1-modules==0.3.0
pydantic==2.6.3
pydantic_core==2.16.3
pydeck==0.8.1b0
Pygments==2.17.2
PyPika==0.48.9
pyproject_hooks==1.0.0
python-dateutil==2.8.2
python-dotenv==1.0.1
pytz==2024.1
PyYAML==6.0.1
referencing==0.33.0
requests==2.31.0
requests-oauthlib==1.3.1
rich==13.7.1
rpds-py==0.18.0
rsa==4.9
six==1.16.0
smmap==5.0.1
sniffio==1.3.1
soupsieve==2.5
SQLAlchemy==2.0.27
starlette==0.36.3
streamlit==1.31.1
sympy==1.12
tenacity==8.2.3
tokenizers==0.15.2
toml==0.10.2
tomli==2.0.1
toolz==0.12.1
tornado==6.4
tqdm==4.66.2
typer==0.9.0
typing-inspect==0.9.0
typing_extensions==4.10.0
tzdata==2024.1
tzlocal==5.2
urllib3==2.2.1
uvicorn==0.27.1
uvloop==0.19.0
validators==0.22.0
watchdog==4.0.0
watchfiles==0.21.0
websocket-client==1.7.0
websockets==12.0
wrapt==1.16.0
yarl==1.9.4
zipp==3.17.0
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Open Source Web Chatbot using RAG
---
This project implements a web-based chatbot built with the `LangChain` framework, using the `phi-3` model served through `ollama` as the LLM, `chromadb` as the vector database, and `streamlit` as the frontend. The chatbot is designed to interact with users based on the content of a specified website.

## Features

- **Web-based Interface**: Users interact with the chatbot through a web interface.
- **Document Loading**: The chatbot loads content from a specified website to establish the context of the conversation.
- **Text Splitting**: The website content is split into chunks for processing.
- **Vector Store Creation**: The text chunks are embedded as vectors and stored in a vector store for efficient retrieval.
- **RAG (Retrieval-Augmented Generation)**: The chatbot uses RAG to improve the quality of its responses. Here RAG involves two main components: a retriever chain and a conversational RAG chain.

## Setup

To run the project, follow these steps:

1. Install [ollama](https://ollama.com) on your machine and pull the models the app uses (see the snippet after these steps).

2. Install the required dependencies of the project:

   ```bash
   pip install streamlit langchain beautifulsoup4 chromadb huggingface_hub
   ```

3. Run the Streamlit app:

   ```bash
   streamlit run src/app.py
   ```

4. Once the Streamlit app is running, enter a website URL in the sidebar and start chatting with the chatbot.
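The app as written expects the `phi3` chat model and the `nomic-embed-text` embedding model used in `src/app.py` to be available locally. Assuming a default ollama installation, pulling them looks like this:

```bash
ollama pull phi3
ollama pull nomic-embed-text
```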
## What is RAG?

> *Retrieval-Augmented Generation (RAG) is the process of optimizing the output of a large language model, so it references an authoritative knowledge base outside of its training data sources before generating a response. Large Language Models (LLMs) are trained on vast volumes of data and use billions of parameters to generate original output for tasks like answering questions, translating languages, and completing sentences. RAG extends the already powerful capabilities of LLMs to specific domains or an organization's internal knowledge base, all without the need to retrain the model. It is a cost-effective approach to improving LLM output so it remains relevant, accurate, and useful in various contexts.* [Read more here](https://aws.amazon.com/what-is/retrieval-augmented-generation/)

### RAG Architecture

Following the [LangChain documentation](https://python.langchain.com/docs/use_cases/question_answering/), a typical RAG application has two main components:

Indexing:
![Indexing](https://python.langchain.com/assets/images/rag_indexing-8160f90a90a33253d0154659cf7d453f.png)

Retrieval and generation:
![Retrieval and generation](https://python.langchain.com/assets/images/rag_retrieval_generation-1046a4668d6bb08786ef73c56d4f228a.png)

## References

* [Tutorial | Chat with any Website using Python and Langchain (LATEST VERSION)](https://www.youtube.com/watch?v=bupx08ZgSFg)

* [Documentation: ollama llm in LangChain](https://python.langchain.com/docs/integrations/llms/ollama)

* [Documentation: ollama embeddings class in LangChain](https://python.langchain.com/docs/integrations/text_embedding/ollama)

* [Documentation: Hugging Face Embedding class in LangChain](https://python.langchain.com/docs/integrations/text_embedding/huggingfacehub)

* [Documentation: Q&A with RAG](https://python.langchain.com/docs/use_cases/question_answering/)

* [Nomic's New Embedding Model | nomic-embed-text](https://www.youtube.com/watch?v=LpcaeQZDVB8)

* [Introducing Nomic Embed: A Truly Open Embedding Model](https://blog.nomic.ai/posts/nomic-embed-text-v1)
--------------------------------------------------------------------------------
/src/app.py:
--------------------------------------------------------------------------------
import streamlit as st
from langchain_core.messages import AIMessage, HumanMessage
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain_community.llms import Ollama
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.embeddings import OllamaEmbeddings


def get_vector_store_from_url(url):
    # Load the HTML text from the page, split it into chunks,
    # and store the chunks in a vector store.
    loader = WebBaseLoader(url)
    document = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)  # TODO: tune chunk size/overlap
    document_chunks = text_splitter.split_documents(document)

    embeddings = OllamaEmbeddings(model='nomic-embed-text')
    vector_store = Chroma.from_documents(document_chunks, embeddings)

    return vector_store
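
# Note: Chroma.from_documents above builds an in-memory collection, so the page
# is re-embedded every time a new session supplies a URL. If that gets slow,
# one option (an untested sketch, not part of the original app) is to persist
# the collection to disk:
#
#     Chroma.from_documents(document_chunks, embeddings, persist_directory="./chroma_db")
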
def get_context_retriever_chain(vector_store):
    # Set up the LLM, retriever, and prompt for the retriever chain,
    # which fetches the relevant chunks from the vector store.
    llm = Ollama(model='phi3')  # or any other model that you have

    retriever = vector_store.as_retriever(search_kwargs={"k": 2})  # TODO: tune `k`

    prompt = ChatPromptTemplate.from_messages(
        [
            MessagesPlaceholder(variable_name="chat_history"),
            ("user", "{input}"),
            ("user", "Given the above conversation, generate a search query to look up in order to get the information relevant to the conversation")
        ]
    )

    retriever_chain = create_history_aware_retriever(
        llm,
        retriever,
        prompt
    )

    return retriever_chain

def get_conversation_rag_chain(retriever_chain):
    # Stuff the retrieved context from the webpage into a prompt and
    # generate the answer to the question based on that context.
    llm = Ollama(model='phi3')  # or any other model that you have

    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "Answer the user's questions based on the below context:\n\n{context}"
            ),
            MessagesPlaceholder(variable_name="chat_history"),
            ("user", "{input}"),
        ]
    )

    stuff_documents_chain = create_stuff_documents_chain(llm, prompt)

    return create_retrieval_chain(retriever_chain, stuff_documents_chain)

def get_response(user_input):
    # Invoke the chains created above to generate a response to the user query.
    retriever_chain = get_context_retriever_chain(st.session_state.vector_store)
    conversation_rag_chain = get_conversation_rag_chain(retriever_chain)

    response = conversation_rag_chain.invoke({
        "chat_history": st.session_state.chat_history,
        "input": user_input
    })

    return response['answer']


# Streamlit app config
st.set_page_config(page_title="Let's chat with a Website", page_icon="💻")
st.title("Let's chat with a Website")

# Sidebar setup
with st.sidebar:
    st.header("Settings")
    website_url = st.text_input("Type the URL here")

if website_url is None or website_url == "":
    st.info("Please enter a website URL...")

else:
    # Session state
    #
    # Keep the chat history so the conversation survives Streamlit reruns.
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = [
            AIMessage(content="Hello, I am a bot. How can I help you?"),
        ]
    # Build the vector store only once per session.
    if "vector_store" not in st.session_state:
        st.session_state.vector_store = get_vector_store_from_url(website_url)

    # User input
    user_query = st.chat_input("Type here...")
    if user_query is not None and user_query != "":

        response = get_response(user_query)

        st.session_state.chat_history.append(HumanMessage(content=user_query))
        st.session_state.chat_history.append(AIMessage(content=response))

    # Conversation history
    for message in st.session_state.chat_history:
        if isinstance(message, AIMessage):
            with st.chat_message("AI"):
                st.write(message.content)
        elif isinstance(message, HumanMessage):
            with st.chat_message("Human"):
                st.write(message.content)
--------------------------------------------------------------------------------