├── .gitignore ├── .streamlit └── config.toml ├── README.md ├── app.py ├── assets ├── llamaindexchat-demo.mp4 ├── llamaindexchat.png └── sourcecitation.png ├── ingest_knowledge.py ├── requirements.txt └── storage ├── docstore.json ├── graph_store.json ├── index_store.json └── vector_store.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [logger] 2 | 3 | # Level of logging: 'error', 'warning', 'info', or 'debug'. 4 | level = 'info' 5 | 6 | # String format for logging messages. If logger.datetimeFormat is set, logger messages will default to `%(asctime)s.%(msecs)03d %(message)s`. See [Python's documentation](https://docs.python.org/2.6/library/logging.html#formatter-objects) for available attributes. 7 | # Default: "%(asctime)s %(message)s" 8 | messageFormat = "%(asctime)s [%(levelname)s] %(message)s" 9 | 10 | [browser] 11 | 12 | # Default: true 13 | gatherUsageStats = false 14 | 15 | [theme] 16 | 17 | primaryColor="#818cf8" 18 | backgroundColor="#FFFFFF" 19 | secondaryBackgroundColor="#F0F2F6" 20 | textColor="#262730" 21 | font="sans serif" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Open_inStreamlit](https://img.shields.io/badge/Open%20In-Streamlit-red?logo=Streamlit)](https://llamaindexchat.streamlit.app/) 2 | [![Python](https://img.shields.io/badge/python-%203.8-blue.svg)](https://www.python.org/) 3 | [![CodeFactor](https://www.codefactor.io/repository/github/dcarpintero/llamaindexchat/badge)](https://www.codefactor.io/repository/github/dcarpintero/llamaindexchat) 4 | [![License](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/dcarpintero/st-newsapi-connector/blob/main/LICENSE) 5 | 6 | # Chat with 🦙 LlamaIndex Docs 🗂️ 7 | 8 |

9 | 10 |

11 | 12 | Chatbot using [LlamaIndex](https://www.llamaindex.ai/) to supplement OpenAI GPT-3.5 Large Language Model (LLM) with the [LlamaIndex Documentation](https://gpt-index.readthedocs.io/en/latest/index.html). Main features: 13 | 14 | - **Transparency and Evaluation**: by customizing the metadata field of documents (and nodes), the App is able to provide links to the sources of the responses, along with the author and relevance score of each source node. This ensures the answers can be cross-referenced with the original content to check for accuracy. 15 | - **Estimating Inference Costs**: tracks 'LLM Prompt Tokens' and 'LLM Completion Tokens' to help keep inference costs under control. 16 | - **Reducing Costs**: persists storage including embedding vectors, and caches the questions / responses to reduce the number of calls to the LLM. 17 | - **Usability**: includes suggestions for questions, and basic functionality to clear chat history. 18 | 19 | ## 🦙 What's LlamaIndex? 20 | 21 | > *LlamaIndex is a simple, flexible data framework for connecting custom data sources to large language models. [...] It helps in preparing a knowledge base by ingesting data from different sources and formats using data connectors. The data is then represented as documents and nodes, where a node is the atomic unit of data in LlamaIndex. Once the data is ingested, LlamaIndex indexes the data into a format that is easy to retrieve. It uses different indexes such as the VectorStoreIndex, Summary Index, Tree Index, and Keyword Table Index. In the querying stage, LlamaIndex retrieves the most relevant context given a user query and synthesizes a response using a response synthesizer. [Response from our Chatbot to the query 'What's LlamaIndex?']* 22 | 23 | ## 📋 How does it work? 24 | 25 | LlamaIndex enriches LLMs (for simplicity, we default the [ServiceContext](https://gpt-index.readthedocs.io/en/latest/core_modules/supporting_modules/service_context.html) to OpenAI GPT-3.5 which is then used for indexing and querying) with a custom knowledge base through a process called [Retrieval Augmented Generation (RAG)](https://research.ibm.com/blog/retrieval-augmented-generation-RAG) that involves the following steps: 26 | 27 | - **Connecting to a External Datasource**: We use the [Github Repository Loader](https://llamahub.ai/l/github_repo) available at [LlamaHub](https://llamahub.ai/) (an open-source repository for data loaders) to connect to the Github repository containing the markdown files of the LlamaIndex Docs: 28 | 29 | ```python 30 | def initialize_github_loader(github_token: str) -> GithubRepositoryReader: 31 | """Initialize GithubRepositoryReader""" 32 | 33 | download_loader("GithubRepositoryReader") 34 | github_client = GithubClient(github_token) 35 | 36 | loader = GithubRepositoryReader(github_client, [...]) 37 | 38 | return loader 39 | ``` 40 | 41 | - **Constructing Documents**: The markdown files of the Github repository are ingested and automatically converted to Document objects. In addition, we add the dictionary {'filename': '', 'author': ''} to the metadata of each document (which will be inhereited by the nodes). This will allow us to retrieve and display the data sources and scores in the chatbot responses to make our App more transparent: 42 | 43 | ```python 44 | def load_and_index_data(loader: GithubRepositoryReader) -> : 45 | """Load Knowledge Base from GitHub Repository""" 46 | 47 | logging.info("Loading data from Github: %s/%s", loader._owner, loader._repo) 48 | docs = loader.load_data(branch="main") 49 | for doc in docs: 50 | doc.metadata = {'filename': doc.extra_info['file_name'], 'author': "LlamaIndex"} 51 | ``` 52 | 53 | - **Parsing Nodes**: Nodes represent a *chunk* of a source Document, we have defined a chunk size of '1024' with an overlap of '32'. Similar to Documents, Nodes contain metadata and relationship information with other nodes. 54 | ```python 55 | [...] 56 | 57 | logging.info("Parsing documents into nodes...") 58 | parser = SimpleNodeParser.from_defaults(chunk_size=1024, chunk_overlap=32) 59 | nodes = parser.get_nodes_from_documents(docs) 60 | ``` 61 | 62 | - **Indexing**: An Index is a data structure that allows to quickly retrieve relevant context for a user query. For LlamaIndex, it's the core foundation for retrieval-augmented generation (RAG) use-cases. LlamaIndex provides different types of indices, such as the [VectorStoreIndex](https://gpt-index.readthedocs.io/en/latest/core_modules/data_modules/index/index_guide.html), which makes LLM calls to compute embeddings: 63 | 64 | ```python 65 | [...] 66 | 67 | logging.info("Indexing nodes...") 68 | index = VectorStoreIndex(nodes) 69 | 70 | logging.info("Persisting index on ./storage...") 71 | index.storage_context.persist(persist_dir="./storage") 72 | 73 | logging.info("Data-Knowledge ingestion process is completed (OK)") 74 | ``` 75 | 76 | - **Querying (with cache)**: Once the index is constructed, querying a vector store index involves fetching the top-k most similar Nodes (by default 2), and passing those into the Response Synthesis module. The top Nodes are then appended to the user's prompt and passed to the LLM. We rely on the [Streamlit caching mechanism](https://docs.streamlit.io/library/advanced-features/caching) to optimize the performance and reduce the number of calls to the LLM: 77 | 78 | ```python 79 | @st.cache_data(max_entries=1024, show_spinner=False) 80 | def query_chatengine_cache(prompt, _chat_engine, settings): 81 | return _chat_engine.chat(prompt) 82 | ``` 83 | 84 | - **Parsing Response**: The App parses the response source nodes to extract the filename, author and score of the top-k similar Nodes (from which the answer was retrieved): 85 | 86 | ```python 87 | def get_metadata(response): 88 | sources = [] 89 | for item in response.source_nodes: 90 | if hasattr(item, "metadata"): 91 | filename = item.metadata.get('filename').replace('\\', '/') 92 | author = item.metadata.get('author') 93 | score = float("{:.3f}".format(item.score)) 94 | sources.append({'filename': filename, 'author': author, 'score': score}) 95 | 96 | return sources 97 | ``` 98 | 99 | - **Transparent Results with Source Citation**: The use of metadata enables to display links to the sources along with the author and relevance scores from which the answer was retrieved: 100 | 101 |

102 | 103 |

104 | 105 | 106 | - **Estimating Inference Cost**: By using [TokenCountingHandler](https://docs.llamaindex.ai/en/stable/examples/callbacks/TokenCountingHandler.html), the App tracks the number of 'LLM Prompt Tokens' and 'LLM Completion Tokens' to estimate the overall [GTP-3.5 inference costs](https://openai.com/pricing). 107 | 108 | ```python 109 | token_counter = TokenCountingHandler( 110 | tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode, 111 | verbose=False 112 | ) 113 | 114 | callback_manager = CallbackManager([token_counter]) 115 | service_context = ServiceContext.from_defaults(llm=OpenAI(model="gpt-3.5-turbo"), callback_manager=callback_manager) 116 | ``` 117 | 118 | 119 | ## 🚀 Quickstart 120 | 121 | 1. Clone the repository: 122 | ``` 123 | git clone git@github.com:dcarpintero/chatwithweb3.git 124 | ``` 125 | 126 | 2. Create and Activate a Virtual Environment: 127 | 128 | ``` 129 | Windows: 130 | 131 | py -m venv .venv 132 | .venv\scripts\activate 133 | 134 | macOS/Linux 135 | 136 | python3 -m venv .venv 137 | source .venv/bin/activate 138 | ``` 139 | 140 | 3. Install dependencies: 141 | 142 | ``` 143 | pip install -r requirements.txt 144 | ``` 145 | 146 | 4. Ingest Knowledge Base 147 | ``` 148 | python ingest_knowledge.py 149 | ``` 150 | 151 | 5. Launch Web Application 152 | 153 | ``` 154 | streamlit run ./app.py 155 | ``` 156 | 157 | ## 👩‍💻 Streamlit Web App 158 | 159 | Demo Web App deployed to [Streamlit Cloud](https://streamlit.io/cloud) and available at https://llamaindexchat.streamlit.app/ 160 | 161 | ## 📚 References 162 | 163 | - [LLamaIndex Doc Reference](https://gpt-index.readthedocs.io/en/latest/index.html) 164 | - [Get Started with Streamlit Cloud](https://docs.streamlit.io/streamlit-community-cloud/get-started) -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | """ 2 | Streamlit application that integrates with LlamaIndex and OpenAI's GPT-3.5 to create a conversational interface. 3 | Users can ask questions about LlamaIndex Docs, and the application provides relevant answers. 4 | The user's OpenAI API key is used to fetch responses from GPT-3.5. 5 | 6 | Author: 7 | @dcarpintero : https://github.com/dcarpintero 8 | """ 9 | from llama_index.llms import OpenAI 10 | from llama_index import ServiceContext, StorageContext, load_index_from_storage 11 | from llama_index.callbacks import CallbackManager, TokenCountingHandler 12 | import openai 13 | import tiktoken 14 | import streamlit as st 15 | 16 | 17 | st.set_page_config( 18 | page_title="Chat with LlamaIndex Docs", 19 | page_icon="🦙", 20 | initial_sidebar_state="expanded", 21 | menu_items={"About": "Built by @dcarpintero with Streamlit & LLamaIndex"}, 22 | ) 23 | 24 | if 'llm_prompt_tokens' not in st.session_state: 25 | st.session_state['llm_prompt_tokens'] = 0 26 | 27 | if 'llm_completion_tokens' not in st.session_state: 28 | st.session_state['llm_completion_tokens'] = 0 29 | 30 | if 'openai_api_key' in st.session_state: 31 | openai.api_key = st.session_state['openai_api_key'] 32 | 33 | 34 | @st.cache_resource(show_spinner=False) 35 | def load_data(): 36 | """Load VectorStoreIndex from storage.""" 37 | 38 | with st.spinner("Loading Vector Store Index..."): 39 | token_counter = TokenCountingHandler( 40 | tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode, 41 | verbose=False 42 | ) 43 | 44 | callback_manager = CallbackManager([token_counter]) 45 | service_context = ServiceContext.from_defaults(llm=OpenAI(model="gpt-3.5-turbo"), callback_manager=callback_manager) 46 | index = load_index_from_storage(StorageContext.from_defaults(persist_dir="./storage"), service_context=service_context) 47 | 48 | return index, token_counter 49 | 50 | def display_chat_history(messages): 51 | """Display previous chat messages.""" 52 | 53 | for message in messages: 54 | with st.chat_message(message["role"]): 55 | if st.session_state.with_sources: 56 | if "sources" in message: 57 | st.info(f'The sources of this response are:\n\n {message["sources"]}') 58 | st.write(message["content"]) 59 | 60 | 61 | def clear_chat_history(): 62 | """"Clear chat history and reset questions' buttons.""" 63 | 64 | st.session_state.messages = [ 65 | {"role": "assistant", "content": "Try one of the sample questions or ask your own!"} 66 | ] 67 | st.session_state["btn_llama_index"] = False 68 | st.session_state["btn_retriever"] = False 69 | st.session_state["btn_diff"] = False 70 | st.session_state["btn_rag"] = False 71 | 72 | 73 | def generate_assistant_response(prompt, chat_engine): 74 | """Generate assistant response and update token counter.""" 75 | 76 | with st.chat_message("assistant"): 77 | with st.spinner("I am on it..."): 78 | if st.session_state.with_cache: 79 | response = query_chatengine_cache(prompt, chat_engine) 80 | else: 81 | response = query_chatengine(prompt, chat_engine) 82 | 83 | message = {"role": "assistant", "content": response.response, "sources": format_sources(response)} 84 | if st.session_state.with_sources: 85 | st.info(f'The sources of this response are:\n\n {message["sources"]}') 86 | st.write(message["content"]) 87 | 88 | st.session_state.messages.append(message) 89 | 90 | 91 | @st.cache_data(max_entries=1024, show_spinner=False) 92 | def query_chatengine_cache(prompt, _chat_engine): 93 | """Query chat engine and cache results.""" 94 | return _chat_engine.chat(prompt) 95 | 96 | 97 | def query_chatengine(prompt, chat_engine): 98 | """Query chat engine.""" 99 | return chat_engine.chat(prompt) 100 | 101 | 102 | def format_sources(response): 103 | """Format filename, authors and scores of the response source nodes.""" 104 | base = "https://github.com/jerryjliu/llama_index/tree/main/" 105 | return "\n".join([f"- {base}{source['filename']} (author: '{source['author']}'; score: {source['score']})\n" for source in get_metadata(response)]) 106 | 107 | 108 | def get_metadata(response): 109 | """Parse response source nodes and return a list of dictionaries with filenames, authors and scores.""" 110 | 111 | sources = [] 112 | for item in response.source_nodes: 113 | if hasattr(item, "metadata"): 114 | filename = item.metadata.get('filename').replace('\\', '/') 115 | author = item.metadata.get('author') 116 | score = float("{:.3f}".format(item.score)) 117 | sources.append({'filename': filename, 'author': author, 'score': score}) 118 | 119 | return sources 120 | 121 | 122 | def update_token_counters(token_counter): 123 | """Update token counters """ 124 | 125 | st.session_state['llm_prompt_tokens'] += token_counter.prompt_llm_token_count 126 | st.session_state['llm_completion_tokens'] += token_counter.completion_llm_token_count 127 | 128 | # reset counter to avoid miscounting when the answer is cached! 129 | token_counter.reset_counts() 130 | 131 | 132 | def sidebar(): 133 | """Configure the sidebar and user's preferences.""" 134 | 135 | with st.sidebar.expander("🔑 OPENAI-API-KEY", expanded=True): 136 | st.text_input(label='OPENAI-API-KEY', type='password', key='openai_api_key', label_visibility='hidden').strip() 137 | "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)" 138 | 139 | with st.sidebar.expander("💲 GPT3.5 INFERENCE COST", expanded=True): 140 | i_tokens = st.session_state['llm_prompt_tokens'] 141 | o_tokens = st.session_state['llm_completion_tokens'] 142 | st.markdown(f'LLM Prompt: {i_tokens} tokens') 143 | st.markdown(f'LLM Completion: {o_tokens} tokens') 144 | 145 | i_cost = (i_tokens / 1000) * 0.0015 146 | o_cost = (o_tokens / 1000) * 0.002 147 | st.markdown('**Cost Estimation: ${0}**'.format(round(i_cost + o_cost, 5))) 148 | "[OpenAI Pricing](https://openai.com/pricing)" 149 | 150 | with st.sidebar.expander("🔧 SETTINGS", expanded=True): 151 | st.toggle('Cache Results', value=True, key="with_cache") 152 | st.toggle('Display Sources', value=True, key="with_sources") 153 | st.toggle('Streaming', value=False, disabled=True, key="with_streaming") 154 | 155 | st.sidebar.button('Clear Messages', type="primary", on_click=clear_chat_history) 156 | st.sidebar.divider() 157 | with st.sidebar: 158 | col_ll, col_gh = st.columns([1, 1]) 159 | with col_ll: 160 | "[![LlamaIndex Docs](https://img.shields.io/badge/LlamaIndex%20Docs-gray)](https://gpt-index.readthedocs.io/en/latest/index.html)" 161 | with col_gh: 162 | "[![Github](https://img.shields.io/badge/Github%20Repo-gray?logo=Github)](https://github.com/dcarpintero/llamaindexchat)" 163 | 164 | 165 | def layout(): 166 | """"Layout""" 167 | 168 | st.header("Chat with 🦙 LlamaIndex Docs 🗂️") 169 | 170 | # Get Started 171 | if not openai.api_key: 172 | st.warning("Hi there! Add your OPENAI-API-KEY on the sidebar field to get started!\n\n", icon="🚨") 173 | st.stop() 174 | 175 | # Load Index 176 | index, token_counter = load_data() 177 | if index: 178 | chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True) 179 | 180 | # Sample Questions for User input 181 | user_input_button = None 182 | 183 | btn_llama_index = st.session_state.get("btn_llama_index", False) 184 | btn_retriever = st.session_state.get("btn_retriever", False) 185 | btn_diff = st.session_state.get("btn_diff", False) 186 | btn_rag = st.session_state.get("btn_rag", False) 187 | 188 | col1, col2, col3, col4 = st.columns([1,1,1,1]) 189 | 190 | with col1: 191 | if st.button("explain the basic usage pattern of LlamaIndex", type="primary", disabled=btn_llama_index): 192 | user_input_button = "explain the basic usage pattern in LlamaIndex" 193 | st.session_state.btn_llama_index = True 194 | with col2: 195 | if st.button("how can I ingest data from the GoogleDocsReader?", type="primary", disabled=btn_retriever): 196 | user_input_button = "how can I ingest data from the GoogleDocsReader?" 197 | st.session_state.btn_retriever = True 198 | with col3: 199 | if st.button("what's the difference between document & node?", type="primary", disabled=btn_diff): 200 | user_input_button = "what's the difference between document and node?" 201 | st.session_state.btn_diff = True 202 | with col4: 203 | if st.button("how can I make a RAG application performant?", type="primary", disabled=btn_rag): 204 | user_input_button = "how can I make a RAG application performant?" 205 | st.session_state.btn_rag = True 206 | 207 | # System Message 208 | if "messages" not in st.session_state: 209 | st.session_state.messages = [ 210 | {"role": "assistant", "content": "Try one of the sample questions or ask your own!"} 211 | ] 212 | 213 | # User input 214 | user_input = st.chat_input("Your question") 215 | if user_input or user_input_button: 216 | st.session_state.messages.append({"role": "user", "content": user_input or user_input_button}) 217 | 218 | # Display previous chat 219 | display_chat_history(st.session_state.messages) 220 | 221 | # Generate response 222 | if st.session_state.messages[-1]["role"] != "assistant": 223 | try: 224 | generate_assistant_response(user_input or user_input_button, chat_engine) 225 | update_token_counters(token_counter) 226 | 227 | except Exception as ex: 228 | st.error(str(ex)) 229 | 230 | 231 | def main(): 232 | """Set up user preferences, and layout""" 233 | sidebar() 234 | layout() 235 | 236 | if __name__ == "__main__": 237 | main() 238 | -------------------------------------------------------------------------------- /assets/llamaindexchat-demo.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dcarpintero/llamaindexchat/74ea0e186b670fe429e8bf58d921f77d0e01f8f2/assets/llamaindexchat-demo.mp4 -------------------------------------------------------------------------------- /assets/llamaindexchat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dcarpintero/llamaindexchat/74ea0e186b670fe429e8bf58d921f77d0e01f8f2/assets/llamaindexchat.png -------------------------------------------------------------------------------- /assets/sourcecitation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dcarpintero/llamaindexchat/74ea0e186b670fe429e8bf58d921f77d0e01f8f2/assets/sourcecitation.png -------------------------------------------------------------------------------- /ingest_knowledge.py: -------------------------------------------------------------------------------- 1 | """ 2 | Knowledge Ingestion of the markdown files in the docs folder of the llama_index repository: 3 | - https://github.com/jerryjliu/llama_index 4 | 5 | Built with LlamaIndex, GithubRepositoryReader, and OpenAI. 6 | 7 | Author: 8 | @dcarpintero : https://github.com/dcarpintero 9 | """ 10 | from llama_index import download_loader, VectorStoreIndex 11 | from llama_index.node_parser import SimpleNodeParser 12 | from llama_hub.github_repo import GithubRepositoryReader, GithubClient 13 | from dotenv import load_dotenv 14 | import openai 15 | import os 16 | import logging 17 | 18 | 19 | def load_environment_vars() -> dict: 20 | """Load required environment variables. Raise an exception if any are missing.""" 21 | 22 | load_dotenv() 23 | 24 | api_key = os.getenv("OPENAI_API_KEY") 25 | github_token = os.getenv("GITHUB_TOKEN") 26 | 27 | if not api_key: 28 | raise EnvironmentError("OPENAI_API_KEY environment variable not set.") 29 | 30 | if not github_token: 31 | raise EnvironmentError("GITHUB_TOKEN environment variable not set.") 32 | 33 | logging.info("Environment variables loaded.") 34 | return {"OPENAI_API_KEY": api_key, "GITHUB_TOKEN": github_token} 35 | 36 | 37 | def initialize_github_loader(github_token: str) -> GithubRepositoryReader: 38 | """Initialize GithubRepositoryReader""" 39 | 40 | download_loader("GithubRepositoryReader") 41 | github_client = GithubClient(github_token) 42 | 43 | loader = GithubRepositoryReader( 44 | github_client, 45 | owner = "jerryjliu", 46 | repo = "llama_index", 47 | filter_directories = (["docs"], GithubRepositoryReader.FilterType.INCLUDE), 48 | filter_file_extensions = ([".md"], GithubRepositoryReader.FilterType.INCLUDE), 49 | verbose = False, 50 | concurrent_requests = 10, 51 | ) 52 | 53 | return loader 54 | 55 | 56 | def load_and_index_data(loader) -> VectorStoreIndex: 57 | """Load and Index Knowledge Base from GitHub Repository""" 58 | 59 | docs = load_data(loader) 60 | index = index_data(docs) 61 | return index 62 | 63 | 64 | def load_data(loader: GithubRepositoryReader) -> []: 65 | """Load Knowledge Base from GitHub Repository""" 66 | 67 | logging.info("Loading data from Github: %s/%s", loader._owner, loader._repo) 68 | docs = loader.load_data(branch="main") 69 | for doc in docs: 70 | logging.info(doc.extra_info) 71 | doc.metadata = {'filename': doc.extra_info['file_name'], 'author': "LlamaIndex"} 72 | 73 | return docs 74 | 75 | def index_data(docs: []) -> VectorStoreIndex: 76 | """Index Documents""" 77 | 78 | logging.info("Parsing documents into nodes...") 79 | parser = SimpleNodeParser.from_defaults(chunk_size=1024, chunk_overlap=32) 80 | nodes = parser.get_nodes_from_documents(docs) 81 | 82 | logging.info("Indexing nodes...") 83 | index = VectorStoreIndex(nodes) 84 | 85 | logging.info("Persisting index on ./storage...") 86 | index.storage_context.persist(persist_dir="./storage") 87 | 88 | logging.info("Data-Knowledge ingestion process is completed (OK)") 89 | return index 90 | 91 | if __name__ == "__main__": 92 | logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s") 93 | 94 | try: 95 | env_vars = load_environment_vars() 96 | openai.api_key = env_vars['OPENAI_API_KEY'] 97 | 98 | loader = initialize_github_loader(env_vars['GITHUB_TOKEN']) 99 | load_and_index_data(loader) 100 | except Exception as ex: 101 | logging.error("Unexpected Error: %s", ex) 102 | raise ex -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | llama-hub==0.0.38 2 | llama-index==0.8.44 3 | nltk==3.8.1 4 | openai==0.28.0 5 | python-dotenv==1.0.0 6 | streamlit==1.27.2 7 | tiktoken==0.5.1 8 | sentence-transformers==2.2.2 -------------------------------------------------------------------------------- /storage/graph_store.json: -------------------------------------------------------------------------------- 1 | {"graph_dict": {}} -------------------------------------------------------------------------------- /storage/index_store.json: -------------------------------------------------------------------------------- 1 | {"index_store/data": {"7f938a8c-2e4a-472b-887a-8709b15b8e75": {"__type__": "vector_store", "__data__": "{\"index_id\": \"7f938a8c-2e4a-472b-887a-8709b15b8e75\", \"summary\": null, \"nodes_dict\": {\"4437d6b6-ead6-4a1d-9251-df93995d95b8\": \"4437d6b6-ead6-4a1d-9251-df93995d95b8\", \"e6af2f51-3bf0-45b1-8297-620133b65139\": \"e6af2f51-3bf0-45b1-8297-620133b65139\", \"e75ac405-364a-43a9-b9c3-95e2bf6f4185\": \"e75ac405-364a-43a9-b9c3-95e2bf6f4185\", \"679f0feb-a0e4-450e-9d1d-f93372bc8235\": \"679f0feb-a0e4-450e-9d1d-f93372bc8235\", \"1e6e7b26-278d-414d-a59e-556fbe6e211f\": \"1e6e7b26-278d-414d-a59e-556fbe6e211f\", \"afa369ba-648a-4758-803a-1386ec8dfff9\": \"afa369ba-648a-4758-803a-1386ec8dfff9\", \"5bf8d116-1a65-47ea-b92c-dea5bee72bf3\": \"5bf8d116-1a65-47ea-b92c-dea5bee72bf3\", \"b025c8a5-eb7a-481d-9fed-688ab2e976be\": \"b025c8a5-eb7a-481d-9fed-688ab2e976be\", \"30e455b0-1e7b-43d2-a247-65f89200d59b\": \"30e455b0-1e7b-43d2-a247-65f89200d59b\", \"5ebcf58d-2516-4f5e-8d00-2cb8d086edaa\": \"5ebcf58d-2516-4f5e-8d00-2cb8d086edaa\", \"8122e965-68d2-4c8c-be75-f230747c599e\": \"8122e965-68d2-4c8c-be75-f230747c599e\", \"cce95214-17e7-4280-9ff7-f0339a6cc7b1\": \"cce95214-17e7-4280-9ff7-f0339a6cc7b1\", \"e2e0ae66-1caa-4ffb-b490-0e086abef319\": \"e2e0ae66-1caa-4ffb-b490-0e086abef319\", \"3b4a69ca-9f98-4e7e-8444-82b567e8452d\": \"3b4a69ca-9f98-4e7e-8444-82b567e8452d\", \"219e3285-8514-4d57-aabc-b9be90bcee77\": \"219e3285-8514-4d57-aabc-b9be90bcee77\", \"f426cf7b-db7b-4b36-970b-111415f503fb\": \"f426cf7b-db7b-4b36-970b-111415f503fb\", \"c0db3248-daff-412a-995f-9c26aee4eeb7\": \"c0db3248-daff-412a-995f-9c26aee4eeb7\", \"811a1f10-f697-4c37-9591-b6f8b87a9e11\": \"811a1f10-f697-4c37-9591-b6f8b87a9e11\", \"9f280e4b-20e3-47bf-bef0-cff81dd64831\": \"9f280e4b-20e3-47bf-bef0-cff81dd64831\", \"474c71b0-ce49-4b88-85a4-e1ec94ce3738\": \"474c71b0-ce49-4b88-85a4-e1ec94ce3738\", \"f8101c5f-71f6-4812-9e3b-6547502b5211\": \"f8101c5f-71f6-4812-9e3b-6547502b5211\", \"f7597301-8057-424e-9d46-f2e59157cd91\": \"f7597301-8057-424e-9d46-f2e59157cd91\", \"f2053e36-16d6-4f9e-bd97-ee8e5e4faad9\": \"f2053e36-16d6-4f9e-bd97-ee8e5e4faad9\", \"9f9535a8-5be2-48d7-bcfa-2345258ee81a\": \"9f9535a8-5be2-48d7-bcfa-2345258ee81a\", \"bd56e86b-54cc-4e3a-baa0-5e37c98227aa\": \"bd56e86b-54cc-4e3a-baa0-5e37c98227aa\", \"01e3977e-f6d5-45a2-b686-b5130b963879\": \"01e3977e-f6d5-45a2-b686-b5130b963879\", \"c8702fc4-96f5-4890-800b-41eab8d9e957\": \"c8702fc4-96f5-4890-800b-41eab8d9e957\", \"1c9e7a9f-0ee3-4670-9074-978616b35410\": \"1c9e7a9f-0ee3-4670-9074-978616b35410\", \"6f9c0fbd-5a60-4de4-8edc-b0c70177461d\": \"6f9c0fbd-5a60-4de4-8edc-b0c70177461d\", \"7c2aec87-a8df-4e9a-a304-1e62a164efce\": \"7c2aec87-a8df-4e9a-a304-1e62a164efce\", \"e52f36a5-f33b-43c1-9cb8-bb1b85b5911b\": \"e52f36a5-f33b-43c1-9cb8-bb1b85b5911b\", \"7dea053a-d622-4c32-b6ba-eb1b8c64a10f\": \"7dea053a-d622-4c32-b6ba-eb1b8c64a10f\", \"d73fccdc-8c0f-4b39-b308-1a16bb28b10c\": \"d73fccdc-8c0f-4b39-b308-1a16bb28b10c\", \"07ec6934-f93b-4bae-828e-38808b9e6bc6\": \"07ec6934-f93b-4bae-828e-38808b9e6bc6\", \"67c83569-5531-4a11-b0cb-5d95f16b7450\": \"67c83569-5531-4a11-b0cb-5d95f16b7450\", \"f9c4f6bb-62d7-42d6-8a92-5a1f035e0b70\": \"f9c4f6bb-62d7-42d6-8a92-5a1f035e0b70\", \"e22c2099-983c-4151-a2ef-b7e3ac87623a\": \"e22c2099-983c-4151-a2ef-b7e3ac87623a\", \"8dbd8b26-46bf-4ee8-866b-02e979cfea67\": \"8dbd8b26-46bf-4ee8-866b-02e979cfea67\", \"44715510-67b8-431c-9ad6-321b8cffc5d1\": \"44715510-67b8-431c-9ad6-321b8cffc5d1\", \"25180ddc-2ed7-409a-abb1-82f600017635\": \"25180ddc-2ed7-409a-abb1-82f600017635\", \"6cd88956-15fe-496c-b768-9bdc4107c57c\": \"6cd88956-15fe-496c-b768-9bdc4107c57c\", \"3a3b62b8-4da6-4dd5-ba96-338bbc9deb24\": \"3a3b62b8-4da6-4dd5-ba96-338bbc9deb24\", \"46562a93-963b-4a61-ba5c-c5cf7876b8b2\": \"46562a93-963b-4a61-ba5c-c5cf7876b8b2\", \"27040026-fa6b-4c67-86b4-71232e5e56da\": \"27040026-fa6b-4c67-86b4-71232e5e56da\", \"506de4fd-7b68-4a6b-ab6e-151903c85672\": \"506de4fd-7b68-4a6b-ab6e-151903c85672\", \"f9f1105f-8f5d-4942-a700-13f2bb1a6abb\": \"f9f1105f-8f5d-4942-a700-13f2bb1a6abb\", \"b6977624-9a24-46bb-a840-4d6132bdfe80\": \"b6977624-9a24-46bb-a840-4d6132bdfe80\", \"735bc8a0-d89a-422d-8b09-53b423ad58fd\": \"735bc8a0-d89a-422d-8b09-53b423ad58fd\", \"bfb19442-7092-4b42-a9a7-9c064aa615f8\": \"bfb19442-7092-4b42-a9a7-9c064aa615f8\", \"e8f9b733-7c89-4eca-b99d-9a07fcf20152\": \"e8f9b733-7c89-4eca-b99d-9a07fcf20152\", \"1308b8dc-b079-497f-b853-0b147528dfd2\": \"1308b8dc-b079-497f-b853-0b147528dfd2\", \"811b2cb7-db86-4a14-a8a9-46b0a494c161\": \"811b2cb7-db86-4a14-a8a9-46b0a494c161\", \"c1162b51-89f2-4f63-947a-8e53179cea32\": \"c1162b51-89f2-4f63-947a-8e53179cea32\", \"d8f9b13b-6791-42b3-bd6e-b000d8b06da2\": \"d8f9b13b-6791-42b3-bd6e-b000d8b06da2\", \"9b96a9ee-9180-4349-a49c-01882c8c2379\": \"9b96a9ee-9180-4349-a49c-01882c8c2379\", \"7fcb8820-8880-4da2-a3a4-89c1899defc9\": \"7fcb8820-8880-4da2-a3a4-89c1899defc9\", \"d82c4727-508f-414e-bd64-5a5e6ee875c7\": \"d82c4727-508f-414e-bd64-5a5e6ee875c7\", \"5576d7f8-447c-4cbb-84c3-314a78d44f8f\": \"5576d7f8-447c-4cbb-84c3-314a78d44f8f\", \"e48b0d18-726d-471d-9702-0a63215a1d2e\": \"e48b0d18-726d-471d-9702-0a63215a1d2e\", \"7acbc142-6cba-4d56-8100-21465098753a\": \"7acbc142-6cba-4d56-8100-21465098753a\", \"626a612e-5a29-45d3-a4e0-bf4e7c89b2d5\": \"626a612e-5a29-45d3-a4e0-bf4e7c89b2d5\", \"f73077f6-5d32-4c13-af90-2452491bcd54\": \"f73077f6-5d32-4c13-af90-2452491bcd54\", \"4dab9719-d28e-4d95-a862-eb6e16b925c7\": \"4dab9719-d28e-4d95-a862-eb6e16b925c7\", \"d8206bbc-6e15-4345-964f-0db151da7a97\": \"d8206bbc-6e15-4345-964f-0db151da7a97\", \"e0caea2b-f168-4262-9b95-224c6b8996b2\": \"e0caea2b-f168-4262-9b95-224c6b8996b2\", \"c2b9f3c1-0183-4266-8ee0-e3b421a1e7f0\": \"c2b9f3c1-0183-4266-8ee0-e3b421a1e7f0\", \"7d0f937c-a29c-4a7c-9ef3-682d8123adab\": \"7d0f937c-a29c-4a7c-9ef3-682d8123adab\", \"38868d0e-6a25-4e21-a37c-139717704171\": \"38868d0e-6a25-4e21-a37c-139717704171\", \"a9858431-5d39-4132-8408-d13cfcd97201\": \"a9858431-5d39-4132-8408-d13cfcd97201\", \"985acb4b-717c-4e17-b03f-4cad246da1c7\": \"985acb4b-717c-4e17-b03f-4cad246da1c7\", \"a612f8cc-967f-4289-bd5c-b1fd178bcd40\": \"a612f8cc-967f-4289-bd5c-b1fd178bcd40\", \"28c994a0-580e-4746-a12e-d5ac10b1f26f\": \"28c994a0-580e-4746-a12e-d5ac10b1f26f\", \"caa6858d-0b2d-462a-9e7d-a6b5ddfdbf0c\": \"caa6858d-0b2d-462a-9e7d-a6b5ddfdbf0c\", \"5a2d46f6-4ddf-48ce-b498-1b0d24cc2cb2\": \"5a2d46f6-4ddf-48ce-b498-1b0d24cc2cb2\", \"bccebf27-b755-4da4-9864-3e2098e6da3e\": \"bccebf27-b755-4da4-9864-3e2098e6da3e\", \"ac8b9270-ffc5-4520-b9d2-5b2834a8b691\": \"ac8b9270-ffc5-4520-b9d2-5b2834a8b691\", \"05d8bf6c-6183-4751-8a8a-1ae83a70a652\": \"05d8bf6c-6183-4751-8a8a-1ae83a70a652\", \"dccd2d0b-197d-4871-9aaf-f42ea6d4c119\": \"dccd2d0b-197d-4871-9aaf-f42ea6d4c119\", \"40f8c65b-652a-434b-83d5-d413348c015e\": \"40f8c65b-652a-434b-83d5-d413348c015e\", \"61aeaf9c-7e1b-4638-9dbb-aa23f87aefb4\": \"61aeaf9c-7e1b-4638-9dbb-aa23f87aefb4\", \"10dfb162-591a-4c8a-af2b-ad51ac5635a4\": \"10dfb162-591a-4c8a-af2b-ad51ac5635a4\", \"f9b0a12b-11c9-4a58-ae7f-31b9e2a8c3a9\": \"f9b0a12b-11c9-4a58-ae7f-31b9e2a8c3a9\", \"330c65bc-2b0e-4d90-89f0-e8c59f1a0b2e\": \"330c65bc-2b0e-4d90-89f0-e8c59f1a0b2e\", \"030b0cd8-541b-47e7-befb-69aaeeb69fd0\": \"030b0cd8-541b-47e7-befb-69aaeeb69fd0\", \"76b26bd0-4bde-4854-b485-e097abae0a0f\": \"76b26bd0-4bde-4854-b485-e097abae0a0f\", \"2551cd80-a535-4dec-8f2c-50095a3dfa39\": \"2551cd80-a535-4dec-8f2c-50095a3dfa39\", \"d8920524-7a92-4572-9c2a-b8a3e37a3970\": \"d8920524-7a92-4572-9c2a-b8a3e37a3970\", \"2fd9c628-f1f2-4c0c-8f1f-5acc1000985b\": \"2fd9c628-f1f2-4c0c-8f1f-5acc1000985b\", \"5be3f2b3-b666-4d89-afe9-9308b99dddfa\": \"5be3f2b3-b666-4d89-afe9-9308b99dddfa\", \"8e27449d-52ac-41cb-ba21-9dbf8b23ee0a\": \"8e27449d-52ac-41cb-ba21-9dbf8b23ee0a\", \"53d695ce-fc96-4ce3-9f81-a6f4bc42c7d9\": \"53d695ce-fc96-4ce3-9f81-a6f4bc42c7d9\", \"60e2a7b0-3822-4db0-b36f-fe4c5f79749f\": \"60e2a7b0-3822-4db0-b36f-fe4c5f79749f\", \"2f005cd4-360b-4e92-8a7a-7638c571428c\": \"2f005cd4-360b-4e92-8a7a-7638c571428c\", \"f848b2dc-c545-438a-aac6-2da67a02ad7d\": \"f848b2dc-c545-438a-aac6-2da67a02ad7d\", \"eb6b3f75-8e8f-4b4d-8ea7-405bad15aa42\": \"eb6b3f75-8e8f-4b4d-8ea7-405bad15aa42\", \"dc78f6aa-4eb3-4885-b984-635e33d81e1a\": \"dc78f6aa-4eb3-4885-b984-635e33d81e1a\", \"b35c455a-9d46-4ade-8432-722997229b01\": \"b35c455a-9d46-4ade-8432-722997229b01\", \"1f0e9a72-a9c4-468b-8f4f-7104f003068e\": \"1f0e9a72-a9c4-468b-8f4f-7104f003068e\", \"2169d002-ca5c-476c-8df3-eb9e39c74eaf\": \"2169d002-ca5c-476c-8df3-eb9e39c74eaf\", \"5ba2a608-17fa-4791-89f8-23f07a77bc3d\": \"5ba2a608-17fa-4791-89f8-23f07a77bc3d\", \"d10b62f5-9f13-484e-8f54-bb8ccaff358a\": \"d10b62f5-9f13-484e-8f54-bb8ccaff358a\", \"0c2a9fc8-fcd4-4543-ab26-deee440d6f69\": \"0c2a9fc8-fcd4-4543-ab26-deee440d6f69\", \"8eee734c-38a3-417c-b234-8cbb9df1b1aa\": \"8eee734c-38a3-417c-b234-8cbb9df1b1aa\", \"701411f3-d6cf-472f-9044-21e5e86f5f4a\": \"701411f3-d6cf-472f-9044-21e5e86f5f4a\", \"c40fb52a-59ce-4300-93ed-e52bc1c0a18f\": \"c40fb52a-59ce-4300-93ed-e52bc1c0a18f\", \"fa87eaf5-9ccf-4d36-bb6e-6619ef22e5e2\": \"fa87eaf5-9ccf-4d36-bb6e-6619ef22e5e2\", \"0912f890-b478-4670-9e15-d994aac2cf44\": \"0912f890-b478-4670-9e15-d994aac2cf44\", \"696f5357-db8c-4c4b-81f0-5259c91ce418\": \"696f5357-db8c-4c4b-81f0-5259c91ce418\", \"96540789-826e-40b9-ba7a-6137b1eae712\": \"96540789-826e-40b9-ba7a-6137b1eae712\", \"6f88b8dc-f3a2-4c1a-bd4b-7b50716f06c6\": \"6f88b8dc-f3a2-4c1a-bd4b-7b50716f06c6\", \"1bbaca72-5f2b-4df7-9b82-2fbc645f8eb2\": \"1bbaca72-5f2b-4df7-9b82-2fbc645f8eb2\", \"20d0b081-42d7-4dfc-890f-a3f22e25a4ca\": \"20d0b081-42d7-4dfc-890f-a3f22e25a4ca\", \"0696e8c1-c965-456c-a011-9ef3e297b71a\": \"0696e8c1-c965-456c-a011-9ef3e297b71a\", \"96252df0-81a0-4037-bb4f-3fdc546ef84d\": \"96252df0-81a0-4037-bb4f-3fdc546ef84d\", \"c3288a8a-9ee9-4db9-b48b-1d5a7d091913\": \"c3288a8a-9ee9-4db9-b48b-1d5a7d091913\", \"948fa402-50f5-47b4-bbf9-cfb44fb9b649\": \"948fa402-50f5-47b4-bbf9-cfb44fb9b649\", \"0dbc1fbc-a574-4dd3-b0a0-df32bd0218b0\": \"0dbc1fbc-a574-4dd3-b0a0-df32bd0218b0\", \"127950f3-614b-4ab1-aa62-17af686c4039\": \"127950f3-614b-4ab1-aa62-17af686c4039\", \"a495af7c-c87a-496b-a868-5b8f30953f42\": \"a495af7c-c87a-496b-a868-5b8f30953f42\", \"d75a6a73-65df-4a57-96bf-41bf1d77ab17\": \"d75a6a73-65df-4a57-96bf-41bf1d77ab17\", \"8c21c0d0-cf6b-4a03-80c6-66f9bb5c153b\": \"8c21c0d0-cf6b-4a03-80c6-66f9bb5c153b\", \"3e553d2d-28b3-4576-bd6a-e4833517ee4b\": \"3e553d2d-28b3-4576-bd6a-e4833517ee4b\", \"290972af-dd34-49b7-8c81-43d512b3d500\": \"290972af-dd34-49b7-8c81-43d512b3d500\", \"d9f52f29-8ac7-40e5-81e6-b4e1cf135561\": \"d9f52f29-8ac7-40e5-81e6-b4e1cf135561\", \"6558c03c-1f31-416b-9f95-f9a8e51c90f8\": \"6558c03c-1f31-416b-9f95-f9a8e51c90f8\", \"30cc6acc-df1d-4249-be11-3c1e90cbc9a9\": \"30cc6acc-df1d-4249-be11-3c1e90cbc9a9\", \"d0afa067-1613-42a3-a67d-d440ad8901b2\": \"d0afa067-1613-42a3-a67d-d440ad8901b2\", \"e319874d-fc82-4b5a-a288-52af9c990a82\": \"e319874d-fc82-4b5a-a288-52af9c990a82\", \"81d03ce7-6627-45a7-8a9f-66374ec48eb0\": \"81d03ce7-6627-45a7-8a9f-66374ec48eb0\", \"c27b9bc8-c421-4da3-8b0b-2a72343235a9\": \"c27b9bc8-c421-4da3-8b0b-2a72343235a9\", \"4b7ca0e6-132d-4928-a648-c728ad3171a1\": \"4b7ca0e6-132d-4928-a648-c728ad3171a1\", \"afdaba8b-1eea-488d-b4da-ebab174cec70\": \"afdaba8b-1eea-488d-b4da-ebab174cec70\", \"8c626d5e-a15a-43a8-be3f-18a0a556655a\": \"8c626d5e-a15a-43a8-be3f-18a0a556655a\", \"98988af5-7ed4-42ce-a24e-b2eae1327a55\": \"98988af5-7ed4-42ce-a24e-b2eae1327a55\", \"bfa861a8-eead-4df4-b4b9-0987c129ad3e\": \"bfa861a8-eead-4df4-b4b9-0987c129ad3e\", \"31d74118-6633-45bd-939d-20eb4f24a622\": \"31d74118-6633-45bd-939d-20eb4f24a622\", \"97ad579d-fce9-484d-8665-0c18c2151d28\": \"97ad579d-fce9-484d-8665-0c18c2151d28\", \"2bc3ad40-0112-49aa-8bf1-7aa42d89642f\": \"2bc3ad40-0112-49aa-8bf1-7aa42d89642f\", \"cd444059-4829-454b-84a7-5890a6c92a80\": \"cd444059-4829-454b-84a7-5890a6c92a80\", \"b5736017-4998-4aea-ba88-7fcd6b544808\": \"b5736017-4998-4aea-ba88-7fcd6b544808\", \"cc232f0b-69fb-45af-bbc9-77e97ff5e84c\": \"cc232f0b-69fb-45af-bbc9-77e97ff5e84c\", \"45e5b4de-d494-4270-880b-1fa853e039ab\": \"45e5b4de-d494-4270-880b-1fa853e039ab\", \"49ce9594-5f43-407c-9e20-4366e340ff72\": \"49ce9594-5f43-407c-9e20-4366e340ff72\", \"6868a6e6-abd1-4a18-a7d4-0a8bc78aaa29\": \"6868a6e6-abd1-4a18-a7d4-0a8bc78aaa29\", \"824f712c-18a8-49e1-b746-94906a75383d\": \"824f712c-18a8-49e1-b746-94906a75383d\", \"d35927e0-0092-4e38-b844-9f1d050d2862\": \"d35927e0-0092-4e38-b844-9f1d050d2862\", \"efa12611-2447-4d58-8a08-26a3704535b8\": \"efa12611-2447-4d58-8a08-26a3704535b8\", \"8a8f7393-61ee-48f4-a416-8165f53882ac\": \"8a8f7393-61ee-48f4-a416-8165f53882ac\", \"657efc36-df83-4e56-933f-89039a0395a5\": \"657efc36-df83-4e56-933f-89039a0395a5\", \"72a3bcd2-7945-444e-965b-c18a766f7a5d\": \"72a3bcd2-7945-444e-965b-c18a766f7a5d\", \"1d32a6ac-f244-4992-bd34-440a2182a532\": \"1d32a6ac-f244-4992-bd34-440a2182a532\", \"01a3a473-0083-40fa-8a11-75f394861e2d\": \"01a3a473-0083-40fa-8a11-75f394861e2d\", \"15707a2c-3f1e-4723-86fd-67091dcf67ba\": \"15707a2c-3f1e-4723-86fd-67091dcf67ba\", \"8c778fa6-168d-4457-a731-fcefc050a6d3\": \"8c778fa6-168d-4457-a731-fcefc050a6d3\", \"52e7f52d-519c-48fd-85e3-73a0d296b135\": \"52e7f52d-519c-48fd-85e3-73a0d296b135\", \"2fb6d4f0-f671-424e-8640-8accc6adab3b\": \"2fb6d4f0-f671-424e-8640-8accc6adab3b\", \"96dca026-efdc-4ac7-90bf-906b643f1573\": \"96dca026-efdc-4ac7-90bf-906b643f1573\", \"a27331a1-1bad-4ccd-9bd0-429465d80172\": \"a27331a1-1bad-4ccd-9bd0-429465d80172\", \"6dcacd63-f1ce-4905-a2b3-a367a12576da\": \"6dcacd63-f1ce-4905-a2b3-a367a12576da\", \"a0d2eca7-f20c-49eb-9cd7-6a80d576903c\": \"a0d2eca7-f20c-49eb-9cd7-6a80d576903c\", \"3b1f1bc5-e57a-4694-b624-7aa73d18f9d9\": \"3b1f1bc5-e57a-4694-b624-7aa73d18f9d9\", \"9d950bef-82ba-44c8-8f18-975040b880f2\": \"9d950bef-82ba-44c8-8f18-975040b880f2\", \"6e0f2a6d-1eb9-4f30-b31b-55ae0e800e41\": \"6e0f2a6d-1eb9-4f30-b31b-55ae0e800e41\", \"f1bc3745-7815-4391-8174-7fc8d283a3b4\": \"f1bc3745-7815-4391-8174-7fc8d283a3b4\", \"a607c6b3-397e-4db9-b6ae-e7abd7a7dcca\": \"a607c6b3-397e-4db9-b6ae-e7abd7a7dcca\", \"2ccd8336-2484-4eae-97b2-12f9eae6c1e6\": \"2ccd8336-2484-4eae-97b2-12f9eae6c1e6\", \"ee52c155-2606-41fe-9846-bfd036b7b5e2\": \"ee52c155-2606-41fe-9846-bfd036b7b5e2\", \"d1231944-90c4-4c97-b5d7-fde7e74ccc21\": \"d1231944-90c4-4c97-b5d7-fde7e74ccc21\", \"45c9d7ae-cd4f-4588-b588-fdfee152552e\": \"45c9d7ae-cd4f-4588-b588-fdfee152552e\", \"2b2b1191-5427-4142-af6c-010b7c64bcbc\": \"2b2b1191-5427-4142-af6c-010b7c64bcbc\", \"7a0954fe-8e8c-402f-b89c-ebb4b163df9e\": \"7a0954fe-8e8c-402f-b89c-ebb4b163df9e\", \"8c1168ba-de1c-4147-a569-fbae40ef8a50\": \"8c1168ba-de1c-4147-a569-fbae40ef8a50\", \"7b5e823d-e520-446c-962d-0235c8a2c5c5\": \"7b5e823d-e520-446c-962d-0235c8a2c5c5\", \"6b920c82-667e-473f-a2c0-2b881870a3f2\": \"6b920c82-667e-473f-a2c0-2b881870a3f2\", \"64db79af-099c-48d7-b5b9-85fd7709df64\": \"64db79af-099c-48d7-b5b9-85fd7709df64\", \"258808ef-4ff1-4f26-a477-640304ad78bb\": \"258808ef-4ff1-4f26-a477-640304ad78bb\", \"5772201a-3ccc-4a5d-acaf-a9eb0639fe43\": \"5772201a-3ccc-4a5d-acaf-a9eb0639fe43\", \"a7854179-f654-4452-bee0-2e6374805dfc\": \"a7854179-f654-4452-bee0-2e6374805dfc\", \"c536587f-791c-4772-8240-4ba228f5940c\": \"c536587f-791c-4772-8240-4ba228f5940c\", \"b3d9f8c2-cbbc-48cc-9629-8c91b5fdb8bd\": \"b3d9f8c2-cbbc-48cc-9629-8c91b5fdb8bd\", \"c0afb204-4477-4656-aaf8-694298244255\": \"c0afb204-4477-4656-aaf8-694298244255\", \"de9ae151-e2de-44ac-b01a-b5e2646da0cb\": \"de9ae151-e2de-44ac-b01a-b5e2646da0cb\", \"b8cc04a2-dcdc-4906-93a6-5680da2419b9\": \"b8cc04a2-dcdc-4906-93a6-5680da2419b9\", \"2e13ebfd-5b4e-4cf0-ada8-90a14f3b2460\": \"2e13ebfd-5b4e-4cf0-ada8-90a14f3b2460\", \"85dcd5ea-0c20-4794-b57b-921ea5d26d07\": \"85dcd5ea-0c20-4794-b57b-921ea5d26d07\", \"dc9241f9-6eab-4ef3-9b1e-8d8b848793b1\": \"dc9241f9-6eab-4ef3-9b1e-8d8b848793b1\", \"cc31c75a-073c-454c-afa5-386cca33651b\": \"cc31c75a-073c-454c-afa5-386cca33651b\", \"cfb0fbc0-d960-4800-9444-02cb6807cec8\": \"cfb0fbc0-d960-4800-9444-02cb6807cec8\", \"32cff3f3-52e3-49c6-aed1-ef7cf3e01a80\": \"32cff3f3-52e3-49c6-aed1-ef7cf3e01a80\", \"e752d1df-66ca-49a5-a082-c5c5924c6bde\": \"e752d1df-66ca-49a5-a082-c5c5924c6bde\", \"5dadfaa5-7c77-47b0-8095-e55f75b47de2\": \"5dadfaa5-7c77-47b0-8095-e55f75b47de2\", \"f3cec787-bb9f-4a16-b5c1-8f2b60e67e51\": \"f3cec787-bb9f-4a16-b5c1-8f2b60e67e51\", \"cd8c80cc-4f13-4cd6-bce3-e120fd8e252e\": \"cd8c80cc-4f13-4cd6-bce3-e120fd8e252e\", \"8fd22a72-b9d0-44e8-a82d-daad4ee4d958\": \"8fd22a72-b9d0-44e8-a82d-daad4ee4d958\", \"5d5471e1-ed95-4deb-83ec-eb06ae3147bf\": \"5d5471e1-ed95-4deb-83ec-eb06ae3147bf\", \"e43348b4-496d-466c-ab26-1b0027fb8ede\": \"e43348b4-496d-466c-ab26-1b0027fb8ede\", \"c586ca61-5735-49bc-ac31-dbc9677f884a\": \"c586ca61-5735-49bc-ac31-dbc9677f884a\", \"5f685536-34c9-4246-b8de-d9babb267b3c\": \"5f685536-34c9-4246-b8de-d9babb267b3c\", \"5863ca78-b666-457d-ac16-851c2e683cc9\": \"5863ca78-b666-457d-ac16-851c2e683cc9\", \"36c049ca-bfa2-4835-9c50-b1e5f333e759\": \"36c049ca-bfa2-4835-9c50-b1e5f333e759\", \"41bc4137-2d62-416e-9f3d-c92b75b40965\": \"41bc4137-2d62-416e-9f3d-c92b75b40965\", \"4a242449-8afc-476b-b24b-71b5aadb4cfe\": \"4a242449-8afc-476b-b24b-71b5aadb4cfe\", \"1348302c-f1a8-4312-90db-958594383c5a\": \"1348302c-f1a8-4312-90db-958594383c5a\", \"f1aaf379-77fe-4e70-adbe-adc62ec58f48\": \"f1aaf379-77fe-4e70-adbe-adc62ec58f48\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}} --------------------------------------------------------------------------------