├── .gitignore
├── .streamlit
└── config.toml
├── README.md
├── app.py
├── assets
├── llamaindexchat-demo.mp4
├── llamaindexchat.png
└── sourcecitation.png
├── ingest_knowledge.py
├── requirements.txt
└── storage
├── docstore.json
├── graph_store.json
├── index_store.json
└── vector_store.json
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/.streamlit/config.toml:
--------------------------------------------------------------------------------
1 | [logger]
2 |
3 | # Level of logging: 'error', 'warning', 'info', or 'debug'.
4 | level = 'info'
5 |
6 | # String format for logging messages. If logger.datetimeFormat is set, logger messages will default to `%(asctime)s.%(msecs)03d %(message)s`. See [Python's documentation](https://docs.python.org/3/library/logging.html#formatter-objects) for available attributes.
7 | # Default: "%(asctime)s %(message)s"
8 | messageFormat = "%(asctime)s [%(levelname)s] %(message)s"
9 |
10 | [browser]
11 |
12 | # Default: true
13 | gatherUsageStats = false
14 |
15 | [theme]
16 |
17 | primaryColor="#818cf8"
18 | backgroundColor="#FFFFFF"
19 | secondaryBackgroundColor="#F0F2F6"
20 | textColor="#262730"
21 | font="sans serif"
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://llamaindexchat.streamlit.app/)
2 | [](https://www.python.org/)
3 | [](https://www.codefactor.io/repository/github/dcarpintero/llamaindexchat)
4 | [](https://github.com/dcarpintero/st-newsapi-connector/blob/main/LICENSE)
5 |
6 | # Chat with 🦙 LlamaIndex Docs 🗂️
7 |
8 |
9 |
10 |
11 |
12 | Chatbot using [LlamaIndex](https://www.llamaindex.ai/) to supplement OpenAI GPT-3.5 Large Language Model (LLM) with the [LlamaIndex Documentation](https://gpt-index.readthedocs.io/en/latest/index.html). Main features:
13 |
14 | - **Transparency and Evaluation**: by customizing the metadata field of documents (and nodes), the App is able to provide links to the sources of the responses, along with the author and relevance score of each source node. This ensures the answers can be cross-referenced with the original content to check for accuracy.
15 | - **Estimating Inference Costs**: tracks 'LLM Prompt Tokens' and 'LLM Completion Tokens' to help keep inference costs under control.
16 | - **Reducing Costs**: persists storage including embedding vectors, and caches the questions / responses to reduce the number of calls to the LLM.
17 | - **Usability**: includes suggestions for questions, and basic functionality to clear chat history.
18 |
19 | ## 🦙 What's LlamaIndex?
20 |
21 | > *LlamaIndex is a simple, flexible data framework for connecting custom data sources to large language models. [...] It helps in preparing a knowledge base by ingesting data from different sources and formats using data connectors. The data is then represented as documents and nodes, where a node is the atomic unit of data in LlamaIndex. Once the data is ingested, LlamaIndex indexes the data into a format that is easy to retrieve. It uses different indexes such as the VectorStoreIndex, Summary Index, Tree Index, and Keyword Table Index. In the querying stage, LlamaIndex retrieves the most relevant context given a user query and synthesizes a response using a response synthesizer. [Response from our Chatbot to the query 'What's LlamaIndex?']*
22 |
23 | ## 📋 How does it work?
24 |
25 | LlamaIndex enriches LLMs (for simplicity, we default the [ServiceContext](https://gpt-index.readthedocs.io/en/latest/core_modules/supporting_modules/service_context.html) to OpenAI GPT-3.5 which is then used for indexing and querying) with a custom knowledge base through a process called [Retrieval Augmented Generation (RAG)](https://research.ibm.com/blog/retrieval-augmented-generation-RAG) that involves the following steps:
26 |
27 | - **Connecting to an External Datasource**: We use the [Github Repository Loader](https://llamahub.ai/l/github_repo) available at [LlamaHub](https://llamahub.ai/) (an open-source repository for data loaders) to connect to the Github repository containing the markdown files of the LlamaIndex Docs:
28 |
29 | ```python
30 | def initialize_github_loader(github_token: str) -> GithubRepositoryReader:
31 | """Initialize GithubRepositoryReader"""
32 |
33 | download_loader("GithubRepositoryReader")
34 | github_client = GithubClient(github_token)
35 |
36 | loader = GithubRepositoryReader(github_client, [...])
37 |
38 | return loader
39 | ```
40 |
41 | - **Constructing Documents**: The markdown files of the Github repository are ingested and automatically converted to Document objects. In addition, we add the dictionary {'filename': '', 'author': ''} to the metadata of each document (which will be inherited by the nodes). This will allow us to retrieve and display the data sources and scores in the chatbot responses to make our App more transparent:
42 |
43 | ```python
44 | def load_and_index_data(loader: GithubRepositoryReader) -> VectorStoreIndex:
45 | """Load Knowledge Base from GitHub Repository"""
46 |
47 | logging.info("Loading data from Github: %s/%s", loader._owner, loader._repo)
48 | docs = loader.load_data(branch="main")
49 | for doc in docs:
50 | doc.metadata = {'filename': doc.extra_info['file_name'], 'author': "LlamaIndex"}
51 | ```
52 |
53 | - **Parsing Nodes**: Nodes represent a *chunk* of a source Document, we have defined a chunk size of '1024' with an overlap of '32'. Similar to Documents, Nodes contain metadata and relationship information with other nodes.
54 | ```python
55 | [...]
56 |
57 | logging.info("Parsing documents into nodes...")
58 | parser = SimpleNodeParser.from_defaults(chunk_size=1024, chunk_overlap=32)
59 | nodes = parser.get_nodes_from_documents(docs)
60 | ```
61 |
62 | - **Indexing**: An Index is a data structure that allows to quickly retrieve relevant context for a user query. For LlamaIndex, it's the core foundation for retrieval-augmented generation (RAG) use-cases. LlamaIndex provides different types of indices, such as the [VectorStoreIndex](https://gpt-index.readthedocs.io/en/latest/core_modules/data_modules/index/index_guide.html), which makes LLM calls to compute embeddings:
63 |
64 | ```python
65 | [...]
66 |
67 | logging.info("Indexing nodes...")
68 | index = VectorStoreIndex(nodes)
69 |
70 | logging.info("Persisting index on ./storage...")
71 | index.storage_context.persist(persist_dir="./storage")
72 |
73 | logging.info("Data-Knowledge ingestion process is completed (OK)")
74 | ```
75 |
76 | - **Querying (with cache)**: Once the index is constructed, querying a vector store index involves fetching the top-k most similar Nodes (by default 2), and passing those into the Response Synthesis module. The top Nodes are then appended to the user's prompt and passed to the LLM. We rely on the [Streamlit caching mechanism](https://docs.streamlit.io/library/advanced-features/caching) to optimize the performance and reduce the number of calls to the LLM:
77 |
78 | ```python
79 | @st.cache_data(max_entries=1024, show_spinner=False)
80 | def query_chatengine_cache(prompt, _chat_engine, settings):
81 | return _chat_engine.chat(prompt)
82 | ```
83 |
84 | - **Parsing Response**: The App parses the response source nodes to extract the filename, author and score of the top-k similar Nodes (from which the answer was retrieved):
85 |
86 | ```python
87 | def get_metadata(response):
88 | sources = []
89 | for item in response.source_nodes:
90 | if hasattr(item, "metadata"):
91 | filename = item.metadata.get('filename').replace('\\', '/')
92 | author = item.metadata.get('author')
93 | score = float("{:.3f}".format(item.score))
94 | sources.append({'filename': filename, 'author': author, 'score': score})
95 |
96 | return sources
97 | ```
98 |
99 | - **Transparent Results with Source Citation**: The use of metadata enables to display links to the sources along with the author and relevance scores from which the answer was retrieved:
100 |
101 |
102 |
103 |
104 |
105 |
106 | - **Estimating Inference Cost**: By using [TokenCountingHandler](https://docs.llamaindex.ai/en/stable/examples/callbacks/TokenCountingHandler.html), the App tracks the number of 'LLM Prompt Tokens' and 'LLM Completion Tokens' to estimate the overall [GPT-3.5 inference costs](https://openai.com/pricing).
107 |
108 | ```python
109 | token_counter = TokenCountingHandler(
110 | tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
111 | verbose=False
112 | )
113 |
114 | callback_manager = CallbackManager([token_counter])
115 | service_context = ServiceContext.from_defaults(llm=OpenAI(model="gpt-3.5-turbo"), callback_manager=callback_manager)
116 | ```
117 |
118 |
119 | ## 🚀 Quickstart
120 |
121 | 1. Clone the repository:
122 | ```
123 | git clone git@github.com:dcarpintero/chatwithweb3.git
124 | ```
125 |
126 | 2. Create and Activate a Virtual Environment:
127 |
128 | ```
129 | Windows:
130 |
131 | py -m venv .venv
132 | .venv\scripts\activate
133 |
134 | macOS/Linux
135 |
136 | python3 -m venv .venv
137 | source .venv/bin/activate
138 | ```
139 |
140 | 3. Install dependencies:
141 |
142 | ```
143 | pip install -r requirements.txt
144 | ```
145 |
146 | 4. Ingest Knowledge Base
147 | ```
148 | python ingest_knowledge.py
149 | ```
150 |
151 | 5. Launch Web Application
152 |
153 | ```
154 | streamlit run ./app.py
155 | ```
156 |
157 | ## 👩💻 Streamlit Web App
158 |
159 | Demo Web App deployed to [Streamlit Cloud](https://streamlit.io/cloud) and available at https://llamaindexchat.streamlit.app/
160 |
161 | ## 📚 References
162 |
163 | - [LLamaIndex Doc Reference](https://gpt-index.readthedocs.io/en/latest/index.html)
164 | - [Get Started with Streamlit Cloud](https://docs.streamlit.io/streamlit-community-cloud/get-started)
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | """
2 | Streamlit application that integrates with LlamaIndex and OpenAI's GPT-3.5 to create a conversational interface.
3 | Users can ask questions about LlamaIndex Docs, and the application provides relevant answers.
4 | The user's OpenAI API key is used to fetch responses from GPT-3.5.
5 |
6 | Author:
7 | @dcarpintero : https://github.com/dcarpintero
8 | """
9 | from llama_index.llms import OpenAI
10 | from llama_index import ServiceContext, StorageContext, load_index_from_storage
11 | from llama_index.callbacks import CallbackManager, TokenCountingHandler
12 | import openai
13 | import tiktoken
14 | import streamlit as st
15 |
16 |
# Page chrome must be configured before any other st.* command runs.
st.set_page_config(
    page_title="Chat with LlamaIndex Docs",
    page_icon="🦙",
    initial_sidebar_state="expanded",
    menu_items={"About": "Built by @dcarpintero with Streamlit & LLamaIndex"},
)

# Running token totals survive Streamlit reruns via session_state; the
# sidebar uses them to estimate inference cost.
if 'llm_prompt_tokens' not in st.session_state:
    st.session_state['llm_prompt_tokens'] = 0

if 'llm_completion_tokens' not in st.session_state:
    st.session_state['llm_completion_tokens'] = 0

# Propagate the key captured by the sidebar text_input (key='openai_api_key')
# to the OpenAI client on every rerun.
# NOTE(review): the stored value is used as typed — it is never stripped of
# whitespace (see sidebar()); confirm whether trimming was intended.
if 'openai_api_key' in st.session_state:
    openai.api_key = st.session_state['openai_api_key']
32 |
33 |
@st.cache_resource(show_spinner=False)
def load_data():
    """Load VectorStoreIndex from storage.

    Rebuilds a ServiceContext around GPT-3.5 with a TokenCountingHandler so
    every LLM call made through the index is metered. Decorated with
    st.cache_resource so the index is deserialized from ./storage only once
    per process, not on every Streamlit rerun.

    Returns:
        tuple: (index, token_counter) — the loaded index and the counter
        later drained by update_token_counters().
    """

    with st.spinner("Loading Vector Store Index..."):
        # Use the same tokenizer as gpt-3.5-turbo so counted tokens match
        # OpenAI's billing units.
        token_counter = TokenCountingHandler(
            tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
            verbose=False
        )

        callback_manager = CallbackManager([token_counter])
        service_context = ServiceContext.from_defaults(llm=OpenAI(model="gpt-3.5-turbo"), callback_manager=callback_manager)
        index = load_index_from_storage(StorageContext.from_defaults(persist_dir="./storage"), service_context=service_context)

    return index, token_counter
49 |
def display_chat_history(messages):
    """Render the stored conversation, one chat bubble per message."""

    for msg in messages:
        with st.chat_message(msg["role"]):
            # Show the citation box only when the toggle is on AND this
            # message actually carries sources.
            if st.session_state.with_sources and "sources" in msg:
                st.info(f'The sources of this response are:\n\n {msg["sources"]}')
            st.write(msg["content"])
59 |
60 |
def clear_chat_history():
    """Reset the conversation to the greeting and re-enable the sample-question buttons."""

    greeting = {"role": "assistant", "content": "Try one of the sample questions or ask your own!"}
    st.session_state.messages = [greeting]

    # Re-enable every sample-question button (False = not yet clicked).
    for flag in ("btn_llama_index", "btn_retriever", "btn_diff", "btn_rag"):
        st.session_state[flag] = False
71 |
72 |
def generate_assistant_response(prompt, chat_engine):
    """Generate assistant response and update token counter.

    Queries the chat engine (through the Streamlit data cache when the
    'Cache Results' toggle is on), renders the answer — optionally preceded
    by its source citations — and appends it to the session chat history.

    Args:
        prompt: the user's question.
        chat_engine: the index's chat engine used to answer it.
    """

    with st.chat_message("assistant"):
        with st.spinner("I am on it..."):
            # Cached path avoids a second LLM round-trip for a repeated prompt.
            if st.session_state.with_cache:
                response = query_chatengine_cache(prompt, chat_engine)
            else:
                response = query_chatengine(prompt, chat_engine)

            message = {"role": "assistant", "content": response.response, "sources": format_sources(response)}
            if st.session_state.with_sources:
                st.info(f'The sources of this response are:\n\n {message["sources"]}')
            st.write(message["content"])

    st.session_state.messages.append(message)
89 |
90 |
@st.cache_data(max_entries=1024, show_spinner=False)
def query_chatengine_cache(prompt, _chat_engine):
    """Query chat engine and cache results.

    The leading underscore on `_chat_engine` tells st.cache_data not to hash
    that argument, so the cache key is effectively the prompt alone.
    """
    return _chat_engine.chat(prompt)
95 |
96 |
def query_chatengine(prompt, chat_engine):
    """Forward *prompt* to the chat engine, bypassing the Streamlit cache."""
    response = chat_engine.chat(prompt)
    return response
100 |
101 |
def format_sources(response):
    """Build a markdown bullet list linking each source node to its file on GitHub."""
    base = "https://github.com/jerryjliu/llama_index/tree/main/"
    lines = []
    for source in get_metadata(response):
        lines.append(f"- {base}{source['filename']} (author: '{source['author']}'; score: {source['score']})\n")
    return "\n".join(lines)
106 |
107 |
def get_metadata(response):
    """Parse response source nodes and return a list of dictionaries with filenames, authors and scores.

    Args:
        response: chat-engine response exposing `source_nodes`; each node may
            carry a `metadata` dict ({'filename', 'author'}) and a `score`.

    Returns:
        list[dict]: one {'filename', 'author', 'score'} entry per node that
        has metadata.
    """

    sources = []
    for item in response.source_nodes:
        if hasattr(item, "metadata"):
            # 'filename' may be missing from the metadata dict: default to ''
            # instead of crashing on None.replace(). Backslashes (Windows
            # paths) are normalized so the GitHub links work.
            filename = (item.metadata.get('filename') or '').replace('\\', '/')
            author = item.metadata.get('author')
            # Some retrievers return score=None; report 0.0 rather than
            # raising from the float formatting.
            score = round(float(item.score), 3) if item.score is not None else 0.0
            sources.append({'filename': filename, 'author': author, 'score': score})

    return sources
120 |
121 |
def update_token_counters(token_counter):
    """Accumulate prompt/completion token usage into the session totals.

    Args:
        token_counter: the TokenCountingHandler attached to the service
            context in load_data().
    """

    st.session_state['llm_prompt_tokens'] += token_counter.prompt_llm_token_count
    st.session_state['llm_completion_tokens'] += token_counter.completion_llm_token_count

    # reset counter to avoid miscounting when the answer is cached!
    token_counter.reset_counts()
130 |
131 |
def sidebar():
    """Configure the sidebar and user's preferences.

    Renders three expanders (API key, cost estimate, settings), the
    clear-history button, and footer badges. Bare string expressions are
    Streamlit "magic" and are rendered as markdown.
    """

    with st.sidebar.expander("🔑 OPENAI-API-KEY", expanded=True):
        # The widget stores the key in st.session_state['openai_api_key'].
        # NOTE(review): .strip() acts on the *returned* value, which is
        # discarded — the stored session value is never trimmed; confirm
        # whether stripping was intended and apply it where the key is read.
        st.text_input(label='OPENAI-API-KEY', type='password', key='openai_api_key', label_visibility='hidden').strip()
        "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"

    with st.sidebar.expander("💲 GPT3.5 INFERENCE COST", expanded=True):
        i_tokens = st.session_state['llm_prompt_tokens']
        o_tokens = st.session_state['llm_completion_tokens']
        st.markdown(f'LLM Prompt: {i_tokens} tokens')
        st.markdown(f'LLM Completion: {o_tokens} tokens')

        # USD per 1K tokens: 0.0015 prompt / 0.002 completion
        # (gpt-3.5-turbo list prices at the time of writing).
        i_cost = (i_tokens / 1000) * 0.0015
        o_cost = (o_tokens / 1000) * 0.002
        st.markdown('**Cost Estimation: ${0}**'.format(round(i_cost + o_cost, 5)))
        "[OpenAI Pricing](https://openai.com/pricing)"

    with st.sidebar.expander("🔧 SETTINGS", expanded=True):
        st.toggle('Cache Results', value=True, key="with_cache")
        st.toggle('Display Sources', value=True, key="with_sources")
        st.toggle('Streaming', value=False, disabled=True, key="with_streaming")

    st.sidebar.button('Clear Messages', type="primary", on_click=clear_chat_history)
    st.sidebar.divider()
    # Footer badges linking to the LlamaIndex docs and this project's repo.
    with st.sidebar:
        col_ll, col_gh = st.columns([1, 1])
        with col_ll:
            "[](https://gpt-index.readthedocs.io/en/latest/index.html)"
        with col_gh:
            "[](https://github.com/dcarpintero/llamaindexchat)"
163 |
164 |
def layout():
    """Render the main page: header, sample questions, chat input and responses."""

    st.header("Chat with 🦙 LlamaIndex Docs 🗂️")

    # Get Started: halt the rerun until an API key has been provided.
    if not openai.api_key:
        st.warning("Hi there! Add your OPENAI-API-KEY on the sidebar field to get started!\n\n", icon="🚨")
        st.stop()

    # Load Index (cached across reruns) together with its token counter.
    index, token_counter = load_data()
    if index:
        chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)

    # Sample Questions for User input; each button disables itself once used
    # (the btn_* session flags are reset by clear_chat_history()).
    user_input_button = None

    btn_llama_index = st.session_state.get("btn_llama_index", False)
    btn_retriever = st.session_state.get("btn_retriever", False)
    btn_diff = st.session_state.get("btn_diff", False)
    btn_rag = st.session_state.get("btn_rag", False)

    col1, col2, col3, col4 = st.columns([1,1,1,1])

    with col1:
        if st.button("explain the basic usage pattern of LlamaIndex", type="primary", disabled=btn_llama_index):
            user_input_button = "explain the basic usage pattern in LlamaIndex"
            st.session_state.btn_llama_index = True
    with col2:
        if st.button("how can I ingest data from the GoogleDocsReader?", type="primary", disabled=btn_retriever):
            user_input_button = "how can I ingest data from the GoogleDocsReader?"
            st.session_state.btn_retriever = True
    with col3:
        if st.button("what's the difference between document & node?", type="primary", disabled=btn_diff):
            user_input_button = "what's the difference between document and node?"
            st.session_state.btn_diff = True
    with col4:
        if st.button("how can I make a RAG application performant?", type="primary", disabled=btn_rag):
            user_input_button = "how can I make a RAG application performant?"
            st.session_state.btn_rag = True

    # System Message: seed the conversation on the first run only.
    if "messages" not in st.session_state:
        st.session_state.messages = [
            {"role": "assistant", "content": "Try one of the sample questions or ask your own!"}
        ]

    # User input: a free-form question or one of the sample buttons.
    user_input = st.chat_input("Your question")
    if user_input or user_input_button:
        st.session_state.messages.append({"role": "user", "content": user_input or user_input_button})

    # Display previous chat
    display_chat_history(st.session_state.messages)

    # Generate response only when the last message is from the user
    # (i.e. a new question is pending an answer).
    if st.session_state.messages[-1]["role"] != "assistant":
        try:
            generate_assistant_response(user_input or user_input_button, chat_engine)
            update_token_counters(token_counter)

        except Exception as ex:
            st.error(str(ex))
229 |
230 |
def main():
    """Entry point: render the sidebar (preferences) then the main layout."""
    sidebar()
    layout()

if __name__ == "__main__":
    main()
238 |
--------------------------------------------------------------------------------
/assets/llamaindexchat-demo.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dcarpintero/llamaindexchat/74ea0e186b670fe429e8bf58d921f77d0e01f8f2/assets/llamaindexchat-demo.mp4
--------------------------------------------------------------------------------
/assets/llamaindexchat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dcarpintero/llamaindexchat/74ea0e186b670fe429e8bf58d921f77d0e01f8f2/assets/llamaindexchat.png
--------------------------------------------------------------------------------
/assets/sourcecitation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dcarpintero/llamaindexchat/74ea0e186b670fe429e8bf58d921f77d0e01f8f2/assets/sourcecitation.png
--------------------------------------------------------------------------------
/ingest_knowledge.py:
--------------------------------------------------------------------------------
1 | """
2 | Knowledge Ingestion of the markdown files in the docs folder of the llama_index repository:
3 | - https://github.com/jerryjliu/llama_index
4 |
5 | Built with LlamaIndex, GithubRepositoryReader, and OpenAI.
6 |
7 | Author:
8 | @dcarpintero : https://github.com/dcarpintero
9 | """
10 | from llama_index import download_loader, VectorStoreIndex
11 | from llama_index.node_parser import SimpleNodeParser
12 | from llama_hub.github_repo import GithubRepositoryReader, GithubClient
13 | from dotenv import load_dotenv
14 | import openai
15 | import os
16 | import logging
17 |
18 |
def load_environment_vars() -> dict:
    """Load required environment variables. Raise an exception if any are missing."""

    load_dotenv()

    # Collect both required variables, then validate in declaration order.
    env_vars = {name: os.getenv(name) for name in ("OPENAI_API_KEY", "GITHUB_TOKEN")}

    for name, value in env_vars.items():
        if not value:
            raise EnvironmentError(f"{name} environment variable not set.")

    logging.info("Environment variables loaded.")
    return env_vars
35 |
36 |
def initialize_github_loader(github_token: str) -> GithubRepositoryReader:
    """Initialize GithubRepositoryReader.

    Args:
        github_token: GitHub personal access token used to authenticate
            the API client.

    Returns:
        GithubRepositoryReader: reader restricted to the markdown files
        under the `docs` folder of the jerryjliu/llama_index repository.
    """

    download_loader("GithubRepositoryReader")
    github_client = GithubClient(github_token)

    loader = GithubRepositoryReader(
        github_client,
        owner = "jerryjliu",
        repo = "llama_index",
        # Only the documentation markdown is ingested into the knowledge base.
        filter_directories = (["docs"], GithubRepositoryReader.FilterType.INCLUDE),
        filter_file_extensions = ([".md"], GithubRepositoryReader.FilterType.INCLUDE),
        verbose = False,
        concurrent_requests = 10,
    )

    return loader
54 |
55 |
def load_and_index_data(loader) -> VectorStoreIndex:
    """Load and Index Knowledge Base from GitHub Repository.

    Convenience wrapper: fetch the documents, then build (and persist) the
    vector index over them.
    """

    return index_data(load_data(loader))
62 |
63 |
def load_data(loader: "GithubRepositoryReader") -> list:
    """Load Knowledge Base from GitHub Repository.

    Args:
        loader: configured GithubRepositoryReader pointed at the docs folder.

    Returns:
        list: Document objects, each with a {'filename', 'author'} metadata
        dict attached so sources can be cited in chatbot responses.
    """
    # Fix: the return annotation was `-> []` (an empty list *literal*, not a
    # type); `list` documents the contract correctly.

    logging.info("Loading data from Github: %s/%s", loader._owner, loader._repo)
    docs = loader.load_data(branch="main")
    for doc in docs:
        logging.info(doc.extra_info)
        # Custom metadata is inherited by the parsed nodes, enabling the app
        # to display source links, author and score per response.
        doc.metadata = {'filename': doc.extra_info['file_name'], 'author': "LlamaIndex"}

    return docs
74 |
def index_data(docs: list) -> VectorStoreIndex:
    """Index Documents.

    Parses *docs* into nodes (chunk_size=1024, overlap=32), embeds them into
    a VectorStoreIndex, and persists the index under ./storage for the app
    to load at startup.

    Args:
        docs: Document objects to index.

    Returns:
        VectorStoreIndex: the freshly built (and persisted) index.
    """
    # Fix: the parameter annotation was `docs: []` (an empty list *literal*
    # evaluated at def time, not a type); `list` states the contract.

    logging.info("Parsing documents into nodes...")
    parser = SimpleNodeParser.from_defaults(chunk_size=1024, chunk_overlap=32)
    nodes = parser.get_nodes_from_documents(docs)

    logging.info("Indexing nodes...")
    # Embedding happens here via the default service context (OpenAI calls).
    index = VectorStoreIndex(nodes)

    logging.info("Persisting index on ./storage...")
    index.storage_context.persist(persist_dir="./storage")

    logging.info("Data-Knowledge ingestion process is completed (OK)")
    return index
90 |
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

    try:
        env_vars = load_environment_vars()
        openai.api_key = env_vars['OPENAI_API_KEY']

        loader = initialize_github_loader(env_vars['GITHUB_TOKEN'])
        load_and_index_data(loader)
    except Exception as ex:
        logging.error("Unexpected Error: %s", ex)
        # Fix: bare `raise` re-raises the active exception in place;
        # `raise ex` would append a redundant frame to the traceback.
        raise
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | llama-hub==0.0.38
2 | llama-index==0.8.44
3 | nltk==3.8.1
4 | openai==0.28.0
5 | python-dotenv==1.0.0
6 | streamlit==1.27.2
7 | tiktoken==0.5.1
8 | sentence-transformers==2.2.2
--------------------------------------------------------------------------------
/storage/graph_store.json:
--------------------------------------------------------------------------------
1 | {"graph_dict": {}}
--------------------------------------------------------------------------------
/storage/index_store.json:
--------------------------------------------------------------------------------
1 | {"index_store/data": {"7f938a8c-2e4a-472b-887a-8709b15b8e75": {"__type__": "vector_store", "__data__": "{\"index_id\": \"7f938a8c-2e4a-472b-887a-8709b15b8e75\", \"summary\": null, \"nodes_dict\": {\"4437d6b6-ead6-4a1d-9251-df93995d95b8\": \"4437d6b6-ead6-4a1d-9251-df93995d95b8\", \"e6af2f51-3bf0-45b1-8297-620133b65139\": \"e6af2f51-3bf0-45b1-8297-620133b65139\", \"e75ac405-364a-43a9-b9c3-95e2bf6f4185\": \"e75ac405-364a-43a9-b9c3-95e2bf6f4185\", \"679f0feb-a0e4-450e-9d1d-f93372bc8235\": \"679f0feb-a0e4-450e-9d1d-f93372bc8235\", \"1e6e7b26-278d-414d-a59e-556fbe6e211f\": \"1e6e7b26-278d-414d-a59e-556fbe6e211f\", \"afa369ba-648a-4758-803a-1386ec8dfff9\": \"afa369ba-648a-4758-803a-1386ec8dfff9\", \"5bf8d116-1a65-47ea-b92c-dea5bee72bf3\": \"5bf8d116-1a65-47ea-b92c-dea5bee72bf3\", \"b025c8a5-eb7a-481d-9fed-688ab2e976be\": \"b025c8a5-eb7a-481d-9fed-688ab2e976be\", \"30e455b0-1e7b-43d2-a247-65f89200d59b\": \"30e455b0-1e7b-43d2-a247-65f89200d59b\", \"5ebcf58d-2516-4f5e-8d00-2cb8d086edaa\": \"5ebcf58d-2516-4f5e-8d00-2cb8d086edaa\", \"8122e965-68d2-4c8c-be75-f230747c599e\": \"8122e965-68d2-4c8c-be75-f230747c599e\", \"cce95214-17e7-4280-9ff7-f0339a6cc7b1\": \"cce95214-17e7-4280-9ff7-f0339a6cc7b1\", \"e2e0ae66-1caa-4ffb-b490-0e086abef319\": \"e2e0ae66-1caa-4ffb-b490-0e086abef319\", \"3b4a69ca-9f98-4e7e-8444-82b567e8452d\": \"3b4a69ca-9f98-4e7e-8444-82b567e8452d\", \"219e3285-8514-4d57-aabc-b9be90bcee77\": \"219e3285-8514-4d57-aabc-b9be90bcee77\", \"f426cf7b-db7b-4b36-970b-111415f503fb\": \"f426cf7b-db7b-4b36-970b-111415f503fb\", \"c0db3248-daff-412a-995f-9c26aee4eeb7\": \"c0db3248-daff-412a-995f-9c26aee4eeb7\", \"811a1f10-f697-4c37-9591-b6f8b87a9e11\": \"811a1f10-f697-4c37-9591-b6f8b87a9e11\", \"9f280e4b-20e3-47bf-bef0-cff81dd64831\": \"9f280e4b-20e3-47bf-bef0-cff81dd64831\", \"474c71b0-ce49-4b88-85a4-e1ec94ce3738\": \"474c71b0-ce49-4b88-85a4-e1ec94ce3738\", \"f8101c5f-71f6-4812-9e3b-6547502b5211\": \"f8101c5f-71f6-4812-9e3b-6547502b5211\", 
\"f7597301-8057-424e-9d46-f2e59157cd91\": \"f7597301-8057-424e-9d46-f2e59157cd91\", \"f2053e36-16d6-4f9e-bd97-ee8e5e4faad9\": \"f2053e36-16d6-4f9e-bd97-ee8e5e4faad9\", \"9f9535a8-5be2-48d7-bcfa-2345258ee81a\": \"9f9535a8-5be2-48d7-bcfa-2345258ee81a\", \"bd56e86b-54cc-4e3a-baa0-5e37c98227aa\": \"bd56e86b-54cc-4e3a-baa0-5e37c98227aa\", \"01e3977e-f6d5-45a2-b686-b5130b963879\": \"01e3977e-f6d5-45a2-b686-b5130b963879\", \"c8702fc4-96f5-4890-800b-41eab8d9e957\": \"c8702fc4-96f5-4890-800b-41eab8d9e957\", \"1c9e7a9f-0ee3-4670-9074-978616b35410\": \"1c9e7a9f-0ee3-4670-9074-978616b35410\", \"6f9c0fbd-5a60-4de4-8edc-b0c70177461d\": \"6f9c0fbd-5a60-4de4-8edc-b0c70177461d\", \"7c2aec87-a8df-4e9a-a304-1e62a164efce\": \"7c2aec87-a8df-4e9a-a304-1e62a164efce\", \"e52f36a5-f33b-43c1-9cb8-bb1b85b5911b\": \"e52f36a5-f33b-43c1-9cb8-bb1b85b5911b\", \"7dea053a-d622-4c32-b6ba-eb1b8c64a10f\": \"7dea053a-d622-4c32-b6ba-eb1b8c64a10f\", \"d73fccdc-8c0f-4b39-b308-1a16bb28b10c\": \"d73fccdc-8c0f-4b39-b308-1a16bb28b10c\", \"07ec6934-f93b-4bae-828e-38808b9e6bc6\": \"07ec6934-f93b-4bae-828e-38808b9e6bc6\", \"67c83569-5531-4a11-b0cb-5d95f16b7450\": \"67c83569-5531-4a11-b0cb-5d95f16b7450\", \"f9c4f6bb-62d7-42d6-8a92-5a1f035e0b70\": \"f9c4f6bb-62d7-42d6-8a92-5a1f035e0b70\", \"e22c2099-983c-4151-a2ef-b7e3ac87623a\": \"e22c2099-983c-4151-a2ef-b7e3ac87623a\", \"8dbd8b26-46bf-4ee8-866b-02e979cfea67\": \"8dbd8b26-46bf-4ee8-866b-02e979cfea67\", \"44715510-67b8-431c-9ad6-321b8cffc5d1\": \"44715510-67b8-431c-9ad6-321b8cffc5d1\", \"25180ddc-2ed7-409a-abb1-82f600017635\": \"25180ddc-2ed7-409a-abb1-82f600017635\", \"6cd88956-15fe-496c-b768-9bdc4107c57c\": \"6cd88956-15fe-496c-b768-9bdc4107c57c\", \"3a3b62b8-4da6-4dd5-ba96-338bbc9deb24\": \"3a3b62b8-4da6-4dd5-ba96-338bbc9deb24\", \"46562a93-963b-4a61-ba5c-c5cf7876b8b2\": \"46562a93-963b-4a61-ba5c-c5cf7876b8b2\", \"27040026-fa6b-4c67-86b4-71232e5e56da\": \"27040026-fa6b-4c67-86b4-71232e5e56da\", \"506de4fd-7b68-4a6b-ab6e-151903c85672\": 
\"506de4fd-7b68-4a6b-ab6e-151903c85672\", \"f9f1105f-8f5d-4942-a700-13f2bb1a6abb\": \"f9f1105f-8f5d-4942-a700-13f2bb1a6abb\", \"b6977624-9a24-46bb-a840-4d6132bdfe80\": \"b6977624-9a24-46bb-a840-4d6132bdfe80\", \"735bc8a0-d89a-422d-8b09-53b423ad58fd\": \"735bc8a0-d89a-422d-8b09-53b423ad58fd\", \"bfb19442-7092-4b42-a9a7-9c064aa615f8\": \"bfb19442-7092-4b42-a9a7-9c064aa615f8\", \"e8f9b733-7c89-4eca-b99d-9a07fcf20152\": \"e8f9b733-7c89-4eca-b99d-9a07fcf20152\", \"1308b8dc-b079-497f-b853-0b147528dfd2\": \"1308b8dc-b079-497f-b853-0b147528dfd2\", \"811b2cb7-db86-4a14-a8a9-46b0a494c161\": \"811b2cb7-db86-4a14-a8a9-46b0a494c161\", \"c1162b51-89f2-4f63-947a-8e53179cea32\": \"c1162b51-89f2-4f63-947a-8e53179cea32\", \"d8f9b13b-6791-42b3-bd6e-b000d8b06da2\": \"d8f9b13b-6791-42b3-bd6e-b000d8b06da2\", \"9b96a9ee-9180-4349-a49c-01882c8c2379\": \"9b96a9ee-9180-4349-a49c-01882c8c2379\", \"7fcb8820-8880-4da2-a3a4-89c1899defc9\": \"7fcb8820-8880-4da2-a3a4-89c1899defc9\", \"d82c4727-508f-414e-bd64-5a5e6ee875c7\": \"d82c4727-508f-414e-bd64-5a5e6ee875c7\", \"5576d7f8-447c-4cbb-84c3-314a78d44f8f\": \"5576d7f8-447c-4cbb-84c3-314a78d44f8f\", \"e48b0d18-726d-471d-9702-0a63215a1d2e\": \"e48b0d18-726d-471d-9702-0a63215a1d2e\", \"7acbc142-6cba-4d56-8100-21465098753a\": \"7acbc142-6cba-4d56-8100-21465098753a\", \"626a612e-5a29-45d3-a4e0-bf4e7c89b2d5\": \"626a612e-5a29-45d3-a4e0-bf4e7c89b2d5\", \"f73077f6-5d32-4c13-af90-2452491bcd54\": \"f73077f6-5d32-4c13-af90-2452491bcd54\", \"4dab9719-d28e-4d95-a862-eb6e16b925c7\": \"4dab9719-d28e-4d95-a862-eb6e16b925c7\", \"d8206bbc-6e15-4345-964f-0db151da7a97\": \"d8206bbc-6e15-4345-964f-0db151da7a97\", \"e0caea2b-f168-4262-9b95-224c6b8996b2\": \"e0caea2b-f168-4262-9b95-224c6b8996b2\", \"c2b9f3c1-0183-4266-8ee0-e3b421a1e7f0\": \"c2b9f3c1-0183-4266-8ee0-e3b421a1e7f0\", \"7d0f937c-a29c-4a7c-9ef3-682d8123adab\": \"7d0f937c-a29c-4a7c-9ef3-682d8123adab\", \"38868d0e-6a25-4e21-a37c-139717704171\": \"38868d0e-6a25-4e21-a37c-139717704171\", 
\"a9858431-5d39-4132-8408-d13cfcd97201\": \"a9858431-5d39-4132-8408-d13cfcd97201\", \"985acb4b-717c-4e17-b03f-4cad246da1c7\": \"985acb4b-717c-4e17-b03f-4cad246da1c7\", \"a612f8cc-967f-4289-bd5c-b1fd178bcd40\": \"a612f8cc-967f-4289-bd5c-b1fd178bcd40\", \"28c994a0-580e-4746-a12e-d5ac10b1f26f\": \"28c994a0-580e-4746-a12e-d5ac10b1f26f\", \"caa6858d-0b2d-462a-9e7d-a6b5ddfdbf0c\": \"caa6858d-0b2d-462a-9e7d-a6b5ddfdbf0c\", \"5a2d46f6-4ddf-48ce-b498-1b0d24cc2cb2\": \"5a2d46f6-4ddf-48ce-b498-1b0d24cc2cb2\", \"bccebf27-b755-4da4-9864-3e2098e6da3e\": \"bccebf27-b755-4da4-9864-3e2098e6da3e\", \"ac8b9270-ffc5-4520-b9d2-5b2834a8b691\": \"ac8b9270-ffc5-4520-b9d2-5b2834a8b691\", \"05d8bf6c-6183-4751-8a8a-1ae83a70a652\": \"05d8bf6c-6183-4751-8a8a-1ae83a70a652\", \"dccd2d0b-197d-4871-9aaf-f42ea6d4c119\": \"dccd2d0b-197d-4871-9aaf-f42ea6d4c119\", \"40f8c65b-652a-434b-83d5-d413348c015e\": \"40f8c65b-652a-434b-83d5-d413348c015e\", \"61aeaf9c-7e1b-4638-9dbb-aa23f87aefb4\": \"61aeaf9c-7e1b-4638-9dbb-aa23f87aefb4\", \"10dfb162-591a-4c8a-af2b-ad51ac5635a4\": \"10dfb162-591a-4c8a-af2b-ad51ac5635a4\", \"f9b0a12b-11c9-4a58-ae7f-31b9e2a8c3a9\": \"f9b0a12b-11c9-4a58-ae7f-31b9e2a8c3a9\", \"330c65bc-2b0e-4d90-89f0-e8c59f1a0b2e\": \"330c65bc-2b0e-4d90-89f0-e8c59f1a0b2e\", \"030b0cd8-541b-47e7-befb-69aaeeb69fd0\": \"030b0cd8-541b-47e7-befb-69aaeeb69fd0\", \"76b26bd0-4bde-4854-b485-e097abae0a0f\": \"76b26bd0-4bde-4854-b485-e097abae0a0f\", \"2551cd80-a535-4dec-8f2c-50095a3dfa39\": \"2551cd80-a535-4dec-8f2c-50095a3dfa39\", \"d8920524-7a92-4572-9c2a-b8a3e37a3970\": \"d8920524-7a92-4572-9c2a-b8a3e37a3970\", \"2fd9c628-f1f2-4c0c-8f1f-5acc1000985b\": \"2fd9c628-f1f2-4c0c-8f1f-5acc1000985b\", \"5be3f2b3-b666-4d89-afe9-9308b99dddfa\": \"5be3f2b3-b666-4d89-afe9-9308b99dddfa\", \"8e27449d-52ac-41cb-ba21-9dbf8b23ee0a\": \"8e27449d-52ac-41cb-ba21-9dbf8b23ee0a\", \"53d695ce-fc96-4ce3-9f81-a6f4bc42c7d9\": \"53d695ce-fc96-4ce3-9f81-a6f4bc42c7d9\", \"60e2a7b0-3822-4db0-b36f-fe4c5f79749f\": 
\"60e2a7b0-3822-4db0-b36f-fe4c5f79749f\", \"2f005cd4-360b-4e92-8a7a-7638c571428c\": \"2f005cd4-360b-4e92-8a7a-7638c571428c\", \"f848b2dc-c545-438a-aac6-2da67a02ad7d\": \"f848b2dc-c545-438a-aac6-2da67a02ad7d\", \"eb6b3f75-8e8f-4b4d-8ea7-405bad15aa42\": \"eb6b3f75-8e8f-4b4d-8ea7-405bad15aa42\", \"dc78f6aa-4eb3-4885-b984-635e33d81e1a\": \"dc78f6aa-4eb3-4885-b984-635e33d81e1a\", \"b35c455a-9d46-4ade-8432-722997229b01\": \"b35c455a-9d46-4ade-8432-722997229b01\", \"1f0e9a72-a9c4-468b-8f4f-7104f003068e\": \"1f0e9a72-a9c4-468b-8f4f-7104f003068e\", \"2169d002-ca5c-476c-8df3-eb9e39c74eaf\": \"2169d002-ca5c-476c-8df3-eb9e39c74eaf\", \"5ba2a608-17fa-4791-89f8-23f07a77bc3d\": \"5ba2a608-17fa-4791-89f8-23f07a77bc3d\", \"d10b62f5-9f13-484e-8f54-bb8ccaff358a\": \"d10b62f5-9f13-484e-8f54-bb8ccaff358a\", \"0c2a9fc8-fcd4-4543-ab26-deee440d6f69\": \"0c2a9fc8-fcd4-4543-ab26-deee440d6f69\", \"8eee734c-38a3-417c-b234-8cbb9df1b1aa\": \"8eee734c-38a3-417c-b234-8cbb9df1b1aa\", \"701411f3-d6cf-472f-9044-21e5e86f5f4a\": \"701411f3-d6cf-472f-9044-21e5e86f5f4a\", \"c40fb52a-59ce-4300-93ed-e52bc1c0a18f\": \"c40fb52a-59ce-4300-93ed-e52bc1c0a18f\", \"fa87eaf5-9ccf-4d36-bb6e-6619ef22e5e2\": \"fa87eaf5-9ccf-4d36-bb6e-6619ef22e5e2\", \"0912f890-b478-4670-9e15-d994aac2cf44\": \"0912f890-b478-4670-9e15-d994aac2cf44\", \"696f5357-db8c-4c4b-81f0-5259c91ce418\": \"696f5357-db8c-4c4b-81f0-5259c91ce418\", \"96540789-826e-40b9-ba7a-6137b1eae712\": \"96540789-826e-40b9-ba7a-6137b1eae712\", \"6f88b8dc-f3a2-4c1a-bd4b-7b50716f06c6\": \"6f88b8dc-f3a2-4c1a-bd4b-7b50716f06c6\", \"1bbaca72-5f2b-4df7-9b82-2fbc645f8eb2\": \"1bbaca72-5f2b-4df7-9b82-2fbc645f8eb2\", \"20d0b081-42d7-4dfc-890f-a3f22e25a4ca\": \"20d0b081-42d7-4dfc-890f-a3f22e25a4ca\", \"0696e8c1-c965-456c-a011-9ef3e297b71a\": \"0696e8c1-c965-456c-a011-9ef3e297b71a\", \"96252df0-81a0-4037-bb4f-3fdc546ef84d\": \"96252df0-81a0-4037-bb4f-3fdc546ef84d\", \"c3288a8a-9ee9-4db9-b48b-1d5a7d091913\": \"c3288a8a-9ee9-4db9-b48b-1d5a7d091913\", 
\"948fa402-50f5-47b4-bbf9-cfb44fb9b649\": \"948fa402-50f5-47b4-bbf9-cfb44fb9b649\", \"0dbc1fbc-a574-4dd3-b0a0-df32bd0218b0\": \"0dbc1fbc-a574-4dd3-b0a0-df32bd0218b0\", \"127950f3-614b-4ab1-aa62-17af686c4039\": \"127950f3-614b-4ab1-aa62-17af686c4039\", \"a495af7c-c87a-496b-a868-5b8f30953f42\": \"a495af7c-c87a-496b-a868-5b8f30953f42\", \"d75a6a73-65df-4a57-96bf-41bf1d77ab17\": \"d75a6a73-65df-4a57-96bf-41bf1d77ab17\", \"8c21c0d0-cf6b-4a03-80c6-66f9bb5c153b\": \"8c21c0d0-cf6b-4a03-80c6-66f9bb5c153b\", \"3e553d2d-28b3-4576-bd6a-e4833517ee4b\": \"3e553d2d-28b3-4576-bd6a-e4833517ee4b\", \"290972af-dd34-49b7-8c81-43d512b3d500\": \"290972af-dd34-49b7-8c81-43d512b3d500\", \"d9f52f29-8ac7-40e5-81e6-b4e1cf135561\": \"d9f52f29-8ac7-40e5-81e6-b4e1cf135561\", \"6558c03c-1f31-416b-9f95-f9a8e51c90f8\": \"6558c03c-1f31-416b-9f95-f9a8e51c90f8\", \"30cc6acc-df1d-4249-be11-3c1e90cbc9a9\": \"30cc6acc-df1d-4249-be11-3c1e90cbc9a9\", \"d0afa067-1613-42a3-a67d-d440ad8901b2\": \"d0afa067-1613-42a3-a67d-d440ad8901b2\", \"e319874d-fc82-4b5a-a288-52af9c990a82\": \"e319874d-fc82-4b5a-a288-52af9c990a82\", \"81d03ce7-6627-45a7-8a9f-66374ec48eb0\": \"81d03ce7-6627-45a7-8a9f-66374ec48eb0\", \"c27b9bc8-c421-4da3-8b0b-2a72343235a9\": \"c27b9bc8-c421-4da3-8b0b-2a72343235a9\", \"4b7ca0e6-132d-4928-a648-c728ad3171a1\": \"4b7ca0e6-132d-4928-a648-c728ad3171a1\", \"afdaba8b-1eea-488d-b4da-ebab174cec70\": \"afdaba8b-1eea-488d-b4da-ebab174cec70\", \"8c626d5e-a15a-43a8-be3f-18a0a556655a\": \"8c626d5e-a15a-43a8-be3f-18a0a556655a\", \"98988af5-7ed4-42ce-a24e-b2eae1327a55\": \"98988af5-7ed4-42ce-a24e-b2eae1327a55\", \"bfa861a8-eead-4df4-b4b9-0987c129ad3e\": \"bfa861a8-eead-4df4-b4b9-0987c129ad3e\", \"31d74118-6633-45bd-939d-20eb4f24a622\": \"31d74118-6633-45bd-939d-20eb4f24a622\", \"97ad579d-fce9-484d-8665-0c18c2151d28\": \"97ad579d-fce9-484d-8665-0c18c2151d28\", \"2bc3ad40-0112-49aa-8bf1-7aa42d89642f\": \"2bc3ad40-0112-49aa-8bf1-7aa42d89642f\", \"cd444059-4829-454b-84a7-5890a6c92a80\": 
\"cd444059-4829-454b-84a7-5890a6c92a80\", \"b5736017-4998-4aea-ba88-7fcd6b544808\": \"b5736017-4998-4aea-ba88-7fcd6b544808\", \"cc232f0b-69fb-45af-bbc9-77e97ff5e84c\": \"cc232f0b-69fb-45af-bbc9-77e97ff5e84c\", \"45e5b4de-d494-4270-880b-1fa853e039ab\": \"45e5b4de-d494-4270-880b-1fa853e039ab\", \"49ce9594-5f43-407c-9e20-4366e340ff72\": \"49ce9594-5f43-407c-9e20-4366e340ff72\", \"6868a6e6-abd1-4a18-a7d4-0a8bc78aaa29\": \"6868a6e6-abd1-4a18-a7d4-0a8bc78aaa29\", \"824f712c-18a8-49e1-b746-94906a75383d\": \"824f712c-18a8-49e1-b746-94906a75383d\", \"d35927e0-0092-4e38-b844-9f1d050d2862\": \"d35927e0-0092-4e38-b844-9f1d050d2862\", \"efa12611-2447-4d58-8a08-26a3704535b8\": \"efa12611-2447-4d58-8a08-26a3704535b8\", \"8a8f7393-61ee-48f4-a416-8165f53882ac\": \"8a8f7393-61ee-48f4-a416-8165f53882ac\", \"657efc36-df83-4e56-933f-89039a0395a5\": \"657efc36-df83-4e56-933f-89039a0395a5\", \"72a3bcd2-7945-444e-965b-c18a766f7a5d\": \"72a3bcd2-7945-444e-965b-c18a766f7a5d\", \"1d32a6ac-f244-4992-bd34-440a2182a532\": \"1d32a6ac-f244-4992-bd34-440a2182a532\", \"01a3a473-0083-40fa-8a11-75f394861e2d\": \"01a3a473-0083-40fa-8a11-75f394861e2d\", \"15707a2c-3f1e-4723-86fd-67091dcf67ba\": \"15707a2c-3f1e-4723-86fd-67091dcf67ba\", \"8c778fa6-168d-4457-a731-fcefc050a6d3\": \"8c778fa6-168d-4457-a731-fcefc050a6d3\", \"52e7f52d-519c-48fd-85e3-73a0d296b135\": \"52e7f52d-519c-48fd-85e3-73a0d296b135\", \"2fb6d4f0-f671-424e-8640-8accc6adab3b\": \"2fb6d4f0-f671-424e-8640-8accc6adab3b\", \"96dca026-efdc-4ac7-90bf-906b643f1573\": \"96dca026-efdc-4ac7-90bf-906b643f1573\", \"a27331a1-1bad-4ccd-9bd0-429465d80172\": \"a27331a1-1bad-4ccd-9bd0-429465d80172\", \"6dcacd63-f1ce-4905-a2b3-a367a12576da\": \"6dcacd63-f1ce-4905-a2b3-a367a12576da\", \"a0d2eca7-f20c-49eb-9cd7-6a80d576903c\": \"a0d2eca7-f20c-49eb-9cd7-6a80d576903c\", \"3b1f1bc5-e57a-4694-b624-7aa73d18f9d9\": \"3b1f1bc5-e57a-4694-b624-7aa73d18f9d9\", \"9d950bef-82ba-44c8-8f18-975040b880f2\": \"9d950bef-82ba-44c8-8f18-975040b880f2\", 
\"6e0f2a6d-1eb9-4f30-b31b-55ae0e800e41\": \"6e0f2a6d-1eb9-4f30-b31b-55ae0e800e41\", \"f1bc3745-7815-4391-8174-7fc8d283a3b4\": \"f1bc3745-7815-4391-8174-7fc8d283a3b4\", \"a607c6b3-397e-4db9-b6ae-e7abd7a7dcca\": \"a607c6b3-397e-4db9-b6ae-e7abd7a7dcca\", \"2ccd8336-2484-4eae-97b2-12f9eae6c1e6\": \"2ccd8336-2484-4eae-97b2-12f9eae6c1e6\", \"ee52c155-2606-41fe-9846-bfd036b7b5e2\": \"ee52c155-2606-41fe-9846-bfd036b7b5e2\", \"d1231944-90c4-4c97-b5d7-fde7e74ccc21\": \"d1231944-90c4-4c97-b5d7-fde7e74ccc21\", \"45c9d7ae-cd4f-4588-b588-fdfee152552e\": \"45c9d7ae-cd4f-4588-b588-fdfee152552e\", \"2b2b1191-5427-4142-af6c-010b7c64bcbc\": \"2b2b1191-5427-4142-af6c-010b7c64bcbc\", \"7a0954fe-8e8c-402f-b89c-ebb4b163df9e\": \"7a0954fe-8e8c-402f-b89c-ebb4b163df9e\", \"8c1168ba-de1c-4147-a569-fbae40ef8a50\": \"8c1168ba-de1c-4147-a569-fbae40ef8a50\", \"7b5e823d-e520-446c-962d-0235c8a2c5c5\": \"7b5e823d-e520-446c-962d-0235c8a2c5c5\", \"6b920c82-667e-473f-a2c0-2b881870a3f2\": \"6b920c82-667e-473f-a2c0-2b881870a3f2\", \"64db79af-099c-48d7-b5b9-85fd7709df64\": \"64db79af-099c-48d7-b5b9-85fd7709df64\", \"258808ef-4ff1-4f26-a477-640304ad78bb\": \"258808ef-4ff1-4f26-a477-640304ad78bb\", \"5772201a-3ccc-4a5d-acaf-a9eb0639fe43\": \"5772201a-3ccc-4a5d-acaf-a9eb0639fe43\", \"a7854179-f654-4452-bee0-2e6374805dfc\": \"a7854179-f654-4452-bee0-2e6374805dfc\", \"c536587f-791c-4772-8240-4ba228f5940c\": \"c536587f-791c-4772-8240-4ba228f5940c\", \"b3d9f8c2-cbbc-48cc-9629-8c91b5fdb8bd\": \"b3d9f8c2-cbbc-48cc-9629-8c91b5fdb8bd\", \"c0afb204-4477-4656-aaf8-694298244255\": \"c0afb204-4477-4656-aaf8-694298244255\", \"de9ae151-e2de-44ac-b01a-b5e2646da0cb\": \"de9ae151-e2de-44ac-b01a-b5e2646da0cb\", \"b8cc04a2-dcdc-4906-93a6-5680da2419b9\": \"b8cc04a2-dcdc-4906-93a6-5680da2419b9\", \"2e13ebfd-5b4e-4cf0-ada8-90a14f3b2460\": \"2e13ebfd-5b4e-4cf0-ada8-90a14f3b2460\", \"85dcd5ea-0c20-4794-b57b-921ea5d26d07\": \"85dcd5ea-0c20-4794-b57b-921ea5d26d07\", \"dc9241f9-6eab-4ef3-9b1e-8d8b848793b1\": 
\"dc9241f9-6eab-4ef3-9b1e-8d8b848793b1\", \"cc31c75a-073c-454c-afa5-386cca33651b\": \"cc31c75a-073c-454c-afa5-386cca33651b\", \"cfb0fbc0-d960-4800-9444-02cb6807cec8\": \"cfb0fbc0-d960-4800-9444-02cb6807cec8\", \"32cff3f3-52e3-49c6-aed1-ef7cf3e01a80\": \"32cff3f3-52e3-49c6-aed1-ef7cf3e01a80\", \"e752d1df-66ca-49a5-a082-c5c5924c6bde\": \"e752d1df-66ca-49a5-a082-c5c5924c6bde\", \"5dadfaa5-7c77-47b0-8095-e55f75b47de2\": \"5dadfaa5-7c77-47b0-8095-e55f75b47de2\", \"f3cec787-bb9f-4a16-b5c1-8f2b60e67e51\": \"f3cec787-bb9f-4a16-b5c1-8f2b60e67e51\", \"cd8c80cc-4f13-4cd6-bce3-e120fd8e252e\": \"cd8c80cc-4f13-4cd6-bce3-e120fd8e252e\", \"8fd22a72-b9d0-44e8-a82d-daad4ee4d958\": \"8fd22a72-b9d0-44e8-a82d-daad4ee4d958\", \"5d5471e1-ed95-4deb-83ec-eb06ae3147bf\": \"5d5471e1-ed95-4deb-83ec-eb06ae3147bf\", \"e43348b4-496d-466c-ab26-1b0027fb8ede\": \"e43348b4-496d-466c-ab26-1b0027fb8ede\", \"c586ca61-5735-49bc-ac31-dbc9677f884a\": \"c586ca61-5735-49bc-ac31-dbc9677f884a\", \"5f685536-34c9-4246-b8de-d9babb267b3c\": \"5f685536-34c9-4246-b8de-d9babb267b3c\", \"5863ca78-b666-457d-ac16-851c2e683cc9\": \"5863ca78-b666-457d-ac16-851c2e683cc9\", \"36c049ca-bfa2-4835-9c50-b1e5f333e759\": \"36c049ca-bfa2-4835-9c50-b1e5f333e759\", \"41bc4137-2d62-416e-9f3d-c92b75b40965\": \"41bc4137-2d62-416e-9f3d-c92b75b40965\", \"4a242449-8afc-476b-b24b-71b5aadb4cfe\": \"4a242449-8afc-476b-b24b-71b5aadb4cfe\", \"1348302c-f1a8-4312-90db-958594383c5a\": \"1348302c-f1a8-4312-90db-958594383c5a\", \"f1aaf379-77fe-4e70-adbe-adc62ec58f48\": \"f1aaf379-77fe-4e70-adbe-adc62ec58f48\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
--------------------------------------------------------------------------------