├── LICENSE ├── PITS_APP ├── app.py ├── cache │ └── .gitignore ├── conversation_engine.py ├── document_uploader.py ├── global_settings.py ├── index_builder.py ├── index_storage │ └── .gitignore ├── ingestion_storage │ └── .gitignore ├── logging_functions.py ├── quiz_UI.py ├── quiz_builder.py ├── requirements.txt ├── session_data │ └── .gitignore ├── session_functions.py ├── slides.py ├── training_UI.py ├── training_material_builder.py └── user_onboarding.py ├── README.md ├── ch10 ├── files │ ├── sample_document1.txt │ └── sample_document2.txt ├── sample_get_prompts.py ├── sample_metadata_custom_prompt_templates.py ├── sample_metadata_summary_extractor.py └── sample_update_prompts.py ├── ch11 └── .gitignore ├── ch2 ├── files │ ├── sample_document1.txt │ └── sample_document2.txt └── sample1.py ├── ch3 ├── sample_App_Chat_About_Messi.py ├── sample_Create_Doc.py ├── sample_Create_Doc_from_Wiki.py ├── sample_Create_Node.py ├── sample_Create_Node_Relationships.py ├── sample_Create_and_Query_Index.py ├── sample_Customize_LLM.py ├── sample_LLM_parameters.py └── sample_TokenTextSplitter.py ├── ch4 ├── files │ ├── db │ │ └── example.db │ ├── others │ │ ├── sample.html │ │ ├── sample.json │ │ └── sample.md │ ├── sample_document1.txt │ └── sample_document2.txt ├── sample_Estimate_Costs.py ├── sample_IngestionPipeline.py ├── sample_PII_scrub.py ├── sample_extractor_Custom.py ├── sample_extractor_EntityExtractor.py ├── sample_extractor_KeywordExtractor.py ├── sample_extractor_QuestionAnsweredExtractor.py ├── sample_extractor_SummaryExtractor.py ├── sample_extractor_TitleExtractor.py ├── sample_node_creation1.py ├── sample_node_creation2.py ├── sample_parser_HTMLNodeParser.py ├── sample_parser_HierarchicalNodeParser.py ├── sample_parser_JSONNodeParser.py ├── sample_parser_LangchainNodeParser.py ├── sample_parser_MarkdownNodeParser.py ├── sample_parser_SentenceWindowNodeParser.py ├── sample_parser_SimpleFileNodeParser.py ├── sample_parser_in_ServiceContext.py ├── 
sample_reader_DatabaseReader.py ├── sample_reader_DiscordReader.py ├── sample_reader_GitHubRepositoryReader.py ├── sample_reader_OpenMap.py ├── sample_reader_S3Reader.py ├── sample_reader_SimpleDirectoryReader.py ├── sample_reader_SimpleWebPageReader.py ├── sample_splitter_CodeSplitter.py └── sample_splitter_TokenTextSplitter.py ├── ch5 ├── chroma_database │ ├── c682bd4d-855c-4ed1-9894-22884774223d │ │ ├── data_level0.bin │ │ ├── header.bin │ │ ├── length.bin │ │ └── link_lists.bin │ └── chroma.sqlite3 ├── cost_prediction_samples │ └── fluffy_the_cat.txt ├── data │ └── sample.md ├── files │ ├── others │ │ ├── sample.html │ │ ├── sample.json │ │ └── sample.md │ ├── sample_document1.txt │ └── sample_document2.txt ├── index_cache │ ├── default__vector_store.json │ ├── docstore.json │ ├── graph_store.json │ ├── image__vector_store.json │ └── index_store.json ├── sample_ChromaDB.py ├── sample_ComposableGraph.py ├── sample_DocumentSummaryIndex.py ├── sample_KeywordTableIndex.py ├── sample_KnowledgeGraphIndex.py ├── sample_SummaryIndex.py ├── sample_TreeIndex.py ├── sample_VectorStoreIndex.py ├── sample_cost_estimation1.py ├── sample_cost_estimation2.py ├── sample_local_embedding.py ├── sample_persist.py └── sample_persist_reload.py ├── ch6 ├── files │ ├── others │ │ ├── sample.html │ │ ├── sample.json │ │ └── sample.md │ ├── sample_document1.txt │ └── sample_document2.txt ├── sample_bm25_retriever.py ├── sample_decompose_query_transform.py ├── sample_metadata_filters.py ├── sample_openai_question_generator.py ├── sample_retriever_async.py ├── sample_retriever_direct.py ├── sample_retriever_from_index1.py ├── sample_retriever_tools.py └── sample_selectors.py ├── ch7 ├── files │ ├── other │ │ └── sample_fluffy_the_cat.txt │ ├── sample │ │ ├── sample_document1.txt │ │ ├── sample_document2.txt │ │ └── sample_document3.txt │ ├── sample_document1.txt │ └── sample_document2.txt ├── sample_KeywordNode_Postprocessor.py ├── sample_Langchain_Output_Parser.py ├── 
sample_Metadata_Replacement_Postprocessor.py ├── sample_Router_Query_Engine.py ├── sample_Similarity_Postprocessor.py ├── sample_SubQuestion_Query_Engine.py ├── sample_low_level_query_engine.py └── sample_response_synthesizer.py ├── ch8 ├── files │ ├── database │ │ └── employees.db │ ├── sample_document1.txt │ └── sample_document2.txt ├── other │ ├── sample_document1.txt │ ├── sample_document2.txt │ └── sample_document3.txt ├── sample_agentrunner.py ├── sample_chat_memory.py ├── sample_condense_question.py ├── sample_context_chat_engine.py ├── sample_function_tool.py ├── sample_llmcompiler_agent.py ├── sample_loandandsearch_toolspec.py ├── sample_ondemandloader_tool.py ├── sample_openai_agent.py ├── sample_simple_chat_engine.py └── sample_simple_chat_engine_custom_LLM.py └── ch9 ├── evaluation_data └── pairwise_evaluator_dataset.json ├── files ├── database │ └── employees.db ├── sample_document1.txt └── sample_document2.txt ├── requirements.txt ├── sample_eval_phoenix.py ├── sample_llama_packs_zephyr.py ├── sample_lm_studio_LLM.py ├── sample_neutrino.py └── sample_tracing_phoenix2.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
from user_onboarding import user_onboarding
from session_functions import load_session, delete_session, save_session
from logging_functions import reset_log
from quiz_UI import show_quiz
from training_UI import show_training_UI
import streamlit as st
import os


def main():
    """Entry point of the P.I.T.S. Streamlit app: pick which screen to draw.

    Screens are tried in this order on every script run:
    1. the quiz, when ``show_quiz`` is set in the session state;
    2. the training UI, when ``resume_session`` is set;
    3. onboarding, when ``load_session`` reports that nothing was loaded;
    4. otherwise a "welcome back" screen offering to resume or start over.
    """
    st.set_page_config(layout="wide")
    st.sidebar.title('P.I.T.S.')
    st.sidebar.markdown('### Your Personalized Intelligent Tutoring System')

    state = st.session_state

    # Keep asking for the key until a non-empty value has been stored.
    if not state.get('OPENAI_API_KEY'):
        entered_key = st.text_input("Enter your OpenAI API Key (or leave blank if running locally): ")
        state['OPENAI_API_KEY'] = entered_key
        os.environ['OPENAI_API_KEY'] = entered_key

    # The user has opted to take a quiz: show it immediately.
    if state.get('show_quiz'):
        show_quiz(state['study_subject'])
        return

    # Resuming: make sure the quiz stays hidden and show the training UI.
    if state.get('resume_session'):
        state['show_quiz'] = False
        show_training_UI(state['user_name'], state['study_subject'])
        return

    # No stored session could be loaded: treat the visitor as a new user.
    if not load_session(state):
        user_onboarding()
        return

    # Returning user: offer to resume or to start from scratch.
    st.write(f"Welcome back {state['user_name']}!")
    resume_col, restart_col = st.columns(2)
    if resume_col.button(f"Resume your study of {state['study_subject']}"):
        # Flag the resume and rerun so the previous content is cleared.
        state['resume_session'] = True
        st.rerun()
    if restart_col.button('Start a new session'):
        delete_session(state)
        reset_log()
        # Wipe whatever is left in the session state before the fresh start.
        for key in list(state.keys()):
            del state[key]
        st.rerun()


if __name__ == "__main__":
    main()
SimpleChatStore.from_persist_path( 18 | CONVERSATION_FILE 19 | ) 20 | except FileNotFoundError: 21 | chat_store = SimpleChatStore() 22 | return chat_store 23 | 24 | def display_messages(chat_store, container): 25 | with container: 26 | for message in chat_store.get_messages(key="0"): 27 | with st.chat_message(message.role): 28 | st.markdown(message.content) 29 | 30 | def initialize_chatbot(user_name, study_subject, 31 | chat_store, container, context): 32 | memory = ChatMemoryBuffer.from_defaults( 33 | token_limit=3000, 34 | chat_store=chat_store, 35 | chat_store_key="0" 36 | ) 37 | storage_context = StorageContext.from_defaults( 38 | persist_dir=INDEX_STORAGE 39 | ) 40 | index = load_index_from_storage( 41 | storage_context, index_id="vector" 42 | ) 43 | study_materials_engine = index.as_query_engine( 44 | similarity_top_k=3 45 | ) 46 | study_materials_tool = QueryEngineTool( 47 | query_engine=study_materials_engine, 48 | metadata=ToolMetadata( 49 | name="study_materials", 50 | description=( 51 | f"Provides official information about " 52 | f"{study_subject}. Use a detailed plain " 53 | f"text question as input to the tool." 54 | ), 55 | ) 56 | ) 57 | agent = OpenAIAgent.from_tools( 58 | tools=[study_materials_tool], 59 | memory=memory, 60 | system_prompt=( 61 | f"Your name is PITS, a personal tutor. Your " 62 | f"purpose is to help {user_name} study and " 63 | f"better understand the topic of: " 64 | f"{study_subject}. 
#ingest uploaded documents
from global_settings import STORAGE_PATH, INDEX_STORAGE, CACHE_FILE
from logging_functions import log_action
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.ingestion import IngestionPipeline, IngestionCache
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core.extractors import SummaryExtractor
from llama_index.embeddings.openai import OpenAIEmbedding


def ingest_documents():
    """Read every file under STORAGE_PATH and turn it into embedded nodes.

    Each loaded document is printed and logged as an UPLOAD action. The
    documents then go through an ingestion pipeline (token splitting,
    'self' summary extraction, OpenAI embedding). A previously persisted
    pipeline cache at CACHE_FILE is reused when it can be loaded; the cache
    is persisted back to CACHE_FILE after the run.

    Returns:
        The nodes produced by ``IngestionPipeline.run``.
    """
    documents = SimpleDirectoryReader(
        STORAGE_PATH,
        filename_as_id=True
    ).load_data()
    for doc in documents:
        print(doc.id_)
        log_action(
            f"File '{doc.id_}' uploaded by user",
            action_type="UPLOAD"
        )

    try:
        cached_hashes = IngestionCache.from_persist_path(CACHE_FILE)
        print("Cache file found. Running using cache...")
    except Exception:
        # Best effort: a missing or unreadable cache must not stop ingestion.
        # Start from an explicit empty cache rather than a placeholder string,
        # and let KeyboardInterrupt/SystemExit propagate.
        cached_hashes = IngestionCache()
        print("No cache file found. Running without cache...")

    pipeline = IngestionPipeline(
        transformations=[
            TokenTextSplitter(
                chunk_size=1024,
                chunk_overlap=20
            ),
            SummaryExtractor(summaries=['self']),
            OpenAIEmbedding()
        ],
        cache=cached_hashes
    )

    nodes = pipeline.run(documents=documents)
    pipeline.cache.persist(CACHE_FILE)

    return nodes


if __name__ == "__main__":
    embedded_nodes = ingest_documents()
from datetime import datetime
from global_settings import LOG_FILE
import os


def log_action(action, action_type):
    """Append one line of the form '<timestamp>: <action_type> : <action>' to LOG_FILE."""
    stamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
    with open(LOG_FILE, 'a') as log_handle:
        log_handle.write(f"{stamp}: {action_type} : {action}\n")


def reset_log():
    """Empty LOG_FILE, creating it if it does not exist yet."""
    # Opening in 'w' mode already truncates the file to zero length.
    with open(LOG_FILE, 'w'):
        pass
import streamlit as st
import pandas as pd
from global_settings import QUIZ_FILE


def show_quiz(topic):
    """Render the quiz stored in QUIZ_FILE and grade it once submitted.

    One radio group is drawn per CSV row. The submit button only appears
    after every question has a selection.

    Returns:
        ``(level, score)`` on the run in which the submit button is pressed,
        where ``level`` is "Beginner", "Intermediate" or "Advanced";
        ``None`` on every other run.
    """
    st.markdown(f"### Let's test your knowledge on {topic} with a quiz:")
    quiz = pd.read_csv(QUIZ_FILE)

    answers = {}
    for _, item in quiz.iterrows():
        choices = [item[f"Option{n}"] for n in range(1, 5)]
        answers[item["Question_no"]] = st.radio(
            item["Question_text"], choices, index=None, key=item["Question_no"]
        )

    # Nothing to grade until every question has an answer.
    if any(picked is None for picked in answers.values()):
        return None
    if not st.button("SUBMIT ANSWERS"):
        return None

    score = sum(
        1
        for q_no, picked in answers.items()
        if picked == quiz.loc[quiz['Question_no'] == q_no, "Correct_answer"].iloc[0]
    )

    # Split the score range into thirds to derive the knowledge level.
    max_score = len(quiz)
    third_of_max = max_score / 3
    if score <= third_of_max:
        level = "Beginner"
    elif score <= 2 * third_of_max:
        level = "Intermediate"
    else:
        level = "Advanced"

    st.write(f"Your score is: {score}/{max_score}")
    st.write(f"Your level of knowledge: {level}")
    return level, score
pd.Series(dtype="str"), 17 | "Option4": pd.Series(dtype="str"), 18 | "Correct_answer": pd.Series(dtype="str"), 19 | "Rationale": pd.Series(dtype="str"), 20 | } 21 | ) 22 | storage_context = StorageContext.from_defaults(persist_dir=INDEX_STORAGE) 23 | vector_index = load_index_from_storage( 24 | storage_context, index_id="vector" 25 | ) 26 | df_rows_program = DFRowsProgram.from_defaults( 27 | pydantic_program_cls=OpenAIPydanticProgram, df=df 28 | ) 29 | query_engine = vector_index.as_query_engine() 30 | response = query_engine.query( 31 | f"Create {QUIZ_SIZE} different quiz questions relevant for testing a candidate's knowledge about {topic}. Each question will have 4 answer options. Questions must be general topic-related, not specific to the provided text. For each question, provide also the correct answer and the answer rationale. The rationale must not make any reference to the provided context, any exams or the topic name. Only one answer option should be correct." 32 | ) 33 | result_obj = df_rows_program(input_str=response) 34 | new_df=result_obj.to_df(existing_df=df) 35 | new_df.to_csv(QUIZ_FILE, index=False) 36 | return new_df 37 | 38 | -------------------------------------------------------------------------------- /PITS_APP/requirements.txt: -------------------------------------------------------------------------------- 1 | llama_index==0.10.15 2 | llama-index-program-evaporate==0.1.2 3 | openai==1.13.3 4 | pandas==1.5.2 5 | PyYAML==6.0.1 6 | 7 | 8 | -------------------------------------------------------------------------------- /PITS_APP/session_data/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Data-Driven-Applications-with-LlamaIndex/3acc296df6e8a9c8c5aafe15b589c69458215e3f/PITS_APP/session_data/.gitignore -------------------------------------------------------------------------------- /PITS_APP/session_functions.py: 
### TODO:
# - persist audio files in Slide

import json


class SlideDeck:
    """A topic plus the ordered list of Slide objects that cover it."""

    def __init__(self, topic, slides):
        self.topic = topic
        self.slides = slides

    def to_dict(self):
        """Return the deck as a plain dict with keys 'topic' and 'slides'."""
        serialized = []
        for slide in self.slides:
            serialized.append(slide.to_dict())
        return {'topic': self.topic, 'slides': serialized}

    def save_to_file(self, filename):
        """Write the deck to *filename* as JSON indented by four spaces."""
        with open(filename, 'w') as handle:
            json.dump(self.to_dict(), handle, indent=4)

    @classmethod
    def load_from_file(cls, filename):
        """Build a deck from a JSON file produced by ``save_to_file``."""
        with open(filename, 'r') as handle:
            payload = json.load(handle)
            loaded = []
            for fields in payload['slides']:
                loaded.append(Slide(**fields))
        return cls(payload['topic'], loaded)


class Slide:
    """One slide: section title, topic title, narration text and bullet list."""

    def __init__(self, section, topic, narration, bullets):
        self.section = section
        self.topic = topic
        self.narration = narration
        self.bullets = bullets

    def to_dict(self):
        """Return the slide's four fields as a plain dict."""
        return dict(
            section=self.section,
            topic=self.topic,
            narration=self.narration,
            bullets=self.bullets,
        )

    def render(self, display_narration=False):
        """Return the slide as markdown.

        The section is a level-1 heading, the topic a level-2 heading, and
        the bullets a numbered list (each bullet stripped of surrounding
        whitespace). When *display_narration* is true, a separator line and
        the narration in italics are appended.
        """
        parts = [f"# {self.section}\n## {self.topic}\n"]
        for position, bullet in enumerate(self.bullets, start=1):
            parts.append(f"{position}. {bullet.strip()}\n\n")
        if display_narration:
            parts.append("---\n\n\n")  # Separator line
            parts.append(f"*{self.narration}*")
        return "".join(parts)
# pip install llama-index-program-evaporate

from llama_index.core import TreeIndex, load_index_from_storage
from llama_index.core.storage import StorageContext
from llama_index.core.extractors import KeywordExtractor
from llama_index.program.openai import OpenAIPydanticProgram
from llama_index.program.evaporate.df import DFRowsProgram
from llama_index.core.schema import TextNode
from llama_index.llms.openai import OpenAI

from global_settings import STORAGE_PATH, INDEX_STORAGE, CACHE_FILE, SLIDES_FILE
from logging_functions import log_action
from document_uploader import ingest_documents
from slides import Slide, SlideDeck
import pandas as pd
import streamlit as st
from collections import Counter


def generate_slides(topic):
    """Build a slide deck about *topic* and save it to SLIDES_FILE.

    Steps, each wrapped in a Streamlit spinner:
    1. ingest the uploaded documents into nodes;
    2. extract keywords from the node summaries, keep those seen more than
       once, and ask the LLM to drop the generic ones;
    3. ask the LLM for a course outline covering those keywords and parse it
       into a DataFrame with 'Section' and 'Topics' columns;
    4. for every topic of every section, query the persisted "tree" index
       for a narration and ask the LLM to condense it into slide bullets.

    Args:
        topic: Subject of the course, interpolated into every prompt.

    Returns:
        None. The resulting SlideDeck is written to SLIDES_FILE.
    """
    llm = OpenAI(temperature=0.5, model="gpt-4-1106-preview", max_tokens=4096)

    with st.spinner("Loading documents..."):
        # Either processes everything or reuses the pipeline cache.
        embedded_nodes = ingest_documents()
    st.info("Docs loaded!")

    with st.spinner("Preparing summaries and keywords..."):
        # Build a second set of nodes containing just the summaries. These
        # are what the keyword extractor must see; the previous code created
        # them but then queued the original nodes instead.
        summary_nodes = [
            TextNode(text=node.metadata["section_summary"])
            for node in embedded_nodes
        ]

        # Extract keywords from the summaries and count their occurrences.
        key_extractor = KeywordExtractor(keywords=10)
        entities = key_extractor.extract(summary_nodes)
        keyword_counts = Counter(
            keyword.strip()
            for entity in entities
            if 'excerpt_keywords' in entity
            for keyword in entity['excerpt_keywords'].split(',')
        )

        # Keep keywords seen more than once, most frequent first.
        keywords_only = [
            keyword
            for keyword, count in keyword_counts.most_common()
            if count > 1
        ]

        # Let the LLM weed out generic keywords, 15 at a time.
        filtered_groups = []
        for start in range(0, len(keywords_only), 15):
            group_str = ', '.join(keywords_only[start:start + 15])
            response = llm.complete(
                f"Eliminate any keyword which is generic and not precisely specific to the topic of {topic}. "
                f"Format as comma separated. List just the remaining keywords: " + group_str
            )
            filtered_groups.append(str(response))
        specific_keywords = ','.join(filtered_groups)
    st.info("Keywords and summaries prepared!")

    with st.spinner("Creating the course outline..."):
        # NOTE(review): the format example below is spelled out to match how
        # the outline is parsed further down (topics split on ';'); confirm
        # it matches the intended outline layout.
        response = llm.complete(
            f"Create a structured course outline for a course about {topic}. "
            f"The outline should be divided in sections and each section should be divided in several topics. "
            f"Each section should have a sufficient number of topics to cover the entire knowledge area. "
            f"The outline will contain a gradual introduction of concepts, starting with a general introduction "
            f"on the subject and then covering more advanced areas. "
            f"Respond with one line per section using this format: SECTION TITLE: TOPIC 1; TOPIC 2; TOPIC 3. "
            f"Make sure the outline completely covers these keywords: {specific_keywords}"
        )

        df = pd.DataFrame({"Section": pd.Series(dtype="str"), "Topics": pd.Series(dtype="str")})
        df_rows_program = DFRowsProgram.from_defaults(pydantic_program_cls=OpenAIPydanticProgram, df=df)
        result_obj = df_rows_program(input_str=str(response))
        outline = result_obj.to_df(existing_df=df)
    st.info("Course outline complete!")

    with st.spinner("Creating the course slides and narration. This might take a while..."):
        # Load the tree index from storage; one query engine serves all slides.
        storage_context = StorageContext.from_defaults(persist_dir=INDEX_STORAGE)
        tree_index = load_index_from_storage(storage_context, index_id="tree")
        query_engine = tree_index.as_query_engine(response_mode="compact")

        # Build the slide and its narration for each topic of each section.
        slides = []
        for _, row in outline.iterrows():
            section = row['Section']
            # Tolerate ';' with or without surrounding spaces; skip empties.
            topics = [part.strip() for part in row['Topics'].split(';') if part.strip()]
            for slide_topic in topics:
                print(f"Generating content for: {section} - {slide_topic}")
                narration = str(query_engine.query(
                    f"You are an expert {topic} trainer. You are now covering the section titled '{section}'. "
                    f"Introduce and explain the concept of '{slide_topic}' to your students. "
                    f"Respond as you are the trainer."
                ))
                summary = llm.complete(
                    f"Summarize the essential concepts from this text as maximum 7 very short slide bullets "
                    f"without verbs: {narration}\n The general topic of the presentation is {topic}\n "
                    f"The slide title is {section+'-'+slide_topic} "
                    f"List the bullets separated with semicolons like this: BULLET1; BULLET2; ...: "
                )
                # A trailing ';' would otherwise yield an empty bullet.
                bullets = [part.strip() for part in str(summary).split(';') if part.strip()]
                slides.append(Slide(section, slide_topic, narration, bullets))
    st.info("Slides and narration generated!")

    slide_deck = SlideDeck(topic, slides)
    slide_deck.save_to_file(SLIDES_FILE)
It's nice meeting you!") 18 | 19 | study_subject = st.text_input('What subject would you like to study?') 20 | if not study_subject: return 21 | 22 | st.session_state['study_subject'] = study_subject 23 | st.write(f"Okay {user_name}, let's focus on {study_subject}.") 24 | 25 | study_goal = st.text_input( 26 | 'Detail any specific goal for your study or just leave it blank:', 27 | key='Study Goal' 28 | ) 29 | st.session_state['study_goal'] = study_goal or "No specific goal" 30 | 31 | if study_goal: 32 | st.write("Do you want to upload any study materials?") 33 | uploaded_files = st.file_uploader("Choose files", accept_multiple_files=True) 34 | finish_upload = st.button('FINISH UPLOAD') 35 | 36 | if finish_upload and uploaded_files: 37 | saved_file_names = [] 38 | for uploaded_file in uploaded_files: 39 | file_path = os.path.join(STORAGE_PATH, uploaded_file.name) 40 | with open(file_path, "wb") as f: 41 | f.write(uploaded_file.getbuffer()) 42 | saved_file_names.append(uploaded_file.name) 43 | st.write(f"You have uploaded {uploaded_file.name}") 44 | 45 | st.session_state['uploaded_files'] = saved_file_names 46 | st.session_state['finish_upload'] = True 47 | st.info('Uploading files...') 48 | 49 | if 'finish_upload' in st.session_state or 'difficulty_level' in st.session_state: 50 | st.write('Please select your current knowledge level on the topic') 51 | difficulty_level = st.radio( 52 | 'Current knowledge:', 53 | ['Beginner', 'Intermediate', 'Advanced', 'Take a quiz to assess'], 54 | ) 55 | st.session_state['difficulty_level'] = difficulty_level 56 | proceed_button = st.button('Proceed') 57 | st.write(f'Your choice: {difficulty_level}') 58 | 59 | if proceed_button: 60 | save_session(st.session_state) 61 | if difficulty_level == 'Take a quiz to assess': 62 | st.info('Proceeding to quiz. Ingesting study materials first...') 63 | nodes = ingest_documents() 64 | st.info('Materials loaded. 
Preparing indexes...') 65 | keyword_index , vector_index = build_indexes (nodes) 66 | st.info('Indexing complete. Generating quiz...') 67 | quiz = build_quiz(study_subject) 68 | st.session_state['show_quiz'] = True 69 | st.rerun() 70 | st.info('Indexing complete. Generating slides...') 71 | generate_slides(study_subject) 72 | else: 73 | log_action( 74 | f"{user_name} wants to study the topic of {study_subject}, " 75 | f"aiming to achieve the following goal: '{study_goal}'. " 76 | f"The user uploaded {len(uploaded_files)} files and has self-assessed " 77 | f"their current knowledge on the topic as {difficulty_level}", 78 | action_type="ONBOARDING" 79 | ) 80 | st.info(f'Proceeding with difficulty level {difficulty_level}') 81 | st.info('Ingesting study materials first...') 82 | nodes = ingest_documents() 83 | st.info('Materials loaded. Preparing indexes...') 84 | keyword_index , vector_index = build_indexes (nodes) 85 | st.info('Indexing complete. Generating slides...') 86 | generate_slides(study_subject) 87 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Building Data-Driven Applications with LlamaIndex 2 | Building Data-Driven Applications with LlamaIndex 3 | 4 | This is the code repository for [Building Data-Driven Applications with LlamaIndex](https://www.packtpub.com/product/building-data-driven-applications-with-llamaindex/9781835089507), published by Packt. 5 | 6 | **A practical guide to retrieval augmented generation (RAG) for Enhancing LLM Applications** 7 | 8 | ## What is this book about? 9 | Generative AI, such as Large Language Models (LLMs) possess immense potential. These models simplify problems but have limitations, including contextual memory constraints, prompt size issues, real-time data gaps, and occasional "hallucinations." 
10 | 11 | This book covers the following exciting features: 12 | * Understand the LlamaIndex ecosystem and common use cases 13 | * Master techniques to ingest and parse data from various sources into LlamaIndex 14 | * Discover how to create optimized indexes tailored to your use cases 15 | * Understand how to query LlamaIndex effectively and interpret responses 16 | * Build an end-to-end interactive web application with LlamaIndex, Python, and Streamlit 17 | * Customize a LlamaIndex configuration based on your project needs 18 | * Predict costs and deal with potential privacy issues 19 | * Deploy LlamaIndex applications that others can use 20 | 21 | If you feel this book is for you, get your [copy](https://www.amazon.com/Building-Data-Driven-Applications-LlamaIndex-retrieval-augmented/dp/183508950X/ref=sr_1_1?sr=8-1) today! 22 | 23 | ## Project Description 24 | 25 | This repository contains the code and resources for the book "Build Data-Driven LLM Applications with LlamaIndex: A Practical Guide to LlamaIndex for Python Developers". 26 | 27 | The project is built using LlamaIndex and Streamlit as the core technologies and offers the following features: 28 | 29 | - **Learning Objective Definition**: Define your learning goals and upload your own study materials in various formats including PDF, DOC and TXT. 30 | 31 | - **Knowledge Assessment Quiz**: Take a quiz to measure your current understanding of the subject, storing your responses as a baseline for your learning journey. 32 | 33 | - **Custom Learning Material**: PITS creates personalized educational content, including slides, narration and tests, all tailored to your level of expertise. 34 | 35 | - **Modular Learning Blocks**: The course is broken down into easily digestible modules, allowing you to progress at a pace that suits you. 36 | 37 | - **Question Tracking**: Keep track of all your interactions with the assistant and receive summary recaps whenever you return after 24 hours or upon request. 
38 | 39 | ## Table of Contents 40 | 41 | - [Installation](#installation) 42 | - [Usage](#usage) 43 | - [Contributing](#contributing) 44 | - [License](#license) 45 | - [Contact](#get-to-know-the-author) 46 | 47 | ## Installation 48 | 49 | 1. Clone this repository to your local machine. 50 | ```bash 51 | git clone https://github.com/PacktPublishing/Building-Data-Driven-Applications-with-LlamaIndex 52 | ``` 53 | 54 | 2. Navigate to the project directory. 55 | ```bash 56 | cd Building-Data-Driven-Applications-with-LlamaIndex/PITS_APP 57 | ``` 58 | 59 | 3. Install the required packages. 60 | ```bash 61 | pip install -r requirements.txt 62 | ``` 63 | 64 | ## Usage 65 | 66 | After installation, you can run the Streamlit app using the following command: 67 | 68 | ```bash 69 | streamlit run app.py 70 | ``` 71 | ## Contributing 72 | 73 | Contributions are welcome! For major changes, please open an issue first to discuss what you would like to change. 74 | 75 | ## License 76 | 77 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 78 | 79 | ## Get to Know the Author 80 | **Andrei Gheorghiu** is an experienced IT consultant and trainer with over 20 years of experience in the IT industry. Holding prestigious certifications such as ITIL Master, CISA, ISO 27001 Lead Auditor and CISSP, he has enriched over 15,000 students with knowledge in IT Service Management, Information Security, IT Governance and Audit. Driven by a passion for groundbreaking innovations with transformative potential, he taps into his vast experience, offering practical advice on harnessing technology to solve real-life challenges. 
81 | 82 | For any questions, feel free to reach out at [andrei.gheorghiu@gmail.com](mailto:andrei.gheorghiu@gmail.com) 83 | 84 | 85 | -------------------------------------------------------------------------------- /ch10/files/sample_document1.txt: -------------------------------------------------------------------------------- 1 | In ancient Rome, the city of Rome itself was the heart of the vast Roman Empire. It was known for its grand architecture, including iconic structures like the Colosseum and the Pantheon. The Romans were skilled engineers and builders, creating an extensive network of roads, aqueducts, and bridges that connected their far-reaching territories. The Roman Republic, with its Senate and elected officials, gave rise to the famous Roman legions, which conquered vast lands and brought them under Roman rule. The Roman civilization's influence on art, law, and governance can still be seen in modern societies today. -------------------------------------------------------------------------------- /ch10/files/sample_document2.txt: -------------------------------------------------------------------------------- 1 | Many people consider dogs to be their loyal companions. These furry creatures come in various breeds, each with its own unique traits and personalities. From the energetic and playful Labrador Retriever to the dignified and intelligent German Shepherd, there's a dog for every type of person. Dogs have been known to provide comfort, protection, and unwavering love to their owners, making them one of the most beloved pets worldwide. Whether they're chasing a ball in the park or curling up by the fireplace, dogs bring joy to our lives in countless ways. 
-------------------------------------------------------------------------------- /ch10/sample_get_prompts.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import SummaryIndex, SimpleDirectoryReader 2 | 3 | documents = SimpleDirectoryReader("files").load_data() 4 | summary_index = SummaryIndex.from_documents(documents) 5 | qe = summary_index.as_query_engine() 6 | 7 | prompts = qe.get_prompts() 8 | 9 | for k, p in prompts.items(): 10 | print(f"Prompt Key: {k}") 11 | print("Text:") 12 | print(p.get_template()) 13 | print("\n") 14 | -------------------------------------------------------------------------------- /ch10/sample_metadata_custom_prompt_templates.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import SimpleDirectoryReader 2 | from llama_index.core.node_parser import SentenceSplitter 3 | from llama_index.core.extractors import TitleExtractor 4 | 5 | reader = SimpleDirectoryReader('files') 6 | documents = reader.load_data() 7 | parser = SentenceSplitter() 8 | nodes = parser.get_nodes_from_documents(documents) 9 | 10 | title_extractor = TitleExtractor(summaries=["self"]) 11 | meta = title_extractor.extract(nodes) 12 | print("\nFirst title: " +meta[0]['document_title']) 13 | print("Second title: " +meta[1]['document_title']) 14 | 15 | combine_template = ( 16 | "{context_str}. Based on the above candidate titles " 17 | "and content, what is the comprehensive title for " 18 | "this document? Keep it under 6 words. 
Title: " 19 | ) 20 | title_extractor = TitleExtractor( 21 | summaries=["self"], 22 | combine_template=combine_template 23 | ) 24 | meta = title_extractor.extract(nodes) 25 | print("\nFirst title: " +meta[0]['document_title']) 26 | print("Second title: " +meta[1]['document_title']) 27 | 28 | -------------------------------------------------------------------------------- /ch10/sample_metadata_summary_extractor.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import SimpleDirectoryReader 2 | from llama_index.core.node_parser import SentenceSplitter 3 | from llama_index.core.extractors import TitleExtractor 4 | 5 | reader = SimpleDirectoryReader('files') 6 | documents = reader.load_data() 7 | parser = SentenceSplitter() 8 | nodes = parser.get_nodes_from_documents(documents) 9 | 10 | title_extractor = TitleExtractor(summaries=["self"]) 11 | meta= title_extractor.extract(nodes) 12 | 13 | print("\nFirst title: " +meta[0]['document_title']) 14 | print("Second title: " +meta[1]['document_title']) 15 | 16 | -------------------------------------------------------------------------------- /ch10/sample_update_prompts.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import SummaryIndex, SimpleDirectoryReader 2 | from llama_index.core import PromptTemplate 3 | 4 | documents = SimpleDirectoryReader("files").load_data() 5 | summary_index = SummaryIndex.from_documents(documents) 6 | qe = summary_index.as_query_engine() 7 | print(qe.query("Who burned Rome?")) 8 | print("------------------------") 9 | 10 | new_qa_template = ( 11 | "Context information is below." 12 | "---------------------" 13 | "{context_str}" 14 | "---------------------" 15 | "Given the context information " 16 | "and any of your prior knowledge, " 17 | "answer the query." 
18 | "Query: {query_str}" 19 | "Answer:") 20 | 21 | template = PromptTemplate(new_qa_template) 22 | 23 | qe.update_prompts( 24 | {"response_synthesizer:text_qa_template": template} 25 | ) 26 | print(qe.query("Who burned Rome?")) -------------------------------------------------------------------------------- /ch11/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Data-Driven-Applications-with-LlamaIndex/3acc296df6e8a9c8c5aafe15b589c69458215e3f/ch11/.gitignore -------------------------------------------------------------------------------- /ch2/files/sample_document1.txt: -------------------------------------------------------------------------------- 1 | In ancient Rome, the city of Rome itself was the heart of the vast Roman Empire. It was known for its grand architecture, including iconic structures like the Colosseum and the Pantheon. The Romans were skilled engineers and builders, creating an extensive network of roads, aqueducts, and bridges that connected their far-reaching territories. The Roman Republic, with its Senate and elected officials, gave rise to the famous Roman legions, which conquered vast lands and brought them under Roman rule. The Roman civilization's influence on art, law, and governance can still be seen in modern societies today. -------------------------------------------------------------------------------- /ch2/files/sample_document2.txt: -------------------------------------------------------------------------------- 1 | Many people consider dogs to be their loyal companions. These furry creatures come in various breeds, each with its own unique traits and personalities. From the energetic and playful Labrador Retriever to the dignified and intelligent German Shepherd, there's a dog for every type of person. 
Dogs have been known to provide comfort, protection, and unwavering love to their owners, making them one of the most beloved pets worldwide. Whether they're chasing a ball in the park or curling up by the fireplace, dogs bring joy to our lives in countless ways. -------------------------------------------------------------------------------- /ch2/sample1.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import VectorStoreIndex, SimpleDirectoryReader 2 | 3 | documents = SimpleDirectoryReader('files').load_data() 4 | index = VectorStoreIndex.from_documents(documents) 5 | query_engine = index.as_query_engine() 6 | response = query_engine.query("summarize each document in a few sentences") 7 | 8 | print(response) 9 | -------------------------------------------------------------------------------- /ch3/sample_App_Chat_About_Messi.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import Document, SummaryIndex 2 | from llama_index.core.node_parser import SimpleNodeParser 3 | from llama_index.readers.wikipedia import WikipediaReader 4 | 5 | loader = WikipediaReader() 6 | documents = loader.load_data(pages=["Messi Lionel"]) 7 | parser = SimpleNodeParser.from_defaults() 8 | nodes = parser.get_nodes_from_documents(documents) 9 | index = SummaryIndex(nodes) 10 | query_engine = index.as_query_engine() 11 | print("Ask me anything about Lionel Messi!") 12 | 13 | while True: 14 | question = input("Your question: ") 15 | if question.lower() == "exit": 16 | break 17 | response = query_engine.query(question) 18 | print(response) 19 | -------------------------------------------------------------------------------- /ch3/sample_Create_Doc.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import Document 2 | 3 | text = "The quick brown fox jumps over the lazy dog." 
"""Create two nodes from one document and link them with explicit relationships."""
from llama_index.core import Document
from llama_index.core.schema import (
    TextNode,
    NodeRelationship,
    RelatedNodeInfo
)

doc = Document(text="First sentence. Second Sentence")

# Let every node keep its own auto-generated ID. Reusing doc.doc_id as the
# node_id of both nodes made them indistinguishable, so NEXT/PREVIOUS ended
# up pointing at the node itself.
n1 = TextNode(text="First sentence")
n2 = TextNode(text="Second sentence")

# The link back to the originating document is expressed as a SOURCE
# relationship instead of overloading the node ID.
for node in (n1, n2):
    node.relationships[NodeRelationship.SOURCE] = RelatedNodeInfo(
        node_id=doc.doc_id
    )

# Relationship values are RelatedNodeInfo objects, not bare ID strings.
n1.relationships[NodeRelationship.NEXT] = RelatedNodeInfo(
    node_id=n2.node_id
)
n2.relationships[NodeRelationship.PREVIOUS] = RelatedNodeInfo(
    node_id=n1.node_id
)
print(n1.relationships)
print(n2.relationships)
16 | ) 17 | print(response) -------------------------------------------------------------------------------- /ch3/sample_LLM_parameters.py: -------------------------------------------------------------------------------- 1 | from llama_index.llms.openai import OpenAI 2 | llm = OpenAI( 3 | model="gpt-3.5-turbo-1106", 4 | temperature=0.2, 5 | max_tokens=50, 6 | additional_kwargs={ 7 | "seed": 12345678, 8 | "top_p": 0.5 9 | } 10 | ) 11 | response = llm.complete( 12 | "Explain the concept of gravity in one sentence" 13 | ) 14 | print(response) 15 | -------------------------------------------------------------------------------- /ch3/sample_TokenTextSplitter.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import Document 2 | from llama_index.core.node_parser import TokenTextSplitter 3 | 4 | doc = Document( 5 | text=( 6 | "This is sentence 1. This is sentence 2. " 7 | "Sentence 3 here." 8 | ), 9 | metadata={"author": "John Smith"} 10 | ) 11 | splitter = TokenTextSplitter( 12 | chunk_size=12, 13 | chunk_overlap=0, 14 | separator=" " 15 | ) 16 | 17 | nodes = splitter.get_nodes_from_documents([doc]) 18 | for node in nodes: 19 | print(node.text) 20 | print(node.metadata) -------------------------------------------------------------------------------- /ch4/files/db/example.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Data-Driven-Applications-with-LlamaIndex/3acc296df6e8a9c8c5aafe15b589c69458215e3f/ch4/files/db/example.db -------------------------------------------------------------------------------- /ch4/files/others/sample.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |

Example of a simple HTML page

5 | Example: 6 |

First line

7 |

Second line

8 |

Third line

9 | 10 | 11 | -------------------------------------------------------------------------------- /ch4/files/others/sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "quiz": { 3 | "sport": { 4 | "q1": { 5 | "question": "Which one is correct team name in NBA?", 6 | "options": [ 7 | "New York Bulls", 8 | "Los Angeles Kings", 9 | "Golden State Warriros", 10 | "Huston Rocket" 11 | ], 12 | "answer": "Huston Rocket" 13 | } 14 | }, 15 | "maths": { 16 | "q1": { 17 | "question": "5 + 7 = ?", 18 | "options": [ 19 | "10", 20 | "11", 21 | "12", 22 | "13" 23 | ], 24 | "answer": "12" 25 | }, 26 | "q2": { 27 | "question": "12 - 8 = ?", 28 | "options": [ 29 | "1", 30 | "2", 31 | "3", 32 | "4" 33 | ], 34 | "answer": "4" 35 | } 36 | } 37 | } 38 | } -------------------------------------------------------------------------------- /ch4/files/others/sample.md: -------------------------------------------------------------------------------- 1 | An h1 header 2 | ============ 3 | 4 | Paragraphs are separated by a blank line. 5 | 6 | 2nd paragraph. *Italic*, **bold**, and `monospace`. Itemized lists 7 | look like: 8 | 9 | * this one 10 | * that one 11 | * the other one 12 | 13 | Note that --- not considering the asterisk --- the actual text 14 | content starts at 4-columns in. 15 | 16 | > Block quotes are 17 | > written like so. 18 | > 19 | > They can span multiple paragraphs, 20 | > if you like. 21 | 22 | Use 3 dashes for an em-dash. Use 2 dashes for ranges (ex., "it's all 23 | in chapters 12--14"). Three dots ... will be converted to an ellipsis. 24 | Unicode is supported. ☺ 25 | 26 | 27 | 28 | An h2 header 29 | ------------ 30 | 31 | Here's a numbered list: 32 | 33 | 1. first item 34 | 2. second item 35 | 3. third item 36 | 37 | Note again how the actual text starts at 4 columns in (4 characters 38 | from the left side). Here's a code sample: 39 | 40 | # Let me re-iterate ... 41 | for i in 1 .. 
10 { do-something(i) } 42 | 43 | As you probably guessed, indented 4 spaces. By the way, instead of 44 | indenting the block, you can use delimited blocks, if you like: 45 | 46 | ~~~ 47 | define foobar() { 48 | print "Welcome to flavor country!"; 49 | } 50 | ~~~ 51 | 52 | (which makes copying & pasting easier). You can optionally mark the 53 | delimited block for Pandoc to syntax highlight it: 54 | 55 | ~~~python 56 | import time 57 | # Quick, count to ten! 58 | for i in range(10): 59 | # (but not *too* quick) 60 | time.sleep(0.5) 61 | print i 62 | ~~~ 63 | 64 | 65 | 66 | ### An h3 header ### 67 | 68 | Now a nested list: 69 | 70 | 1. First, get these ingredients: 71 | 72 | * carrots 73 | * celery 74 | * lentils 75 | 76 | 2. Boil some water. 77 | 78 | 3. Dump everything in the pot and follow 79 | this algorithm: 80 | 81 | find wooden spoon 82 | uncover pot 83 | stir 84 | cover pot 85 | balance wooden spoon precariously on pot handle 86 | wait 10 minutes 87 | goto first step (or shut off burner when done) 88 | 89 | Do not bump wooden spoon or it will fall. 90 | 91 | Notice again how text always lines up on 4-space indents (including 92 | that last line which continues item 3 above). 93 | 94 | Here's a link to [a website](http://foo.bar), to a [local 95 | doc](local-doc.html), and to a [section heading in the current 96 | doc](#an-h2-header). Here's a footnote [^1]. 97 | 98 | [^1]: Footnote text goes here. 99 | 100 | Tables can look like this: 101 | 102 | size material color 103 | ---- ------------ ------------ 104 | 9 leather brown 105 | 10 hemp canvas natural 106 | 11 glass transparent 107 | 108 | Table: Shoes, their sizes, and what they're made of 109 | 110 | (The above is the caption for the table.) Pandoc also supports 111 | multi-line tables: 112 | 113 | -------- ----------------------- 114 | keyword text 115 | -------- ----------------------- 116 | red Sunsets, apples, and 117 | other red or reddish 118 | things. 
119 | 120 | green Leaves, grass, frogs 121 | and other things it's 122 | not easy being. 123 | -------- ----------------------- 124 | 125 | A horizontal rule follows. 126 | 127 | *** 128 | 129 | Here's a definition list: 130 | 131 | apples 132 | : Good for making applesauce. 133 | oranges 134 | : Citrus! 135 | tomatoes 136 | : There's no "e" in tomatoe. 137 | 138 | Again, text is indented 4 spaces. (Put a blank line between each 139 | term/definition pair to spread things out more.) 140 | 141 | Here's a "line block": 142 | 143 | | Line one 144 | | Line too 145 | | Line tree 146 | 147 | and images can be specified like so: 148 | 149 | ![example image](example-image.jpg "An exemplary image") 150 | 151 | Inline math equations go in like so: $\omega = d\phi / dt$. Display 152 | math should get its own line and be put in in double-dollarsigns: 153 | 154 | $$I = \int \rho R^{2} dV$$ 155 | 156 | And note that you can backslash-escape any punctuation characters 157 | which you wish to be displayed literally, ex.: \`foo\`, \*bar\*, etc. -------------------------------------------------------------------------------- /ch4/files/sample_document1.txt: -------------------------------------------------------------------------------- 1 | In ancient Rome, the city of Rome itself was the heart of the vast Roman Empire. It was known for its grand architecture, including iconic structures like the Colosseum and the Pantheon. The Romans were skilled engineers and builders, creating an extensive network of roads, aqueducts, and bridges that connected their far-reaching territories. The Roman Republic, with its Senate and elected officials, gave rise to the famous Roman legions, which conquered vast lands and brought them under Roman rule. The Roman civilization's influence on art, law, and governance can still be seen in modern societies today. 
-------------------------------------------------------------------------------- /ch4/files/sample_document2.txt: -------------------------------------------------------------------------------- 1 | Many people consider dogs to be their loyal companions. These furry creatures come in various breeds, each with its own unique traits and personalities. From the energetic and playful Labrador Retriever to the dignified and intelligent German Shepherd, there's a dog for every type of person. Dogs have been known to provide comfort, protection, and unwavering love to their owners, making them one of the most beloved pets worldwide. Whether they're chasing a ball in the park or curling up by the fireplace, dogs bring joy to our lives in countless ways. -------------------------------------------------------------------------------- /ch4/sample_Estimate_Costs.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import Settings 2 | from llama_index.core.extractors import QuestionsAnsweredExtractor 3 | from llama_index.core.llms.mock import MockLLM 4 | from llama_index.core.schema import TextNode 5 | from llama_index.core.callbacks import ( 6 | CallbackManager, 7 | TokenCountingHandler 8 | ) 9 | 10 | llm = MockLLM(max_tokens=256) 11 | counter = TokenCountingHandler(verbose=False) 12 | callback_manager = CallbackManager([counter]) 13 | 14 | Settings.llm = llm 15 | Settings.callback_manager = CallbackManager([counter]) 16 | 17 | sample_text = ( 18 | "LlamaIndex is a powerful tool used " 19 | "to create efficient indices from data." 
"""Run the sample files through an IngestionPipeline, reusing a persisted cache.

On the first run no cache file exists and every transformation is executed;
the cache is then persisted so that later runs can reuse it.
"""
from pathlib import Path

from llama_index.core import SimpleDirectoryReader
from llama_index.core.extractors import SummaryExtractor, QuestionsAnsweredExtractor
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core.ingestion import IngestionPipeline, IngestionCache
from llama_index.core.schema import TransformComponent

CACHE_PATH = "./ingestion_cache.json"


class CustomTransformation(TransformComponent):
    """Pass-through transformation; a placeholder for custom node logic."""

    def __call__(self, nodes, **kwargs):
        # run any node transformation logic here
        return nodes


reader = SimpleDirectoryReader('files')
documents = reader.load_data()

# None (not an empty string) marks "no usable cache".
cached_hashes = None
if Path(CACHE_PATH).is_file():
    try:
        cached_hashes = IngestionCache.from_persist_path(CACHE_PATH)
    except Exception as exc:
        # An unreadable cache must not stop ingestion, but say why it is skipped.
        print(f"Cache file could not be loaded ({exc}). Running without cache...")
    else:
        print("Cache file found. Running using cache...")
else:
    print("No cache file found. Running without cache...")

pipeline = IngestionPipeline(
    transformations=[
        CustomTransformation(),
        TokenTextSplitter(
            separator=" ",
            chunk_size=512,
            chunk_overlap=128),
        SummaryExtractor(),
        QuestionsAnsweredExtractor(
            questions=3
        )
    ],
    # Start from an empty cache when none could be loaded, so that
    # pipeline.cache.persist() below always has a cache to write.
    cache=cached_hashes if cached_hashes is not None else IngestionCache()
)

nodes = pipeline.run(documents=documents, show_progress=True)
pipeline.cache.persist(CACHE_PATH)

print("All documents loaded")
class CustomExtractor(BaseExtractor):
    """Metadata extractor that records the text length of every node."""

    async def aextract(self, nodes) -> List[Dict]:
        """Return one ``{"node_length": "<len>"}`` dict per node, in input order."""
        lengths = []
        for item in nodes:
            # The length is stored as a string, as in the metadata dict format used here.
            lengths.append({"node_length": str(len(item.text))})
        return lengths
-------------------------------------------------------------------------------- /ch4/sample_extractor_KeywordExtractor.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import SimpleDirectoryReader 2 | from llama_index.core.node_parser import SentenceSplitter 3 | from llama_index.core.extractors import KeywordExtractor 4 | 5 | reader = SimpleDirectoryReader('files') 6 | documents = reader.load_data() 7 | parser = SentenceSplitter(include_prev_next_rel=True) 8 | nodes = parser.get_nodes_from_documents(documents) 9 | 10 | key_extractor = KeywordExtractor(keywords=3) 11 | metadata_list = key_extractor.extract(nodes) 12 | 13 | print(metadata_list) 14 | -------------------------------------------------------------------------------- /ch4/sample_extractor_QuestionAnsweredExtractor.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import SimpleDirectoryReader 2 | from llama_index.core.node_parser import SentenceSplitter 3 | from llama_index.core.extractors import QuestionsAnsweredExtractor 4 | 5 | reader = SimpleDirectoryReader('files') 6 | documents = reader.load_data() 7 | parser = SentenceSplitter(include_prev_next_rel=True) 8 | nodes = parser.get_nodes_from_documents(documents) 9 | 10 | qa_extractor = QuestionsAnsweredExtractor(questions=5) 11 | metadata_list = qa_extractor.extract(nodes) 12 | 13 | print(metadata_list) -------------------------------------------------------------------------------- /ch4/sample_extractor_SummaryExtractor.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import SimpleDirectoryReader 2 | from llama_index.core.node_parser import SentenceSplitter 3 | from llama_index.core.extractors import SummaryExtractor 4 | 5 | reader = SimpleDirectoryReader('files') 6 | documents = reader.load_data() 7 | parser = SentenceSplitter(include_prev_next_rel=True) 8 | nodes = 
parser.get_nodes_from_documents(documents) 9 | 10 | summary_extractor = SummaryExtractor( 11 | summaries=["prev", "self", "next"] 12 | ) 13 | metadata_list = summary_extractor.extract(nodes) 14 | 15 | print(metadata_list) -------------------------------------------------------------------------------- /ch4/sample_extractor_TitleExtractor.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import SimpleDirectoryReader 2 | from llama_index.core.node_parser import SentenceSplitter 3 | from llama_index.core.extractors import TitleExtractor 4 | 5 | reader = SimpleDirectoryReader('files') 6 | documents = reader.load_data() 7 | parser = SentenceSplitter(include_prev_next_rel=True) 8 | nodes = parser.get_nodes_from_documents(documents) 9 | 10 | title_extractor = TitleExtractor() 11 | metadata_list = title_extractor.extract(nodes) 12 | 13 | print(metadata_list) 14 | -------------------------------------------------------------------------------- /ch4/sample_node_creation1.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import Document 2 | from llama_index.core.node_parser import SentenceWindowNodeParser 3 | doc = Document( 4 | text="Sentence 1. Sentence 2. Sentence 3." 5 | ) 6 | parser = SentenceWindowNodeParser.from_defaults( 7 | window_size=2 , 8 | window_metadata_key="ContextWindow", 9 | original_text_metadata_key="node_text" 10 | ) 11 | nodes = parser.get_nodes_from_documents([doc]) 12 | print(nodes[1]) 13 | print(nodes[1].metadata) -------------------------------------------------------------------------------- /ch4/sample_node_creation2.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import Settings, Document, VectorStoreIndex 2 | from llama_index.core.node_parser import SentenceWindowNodeParser 3 | doc = Document( 4 | text="Sentence 1. Sentence 2. Sentence 3." 
5 | ) 6 | text_splitter = SentenceWindowNodeParser.from_defaults( 7 | window_size=2 , 8 | window_metadata_key="ContextWindow", 9 | original_text_metadata_key="node_text" 10 | ) 11 | Settings.text_splitter = text_splitter 12 | 13 | index = VectorStoreIndex.from_documents([doc]) 14 | retriever = index.as_retriever(similarity_top_k=1) 15 | response = retriever.retrieve("Display the second sentence") 16 | print(response[0].node.metadata['node_text']) 17 | 18 | -------------------------------------------------------------------------------- /ch4/sample_parser_HTMLNodeParser.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.node_parser import HTMLNodeParser 2 | from llama_index.readers.file import FlatReader 3 | from pathlib import Path 4 | 5 | reader = FlatReader() 6 | document = reader.load_data(Path("files/others/sample.html")) 7 | 8 | my_tags = ["p", "span"] 9 | html_parser = HTMLNodeParser(tags=my_tags) 10 | nodes = html_parser.get_nodes_from_documents(document) 11 | 12 | print(' elements:') 13 | for node in nodes: 14 | if node.metadata['tag']=='span': 15 | print(node.text) 16 | 17 | print('

elements:') 18 | for node in nodes: 19 | if node.metadata['tag']=='p': 20 | print(node.text) -------------------------------------------------------------------------------- /ch4/sample_parser_HierarchicalNodeParser.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.node_parser import HierarchicalNodeParser 2 | from llama_index.readers.file import FlatReader 3 | from pathlib import Path 4 | 5 | reader = FlatReader() 6 | document = reader.load_data(Path("files/sample_document1.txt")) 7 | 8 | hierarchical_parser = HierarchicalNodeParser.from_defaults( 9 | chunk_sizes=[128, 64, 32], 10 | chunk_overlap=0, 11 | ) 12 | nodes = hierarchical_parser.get_nodes_from_documents(document) 13 | 14 | 15 | for node in nodes: 16 | print(f"Metadata: {node.metadata} \nText: {node.text}") 17 | -------------------------------------------------------------------------------- /ch4/sample_parser_JSONNodeParser.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.node_parser import JSONNodeParser 2 | from llama_index.readers.file import FlatReader 3 | from pathlib import Path 4 | 5 | reader = FlatReader() 6 | document = reader.load_data(Path("files/others/sample.json")) 7 | 8 | json_parser = JSONNodeParser.from_defaults() 9 | nodes = json_parser.get_nodes_from_documents(document) 10 | 11 | for node in nodes: 12 | print(f"Metadata {node.metadata} \nText: {node.text}") -------------------------------------------------------------------------------- /ch4/sample_parser_LangchainNodeParser.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.node_parser import LangchainNodeParser 2 | from langchain.text_splitter import CharacterTextSplitter 3 | from llama_index.readers.file import FlatReader 4 | from pathlib import Path 5 | 6 | reader = FlatReader() 7 | document = reader.load_data(Path("files/sample_document1.txt")) 8 | 9 
| parser = LangchainNodeParser(CharacterTextSplitter()) 10 | nodes = parser.get_nodes_from_documents(document) 11 | 12 | for node in nodes: 13 | print(f"Metadata {node.metadata} \nText: {node.text}") 14 | -------------------------------------------------------------------------------- /ch4/sample_parser_MarkdownNodeParser.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.node_parser import MarkdownNodeParser 2 | from llama_index.readers.file import FlatReader 3 | from pathlib import Path 4 | 5 | reader = FlatReader() 6 | document = reader.load_data(Path("files/others/sample.md")) 7 | 8 | parser = MarkdownNodeParser.from_defaults() 9 | nodes = parser.get_nodes_from_documents(document) 10 | 11 | for node in nodes: 12 | print(f"Metadata {node.metadata} \nText: {node.text}") 13 | 14 | -------------------------------------------------------------------------------- /ch4/sample_parser_SentenceWindowNodeParser.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.node_parser import SentenceWindowNodeParser 2 | from llama_index.readers.file import FlatReader 3 | from pathlib import Path 4 | 5 | reader = FlatReader() 6 | document = reader.load_data(Path("files/sample_document1.txt")) 7 | 8 | parser = SentenceWindowNodeParser.from_defaults( 9 | window_size=2, 10 | window_metadata_key="text_window", 11 | original_text_metadata_key="original_sentence" 12 | ) 13 | nodes = parser.get_nodes_from_documents(document) 14 | 15 | for node in nodes: 16 | print(f"Metadata {node.metadata} \nText: {node.text}\n") -------------------------------------------------------------------------------- /ch4/sample_parser_SimpleFileNodeParser.py: -------------------------------------------------------------------------------- 1 | from llama_index.readers.file import FlatReader 2 | from pathlib import Path 3 | 4 | reader = FlatReader() 5 | document = 
reader.load_data(Path("files/sample_document1.txt")) 6 | 7 | print(f"Metadata: {document[0].metadata}") 8 | print(f"Text: {document[0].text}") 9 | 10 | -------------------------------------------------------------------------------- /ch4/sample_parser_in_ServiceContext.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import Settings, Document, VectorStoreIndex 2 | from llama_index.core.node_parser import SentenceWindowNodeParser 3 | 4 | doc = Document(text="Sentence 1. Sentence 2. Sentence 3.") 5 | text_splitter = SentenceWindowNodeParser.from_defaults( 6 | window_size=2 , 7 | window_metadata_key="ContextWindow", 8 | original_text_metadata_key="node_text" 9 | ) 10 | 11 | Settings.text_splitter=text_splitter 12 | index = VectorStoreIndex.from_documents([doc]) 13 | print("Successfully created the Index!") 14 | -------------------------------------------------------------------------------- /ch4/sample_reader_DatabaseReader.py: -------------------------------------------------------------------------------- 1 | from llama_index.readers.database import DatabaseReader 2 | reader = DatabaseReader( 3 | uri="sqlite:///files/db/example.db" 4 | ) 5 | query = "SELECT * FROM products" 6 | documents = reader.load_data(query=query) 7 | for doc in documents: 8 | print(doc.text) 9 | 10 | -------------------------------------------------------------------------------- /ch4/sample_reader_DiscordReader.py: -------------------------------------------------------------------------------- 1 | from llama_index.readers.discord import DiscordReader 2 | discord_token = "" 3 | channel_ids = [1234567890] 4 | reader = DiscordReader(discord_token=discord_token) 5 | documents = reader.load_data(channel_ids=channel_ids) 6 | 7 | -------------------------------------------------------------------------------- /ch4/sample_reader_GitHubRepositoryReader.py: -------------------------------------------------------------------------------- 1 | 
from llama_index.readers.github import GithubRepositoryReader 2 | 3 | documents = GithubRepositoryReader( 4 | github_token="" , 5 | owner= "", 6 | repo="", 7 | branch = "main", 8 | verbose=True, 9 | ignore_directories=["docs","test"] 10 | ).load_data(branch=branch) 11 | -------------------------------------------------------------------------------- /ch4/sample_reader_OpenMap.py: -------------------------------------------------------------------------------- 1 | from llama_index.readers.maps import OpenMap 2 | 3 | loader = OpenMap() 4 | documents = loader.load_data( 5 | localarea='Paris', 6 | search_tag='tourism', 7 | tag_only=True, 8 | local_area_buffer=2000, 9 | tag_values=['museum'] 10 | ) 11 | print(documents) -------------------------------------------------------------------------------- /ch4/sample_reader_S3Reader.py: -------------------------------------------------------------------------------- 1 | from llama_index.readers.s3 import S3Reader 2 | 3 | loader = S3Reader( 4 | bucket='', 5 | key='', 6 | aws_access_id='[ACCESS_KEY_ID]', 7 | aws_access_secret='[ACCESS_KEY_SECRET]' 8 | ) 9 | documents = loader.load_data() 10 | print(documents) 11 | 12 | 13 | -------------------------------------------------------------------------------- /ch4/sample_reader_SimpleDirectoryReader.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import SimpleDirectoryReader 2 | 3 | reader = SimpleDirectoryReader( 4 | input_dir="files", 5 | recursive=True 6 | ) 7 | documents = reader.load_data() 8 | for doc in documents: 9 | print(doc.metadata) 10 | 11 | -------------------------------------------------------------------------------- /ch4/sample_reader_SimpleWebPageReader.py: -------------------------------------------------------------------------------- 1 | from llama_index.readers.web import SimpleWebPageReader 2 | 3 | urls = ["https://docs.llamaindex.ai"] 4 | documents = SimpleWebPageReader().load_data(urls) 5 | 6 | 
for doc in documents: 7 | print(doc.text) 8 | -------------------------------------------------------------------------------- /ch4/sample_splitter_CodeSplitter.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.node_parser import CodeSplitter 2 | from llama_index.readers.file import FlatReader 3 | from pathlib import Path 4 | 5 | reader = FlatReader() 6 | document = reader.load_data(Path("sample_reader_GitHubRepositoryReader.py")) 7 | 8 | code_splitter = CodeSplitter.from_defaults( 9 | language = 'python', 10 | chunk_lines = 5, 11 | chunk_lines_overlap = 2, 12 | max_chars = 150 13 | ) 14 | nodes = code_splitter.get_nodes_from_documents(document) 15 | 16 | for node in nodes: 17 | print(f"Metadata {node.metadata} \nText: {node.text}\n") 18 | -------------------------------------------------------------------------------- /ch4/sample_splitter_TokenTextSplitter.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.node_parser import TokenTextSplitter 2 | from llama_index.readers.file import FlatReader 3 | from pathlib import Path 4 | 5 | reader = FlatReader() 6 | document = reader.load_data(Path("files/sample_document1.txt")) 7 | 8 | splitter = TokenTextSplitter( 9 | chunk_size = 70, 10 | chunk_overlap = 2, 11 | separator = " ", 12 | backup_separators = [".", "!", "?"] 13 | ) 14 | nodes = splitter.get_nodes_from_documents(document) 15 | 16 | for node in nodes: 17 | print(f"Metadata {node.metadata} \nText: {node.text}\n") 18 | -------------------------------------------------------------------------------- /ch5/chroma_database/c682bd4d-855c-4ed1-9894-22884774223d/header.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Data-Driven-Applications-with-LlamaIndex/3acc296df6e8a9c8c5aafe15b589c69458215e3f/ch5/chroma_database/c682bd4d-855c-4ed1-9894-22884774223d/header.bin -------------------------------------------------------------------------------- /ch5/chroma_database/c682bd4d-855c-4ed1-9894-22884774223d/length.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Data-Driven-Applications-with-LlamaIndex/3acc296df6e8a9c8c5aafe15b589c69458215e3f/ch5/chroma_database/c682bd4d-855c-4ed1-9894-22884774223d/length.bin -------------------------------------------------------------------------------- /ch5/chroma_database/c682bd4d-855c-4ed1-9894-22884774223d/link_lists.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Data-Driven-Applications-with-LlamaIndex/3acc296df6e8a9c8c5aafe15b589c69458215e3f/ch5/chroma_database/c682bd4d-855c-4ed1-9894-22884774223d/link_lists.bin -------------------------------------------------------------------------------- /ch5/chroma_database/chroma.sqlite3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Data-Driven-Applications-with-LlamaIndex/3acc296df6e8a9c8c5aafe15b589c69458215e3f/ch5/chroma_database/chroma.sqlite3 -------------------------------------------------------------------------------- /ch5/cost_prediction_samples/fluffy_the_cat.txt: -------------------------------------------------------------------------------- 1 | Fluffy was a tiny tabby kitten with a squeaky meow and big curious eyes. She lived cozily with her mama cat named Millie and her four rambunctious, furry siblings. 
Fluffy's siblings all had names as well - there was Patches, the black and white tuxedo cat, Mittens, who had white paws, Rascal, a hyper orange kitten who always got into trouble, and Sweetie, a gentle calico cat that Fluffy slept snuggled up next to every night. 2 | 3 | The little family of cats lived together in a small cottage on the outskirts of a quaint neighborhood that backed up against a large park filled with trees, grassy meadows, and a little creek that babbled through it. Their human owner, Mrs. Wigglesworth, had taken in mama cat Millie when she was pregnant with kittens and about to give birth. Now, Millie and her kitten brood had a cozy place to call home. 4 | 5 | One sunny spring morning, as birds chirped outside the cottage's windows, little Fluffy was batting playfully at some string Mrs. Wigglesworth had tied to a stick, amused by how it danced just out of reach of her grasp. Her siblings Mittens, Sweetie, Patches, and Rascal were still snoozing, curled up together in their cat bed, their tiny bodies rising and falling rhythmically with each sleepy breath. 6 | 7 | Fluffy's short attention span got the best of her quickly and she lost interest in her toy. She trotted over to the slightly opened window and peered outside, entranced by what she saw - vibrant pink and yellow tulips swaying gently in the breeze, buzzing bumble bees hovering from flower to flower, and a vibrant blue butterfly with spots like gemstones flapping lazily past. 8 | 9 | Overcome by curiosity, the tiny tabby kitten pawed open the window a little wider and squeezed out onto the porch roof, the textured shingles scratchy on her paws. “Wow!” Fluffy squeaked, her big eyes round with wonder. She crawled to the edge of the roof to get a better look at the beautiful world outside her family's cozy cottage, things she had only peeked at from behind the window glass. 
10 | 11 | Just then, that same shimmering blue butterfly caught her attention again, dancing on the breeze just out of reach. Compelled to give chase, Fluffy gathered up her hind legs and pounced as hard as she could, plunging straight off the porch roof! But being such a tiny kitten, she floated gently down onto the flower beds below unharmed if not a bit surprised. The fall had given the butterfly the head start it needed to stay ahead of the ambitious Fluffy. 12 | 13 | Determined to catch the twirling blue insect and utterly distracted by her mission, Fluffy let the butterfly lead her further and further into the woods that marked the edge of the park close by. She climbed over fallen logs, scrambled through brush and bushes on her short, clumsy legs. Her light grey striped fur was covered in flecks of mud and debris but she marched on happily through the little forest. 14 | 15 | After struggling through a particularly thick bramble bush, Fluffy finally trapped the now tired butterfly against a hollow log. “Gotcha!” she cried triumphantly, but just as she lifted a paw to take a swat, the terrified insect squeezed into a tiny gap in the rotting wood, once again eluding capture by the ambitious but unskillful kitten. 16 | 17 | Saddened at losing her newfound friend, Fluffy decided she had wandered far enough into the forest and began trying to retrace her steps back home. But everywhere she turned looked unfamiliar now - the towering trees stretched on forever in each direction. "Mama?" Fluffy timidly called out, her little mew echoing back at her tauntingly. 18 | 19 | Fluffy realized then that she was well and truly lost in the woods. Nervous tears pricked at her eyes making her normally perfect vision even more distorted. She tried smelling for her mama's familiar scent but was overwhelmed by the earthy smells of mud and moss. 20 | 21 | The sun was now setting low, casting ominous shadows everywhere. 
Strange night sounds echoed from all around - hoots, croaks, and rustles in the dark. Fluffy decided she had to find shelter fast or she may end up as another woodland creature's late night snack! 22 | 23 | Spotting a hollow opening in the trunk of a nearby fallen tree, the little grey tabby ducked inside the makeshift den, cramming her body as far back into the decaying wood as she could squeeze. Her tiny heart pounded in her fuzzy chest, cold tears soaking into her fur. Curled up tightly in the only shelter she could find, Fluffy meowed forlornly, “Mama? Mittens? Sweetie?” Her sad cries went unanswered except for faint echoes. Unable to fight her exhaustion any longer, Fluffy finally drifted off to sleep, bad dreams playing out behind her eyelids. 24 | 25 | The next morning, rays of sunlight pierced through the cracks in Fluffy's hollow tree trunk den. She awoke confused for a moment before remembering the whole awful ordeal. Stomach growling for food and parched for water, Fluffy knew she had to try to find her way out of these woods. She sniffed at the air and detected the promising scent of fresh water. Following the smell led her to a gently flowing stream with tiny fish darting through the crystal waters. 26 | 27 | Fluffy lapped up the cool water eagerly, relieving her dangerous thirst. As she followed alongside the babbling brook, eating some beetles and grass here and there, Fluffy became more hopeful that the stream would take her somewhere safe. 28 | 29 | Finally, after hours of walking, the trees opened up, revealing a sunny meadow filled with gently swaying reeds. Fluffy recognized the park near her family's cottage just across the field! Filled with renewed vigor, little Fluffy bounded through the tall grass, her fur slick with morning dew. 30 | 31 | As Fluffy neared the edge of the park close to the familiar dirt path that passed Mrs. 
Wigglesworth's cottage, she heard the distant but distinct sound of her mama cat crying out for her babies to come get milk. "Mittens! Sweetie! Fluffy!" Millie yowled from the porch anxiously. 32 | 33 | Fluffy cried aloud “Mama! It’s me!” Momentarily stunned to see her missing kitten bounding towards her, Millie was suddenly overcome with relief and rushed down the steps towards Fluffy. She scooped the prodigal kitten up into her warm fluffy belly, covering her in comforting licks. Fluffy cried happy tears, overjoyed to be reunited with her mama once more. 34 | 35 | Safe in the cottage once more, curled up with the rest of her siblings who purred loudly, grooming Fluffy's matted, mud-soaked fur, the little tabby knew she would never stray far from home again. Her big adventure into the woods had scared her right down to her little pink jellybean toes and she was perfectly content now being at home with the family that loved her. She would adventure again someday for sure - but not until she was a big girl cat! 36 | 37 | Fluffy was a tiny tabby kitten with a squeaky meow and big curious eyes. She lived cozily with her mama cat named Millie and her four rambunctious, furry siblings. Fluffy's siblings all had names as well - there was Patches, the black and white tuxedo cat, Mittens, who had white paws, Rascal, a hyper orange kitten who always got into trouble, and Sweetie, a gentle calico cat that Fluffy slept snuggled up next to every night. 38 | 39 | The little family of cats lived together in a small cottage on the outskirts of a quaint neighborhood that backed up against a large park filled with trees, grassy meadows, and a little creek that babbled through it. Their human owner, Mrs. Wigglesworth, had taken in mama cat Millie when she was pregnant with kittens and about to give birth. Now, Millie and her kitten brood had a cozy place to call home. 
40 | 41 | One sunny spring morning, as birds chirped outside the cottage's windows, little Fluffy was batting playfully at some string Mrs. Wigglesworth had tied to a stick, amused by how it danced just out of reach of her grasp. Her siblings Mittens, Sweetie, Patches, and Rascal were still snoozing, curled up together in their cat bed, their tiny bodies rising and falling rhythmically with each sleepy breath. 42 | 43 | Fluffy's short attention span got the best of her quickly and she lost interest in her toy. She trotted over to the slightly opened window and peered outside, entranced by what she saw - vibrant pink and yellow tulips swaying gently in the breeze, buzzing bumble bees hovering from flower to flower, and a vibrant blue butterfly with spots like gemstones flapping lazily past. 44 | 45 | Overcome by curiosity, the tiny tabby kitten pawed open the window a little wider and squeezed out onto the porch roof, the textured shingles scratchy on her paws. “Wow!” Fluffy squeaked, her big eyes round with wonder. She crawled to the edge of the roof to get a better look at the beautiful world outside her family's cozy cottage, things she had only peeked at from behind the window glass. 46 | 47 | Just then, that same shimmering blue butterfly caught her attention again, dancing on the breeze just out of reach. Compelled to give chase, Fluffy gathered up her hind legs and pounced as hard as she could, plunging straight off the porch roof! But being such a tiny kitten, she floated gently down onto the flower beds below unharmed if not a bit surprised. The fall had given the butterfly the head start it needed to stay ahead of the ambitious Fluffy. 48 | 49 | Determined to catch the twirling blue insect and utterly distracted by her mission, Fluffy let the butterfly lead her further and further into the woods that marked the edge of the park close by. She climbed over fallen logs, scrambled through brush and bushes on her short, clumsy legs. 
Her light grey striped fur was covered in flecks of mud and debris but she marched on happily through the little forest. 50 | 51 | After struggling through a particularly thick bramble bush, Fluffy finally trapped the now tired butterfly against a hollow log. “Gotcha!” she cried triumphantly, but just as she lifted a paw to take a swat, the terrified insect squeezed into a tiny gap in the rotting wood, once again eluding capture by the ambitious but unskillful kitten. 52 | 53 | Saddened at losing her newfound friend, Fluffy decided she had wandered far enough into the forest and began trying to retrace her steps back home. But everywhere she turned looked unfamiliar now - the towering trees stretched on forever in each direction. "Mama?" Fluffy timidly called out, her little mew echoing back at her tauntingly. 54 | 55 | Fluffy realized then that she was well and truly lost in the woods. Nervous tears pricked at her eyes making her normally perfect vision even more distorted. She tried smelling for her mama's familiar scent but was overwhelmed by the earthy smells of mud and moss. 56 | 57 | The sun was now setting low, casting ominous shadows everywhere. Strange night sounds echoed from all around - hoots, croaks, and rustles in the dark. Fluffy decided she had to find shelter fast or she may end up as another woodland creature's late night snack! 58 | 59 | Spotting a hollow opening in the trunk of a nearby fallen tree, the little grey tabby ducked inside the makeshift den, cramming her body as far back into the decaying wood as she could squeeze. Her tiny heart pounded in her fuzzy chest, cold tears soaking into her fur. Curled up tightly in the only shelter she could find, Fluffy meowed forlornly, “Mama? Mittens? Sweetie?” Her sad cries went unanswered except for faint echoes. Unable to fight her exhaustion any longer, Fluffy finally drifted off to sleep, bad dreams playing out behind her eyelids. 
60 | 61 | The next morning, rays of sunlight pierced through the cracks in Fluffy's hollow tree trunk den. She awoke confused for a moment before remembering the whole awful ordeal. Stomach growling for food and parched for water, Fluffy knew she had to try to find her way out of these woods. She sniffed at the air and detected the promising scent of fresh water. Following the smell led her to a gently flowing stream with tiny fish darting through the crystal waters. 62 | 63 | Fluffy lapped up the cool water eagerly, relieving her dangerous thirst. As she followed alongside the babbling brook, eating some beetles and grass here and there, Fluffy became more hopeful that the stream would take her somewhere safe. 64 | 65 | Finally, after hours of walking, the trees opened up, revealing a sunny meadow filled with gently swaying reeds. Fluffy recognized the park near her family's cottage just across the field! Filled with renewed vigor, little Fluffy bounded through the tall grass, her fur slick with morning dew. 66 | 67 | As Fluffy neared the edge of the park close to the familiar dirt path that passed Mrs. Wigglesworth's cottage, she heard the distant but distinct sound of her mama cat crying out for her babies to come get milk. "Mittens! Sweetie! Fluffy!" Millie yowled from the porch anxiously. 68 | 69 | Fluffy cried aloud “Mama! It’s me!” Momentarily stunned to see her missing kitten bounding towards her, Millie was suddenly overcome with relief and rushed down the steps towards Fluffy. She scooped the prodigal kitten up into her warm fluffy belly, covering her in comforting licks. Fluffy cried happy tears, overjoyed to be reunited with her mama once more. 70 | 71 | Safe in the cottage once more, curled up with the rest of her siblings who purred loudly, grooming Fluffy's matted, mud-soaked fur, the little tabby knew she would never stray far from home again. 
Her big adventure into the woods had scared her right down to her little pink jellybean toes and she was perfectly content now being at home with the family that loved her. She would adventure again someday for sure - but not until she was a big girl cat! 72 | 73 | Fluffy was a tiny tabby kitten with a squeaky meow and big curious eyes. She lived cozily with her mama cat named Millie and her four rambunctious, furry siblings. Fluffy's siblings all had names as well - there was Patches, the black and white tuxedo cat, Mittens, who had white paws, Rascal, a hyper orange kitten who always got into trouble, and Sweetie, a gentle calico cat that Fluffy slept snuggled up next to every night. 74 | 75 | The little family of cats lived together in a small cottage on the outskirts of a quaint neighborhood that backed up against a large park filled with trees, grassy meadows, and a little creek that babbled through it. Their human owner, Mrs. Wigglesworth, had taken in mama cat Millie when she was pregnant with kittens and about to give birth. Now, Millie and her kitten brood had a cozy place to call home. 76 | 77 | One sunny spring morning, as birds chirped outside the cottage's windows, little Fluffy was batting playfully at some string Mrs. Wigglesworth had tied to a stick, amused by how it danced just out of reach of her grasp. Her siblings Mittens, Sweetie, Patches, and Rascal were still snoozing, curled up together in their cat bed, their tiny bodies rising and falling rhythmically with each sleepy breath. 78 | 79 | Fluffy's short attention span got the best of her quickly and she lost interest in her toy. She trotted over to the slightly opened window and peered outside, entranced by what she saw - vibrant pink and yellow tulips swaying gently in the breeze, buzzing bumble bees hovering from flower to flower, and a vibrant blue butterfly with spots like gemstones flapping lazily past. 
80 | 81 | Overcome by curiosity, the tiny tabby kitten pawed open the window a little wider and squeezed out onto the porch roof, the textured shingles scratchy on her paws. “Wow!” Fluffy squeaked, her big eyes round with wonder. She crawled to the edge of the roof to get a better look at the beautiful world outside her family's cozy cottage, things she had only peeked at from behind the window glass. 82 | 83 | Just then, that same shimmering blue butterfly caught her attention again, dancing on the breeze just out of reach. Compelled to give chase, Fluffy gathered up her hind legs and pounced as hard as she could, plunging straight off the porch roof! But being such a tiny kitten, she floated gently down onto the flower beds below unharmed if not a bit surprised. The fall had given the butterfly the head start it needed to stay ahead of the ambitious Fluffy. 84 | 85 | Determined to catch the twirling blue insect and utterly distracted by her mission, Fluffy let the butterfly lead her further and further into the woods that marked the edge of the park close by. She climbed over fallen logs, scrambled through brush and bushes on her short, clumsy legs. Her light grey striped fur was covered in flecks of mud and debris but she marched on happily through the little forest. 86 | 87 | After struggling through a particularly thick bramble bush, Fluffy finally trapped the now tired butterfly against a hollow log. “Gotcha!” she cried triumphantly, but just as she lifted a paw to take a swat, the terrified insect squeezed into a tiny gap in the rotting wood, once again eluding capture by the ambitious but unskillful kitten. 88 | 89 | Saddened at losing her newfound friend, Fluffy decided she had wandered far enough into the forest and began trying to retrace her steps back home. But everywhere she turned looked unfamiliar now - the towering trees stretched on forever in each direction. "Mama?" Fluffy timidly called out, her little mew echoing back at her tauntingly. 
90 | 91 | Fluffy realized then that she was well and truly lost in the woods. Nervous tears pricked at her eyes, blurring her normally perfect vision. She tried smelling for her mama's familiar scent but was overwhelmed by the earthy smells of mud and moss. 92 | 93 | The sun was now setting low, casting ominous shadows everywhere. Strange night sounds echoed from all around - hoots, croaks, and rustles in the dark. Fluffy decided she had to find shelter fast or she might end up as another woodland creature's late night snack! 94 | 95 | Spotting a hollow opening in the trunk of a nearby fallen tree, the little grey tabby ducked inside the makeshift den, cramming her body as far back into the decaying wood as she could squeeze. Her tiny heart pounded in her fuzzy chest, cold tears soaking into her fur. Curled up tightly in the only shelter she could find, Fluffy meowed forlornly, “Mama? Mittens? Sweetie?” Her sad cries went unanswered except for faint echoes. Unable to fight her exhaustion any longer, Fluffy finally drifted off to sleep, bad dreams playing out behind her eyelids. 96 | 97 | The next morning, rays of sunlight pierced through the cracks in Fluffy's hollow tree trunk den. She awoke confused for a moment before remembering the whole awful ordeal. Stomach growling for food and parched for water, Fluffy knew she had to try to find her way out of these woods. She sniffed at the air and detected the promising scent of fresh water. Following the smell led her to a gently flowing stream with tiny fish darting through the crystal waters. 98 | 99 | Fluffy lapped up the cool water eagerly, relieving her dangerous thirst. As she followed alongside the babbling brook, eating some beetles and grass here and there, Fluffy became more hopeful that the stream would take her somewhere safe. 100 | 101 | Finally, after hours of walking, the trees opened up, revealing a sunny meadow filled with gently swaying reeds.
Fluffy recognized the park near her family's cottage just across the field! Filled with renewed vigor, little Fluffy bounded through the tall grass, her fur slick with morning dew. 102 | 103 | As Fluffy neared the edge of the park close to the familiar dirt path that passed Mrs. Wigglesworth's cottage, she heard the distant but distinct sound of her mama cat crying out for her babies to come get milk. "Mittens! Sweetie! Fluffy!" Millie yowled from the porch anxiously. 104 | 105 | Fluffy cried aloud “Mama! It’s me!” Momentarily stunned to see her missing kitten bounding towards her, Millie was suddenly overcome with relief and rushed down the steps towards Fluffy. She scooped the prodigal kitten up into her warm fluffy belly, covering her in comforting licks. Fluffy cried happy tears, overjoyed to be reunited with her mama once more. 106 | 107 | Safe in the cottage once more, curled up with the rest of her siblings who purred loudly, grooming Fluffy's matted, mud-soaked fur, the little tabby knew she would never stray far from home again. Her big adventure into the woods had scared her right down to her little pink jellybean toes and she was perfectly content now being at home with the family that loved her. She would adventure again someday for sure - but not until she was a big girl cat! -------------------------------------------------------------------------------- /ch5/data/sample.md: -------------------------------------------------------------------------------- 1 | An h1 header 2 | ============ 3 | 4 | Paragraphs are separated by a blank line. 5 | 6 | 2nd paragraph. *Italic*, **bold**, and `monospace`. Itemized lists 7 | look like: 8 | 9 | * this one 10 | * that one 11 | * the other one 12 | 13 | Note that --- not considering the asterisk --- the actual text 14 | content starts at 4-columns in. 15 | 16 | > Block quotes are 17 | > written like so. 18 | > 19 | > They can span multiple paragraphs, 20 | > if you like. 21 | 22 | Use 3 dashes for an em-dash. 
Use 2 dashes for ranges (ex., "it's all 23 | in chapters 12--14"). Three dots ... will be converted to an ellipsis. 24 | Unicode is supported. ☺ 25 | 26 | 27 | 28 | An h2 header 29 | ------------ 30 | 31 | Here's a numbered list: 32 | 33 | 1. first item 34 | 2. second item 35 | 3. third item 36 | 37 | Note again how the actual text starts at 4 columns in (4 characters 38 | from the left side). Here's a code sample: 39 | 40 | # Let me re-iterate ... 41 | for i in 1 .. 10 { do-something(i) } 42 | 43 | As you probably guessed, indented 4 spaces. By the way, instead of 44 | indenting the block, you can use delimited blocks, if you like: 45 | 46 | ~~~ 47 | define foobar() { 48 | print "Welcome to flavor country!"; 49 | } 50 | ~~~ 51 | 52 | (which makes copying & pasting easier). You can optionally mark the 53 | delimited block for Pandoc to syntax highlight it: 54 | 55 | ~~~python 56 | import time 57 | # Quick, count to ten! 58 | for i in range(10): 59 | # (but not *too* quick) 60 | time.sleep(0.5) 61 | print i 62 | ~~~ 63 | 64 | 65 | 66 | ### An h3 header ### 67 | 68 | Now a nested list: 69 | 70 | 1. First, get these ingredients: 71 | 72 | * carrots 73 | * celery 74 | * lentils 75 | 76 | 2. Boil some water. 77 | 78 | 3. Dump everything in the pot and follow 79 | this algorithm: 80 | 81 | find wooden spoon 82 | uncover pot 83 | stir 84 | cover pot 85 | balance wooden spoon precariously on pot handle 86 | wait 10 minutes 87 | goto first step (or shut off burner when done) 88 | 89 | Do not bump wooden spoon or it will fall. 90 | 91 | Notice again how text always lines up on 4-space indents (including 92 | that last line which continues item 3 above). 93 | 94 | Here's a link to [a website](http://foo.bar), to a [local 95 | doc](local-doc.html), and to a [section heading in the current 96 | doc](#an-h2-header). Here's a footnote [^1]. 97 | 98 | [^1]: Footnote text goes here. 
99 | 100 | Tables can look like this: 101 | 102 | size material color 103 | ---- ------------ ------------ 104 | 9 leather brown 105 | 10 hemp canvas natural 106 | 11 glass transparent 107 | 108 | Table: Shoes, their sizes, and what they're made of 109 | 110 | (The above is the caption for the table.) Pandoc also supports 111 | multi-line tables: 112 | 113 | -------- ----------------------- 114 | keyword text 115 | -------- ----------------------- 116 | red Sunsets, apples, and 117 | other red or reddish 118 | things. 119 | 120 | green Leaves, grass, frogs 121 | and other things it's 122 | not easy being. 123 | -------- ----------------------- 124 | 125 | A horizontal rule follows. 126 | 127 | *** 128 | 129 | Here's a definition list: 130 | 131 | apples 132 | : Good for making applesauce. 133 | oranges 134 | : Citrus! 135 | tomatoes 136 | : There's no "e" in tomatoe. 137 | 138 | Again, text is indented 4 spaces. (Put a blank line between each 139 | term/definition pair to spread things out more.) 140 | 141 | Here's a "line block": 142 | 143 | | Line one 144 | | Line too 145 | | Line tree 146 | 147 | and images can be specified like so: 148 | 149 | ![example image](example-image.jpg "An exemplary image") 150 | 151 | Inline math equations go in like so: $\omega = d\phi / dt$. Display 152 | math should get its own line and be put in double-dollarsigns: 153 | 154 | $$I = \int \rho R^{2} dV$$ 155 | 156 | And note that you can backslash-escape any punctuation characters 157 | which you wish to be displayed literally, ex.: \`foo\`, \*bar\*, etc. -------------------------------------------------------------------------------- /ch5/files/others/sample.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Example of a simple HTML page 9 | 10 | 16 | 36 | 42 | 43 | 44 | 45 | 46 | 75 | 76 | 82 |

Example of a simple HTML page

83 | 84 |

Hypertext Markup Language (HTML) is the most common language used to 85 | create documents on the World Wide Web. HTML uses hundreds of different 86 | tags to define a layout for web pages. Most tags require an opening <tag> 87 | and a closing </tag>.

88 | 89 |

Example:  <b>On 90 | a webpage, this sentence would be in bold print.</b>

91 | 92 |

Below is an example of a very simple page:

93 | 94 |

95 | 96 |

 This 97 | is the code used to make the page:

98 | 99 |

<HTML>

100 | 101 |

<HEAD>

102 | 103 |

<TITLE>Your Title Here</TITLE> 104 |

105 | 106 |

</HEAD>

107 | 108 |

<BODY BGCOLOR="FFFFFF"> 109 |

110 | 111 |

<CENTER><IMG SRC="clouds.jpg" 112 | ALIGN="BOTTOM"> </CENTER>

113 | 114 |

<HR>

115 | 116 |

<a href="http://somegreatsite.com">Link 117 | Name</a>

118 | 119 |

is a link to another nifty site

120 | 121 |

<H1>This is a Header</H1>

122 | 123 |

<H2>This is a Medium Header</H2> 124 |

125 | 126 |

Send me mail at <a href="mailto:support@yourcompany.com">

127 | 128 |

support@yourcompany.com</a>.

129 | 130 |

<P> This is a new paragraph!

131 | 132 |

<P> <B>This is a new paragraph!</B> 133 |

134 | 135 |

<BR> <B><I>This is a new 136 | sentence without a paragraph break, in bold italics.</I></B> 137 |

138 | 139 |

<HR>

140 | 141 |

</BODY>

142 | 143 |

</HTML>

144 | 145 |

 

146 | 147 |

 

148 | 149 |

 

150 | 151 | 157 | 158 | 159 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /ch5/files/others/sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "quiz": { 3 | "sport": { 4 | "q1": { 5 | "question": "Which one is correct team name in NBA?", 6 | "options": [ 7 | "New York Bulls", 8 | "Los Angeles Kings", 9 | "Golden State Warriros", 10 | "Huston Rocket" 11 | ], 12 | "answer": "Huston Rocket" 13 | } 14 | }, 15 | "maths": { 16 | "q1": { 17 | "question": "5 + 7 = ?", 18 | "options": [ 19 | "10", 20 | "11", 21 | "12", 22 | "13" 23 | ], 24 | "answer": "12" 25 | }, 26 | "q2": { 27 | "question": "12 - 8 = ?", 28 | "options": [ 29 | "1", 30 | "2", 31 | "3", 32 | "4" 33 | ], 34 | "answer": "4" 35 | } 36 | } 37 | } 38 | } -------------------------------------------------------------------------------- /ch5/files/others/sample.md: -------------------------------------------------------------------------------- 1 | An h1 header 2 | ============ 3 | 4 | Paragraphs are separated by a blank line. 5 | 6 | 2nd paragraph. *Italic*, **bold**, and `monospace`. Itemized lists 7 | look like: 8 | 9 | * this one 10 | * that one 11 | * the other one 12 | 13 | Note that --- not considering the asterisk --- the actual text 14 | content starts at 4-columns in. 15 | 16 | > Block quotes are 17 | > written like so. 18 | > 19 | > They can span multiple paragraphs, 20 | > if you like. 21 | 22 | Use 3 dashes for an em-dash. Use 2 dashes for ranges (ex., "it's all 23 | in chapters 12--14"). Three dots ... will be converted to an ellipsis. 24 | Unicode is supported. ☺ 25 | 26 | 27 | 28 | An h2 header 29 | ------------ 30 | 31 | Here's a numbered list: 32 | 33 | 1. first item 34 | 2. second item 35 | 3. third item 36 | 37 | Note again how the actual text starts at 4 columns in (4 characters 38 | from the left side). Here's a code sample: 39 | 40 | # Let me re-iterate ... 41 | for i in 1 .. 
10 { do-something(i) } 42 | 43 | As you probably guessed, indented 4 spaces. By the way, instead of 44 | indenting the block, you can use delimited blocks, if you like: 45 | 46 | ~~~ 47 | define foobar() { 48 | print "Welcome to flavor country!"; 49 | } 50 | ~~~ 51 | 52 | (which makes copying & pasting easier). You can optionally mark the 53 | delimited block for Pandoc to syntax highlight it: 54 | 55 | ~~~python 56 | import time 57 | # Quick, count to ten! 58 | for i in range(10): 59 | # (but not *too* quick) 60 | time.sleep(0.5) 61 | print i 62 | ~~~ 63 | 64 | 65 | 66 | ### An h3 header ### 67 | 68 | Now a nested list: 69 | 70 | 1. First, get these ingredients: 71 | 72 | * carrots 73 | * celery 74 | * lentils 75 | 76 | 2. Boil some water. 77 | 78 | 3. Dump everything in the pot and follow 79 | this algorithm: 80 | 81 | find wooden spoon 82 | uncover pot 83 | stir 84 | cover pot 85 | balance wooden spoon precariously on pot handle 86 | wait 10 minutes 87 | goto first step (or shut off burner when done) 88 | 89 | Do not bump wooden spoon or it will fall. 90 | 91 | Notice again how text always lines up on 4-space indents (including 92 | that last line which continues item 3 above). 93 | 94 | Here's a link to [a website](http://foo.bar), to a [local 95 | doc](local-doc.html), and to a [section heading in the current 96 | doc](#an-h2-header). Here's a footnote [^1]. 97 | 98 | [^1]: Footnote text goes here. 99 | 100 | Tables can look like this: 101 | 102 | size material color 103 | ---- ------------ ------------ 104 | 9 leather brown 105 | 10 hemp canvas natural 106 | 11 glass transparent 107 | 108 | Table: Shoes, their sizes, and what they're made of 109 | 110 | (The above is the caption for the table.) Pandoc also supports 111 | multi-line tables: 112 | 113 | -------- ----------------------- 114 | keyword text 115 | -------- ----------------------- 116 | red Sunsets, apples, and 117 | other red or reddish 118 | things. 
119 | 120 | green Leaves, grass, frogs 121 | and other things it's 122 | not easy being. 123 | -------- ----------------------- 124 | 125 | A horizontal rule follows. 126 | 127 | *** 128 | 129 | Here's a definition list: 130 | 131 | apples 132 | : Good for making applesauce. 133 | oranges 134 | : Citrus! 135 | tomatoes 136 | : There's no "e" in tomatoe. 137 | 138 | Again, text is indented 4 spaces. (Put a blank line between each 139 | term/definition pair to spread things out more.) 140 | 141 | Here's a "line block": 142 | 143 | | Line one 144 | | Line too 145 | | Line tree 146 | 147 | and images can be specified like so: 148 | 149 | ![example image](example-image.jpg "An exemplary image") 150 | 151 | Inline math equations go in like so: $\omega = d\phi / dt$. Display 152 | math should get its own line and be put in double-dollarsigns: 153 | 154 | $$I = \int \rho R^{2} dV$$ 155 | 156 | And note that you can backslash-escape any punctuation characters 157 | which you wish to be displayed literally, ex.: \`foo\`, \*bar\*, etc. -------------------------------------------------------------------------------- /ch5/files/sample_document1.txt: -------------------------------------------------------------------------------- 1 | In ancient Rome, the city of Rome itself was the heart of the vast Roman Empire. It was known for its grand architecture, including iconic structures like the Colosseum and the Pantheon. The Romans were skilled engineers and builders, creating an extensive network of roads, aqueducts, and bridges that connected their far-reaching territories. The Roman Republic, with its Senate and elected officials, gave rise to the famous Roman legions, which conquered vast lands and brought them under Roman rule. The Roman civilization's influence on art, law, and governance can still be seen in modern societies today.
-------------------------------------------------------------------------------- /ch5/files/sample_document2.txt: -------------------------------------------------------------------------------- 1 | Many people consider dogs to be their loyal companions. These furry creatures come in various breeds, each with its own unique traits and personalities. From the energetic and playful Labrador Retriever to the dignified and intelligent German Shepherd, there's a dog for every type of person. Dogs have been known to provide comfort, protection, and unwavering love to their owners, making them one of the most beloved pets worldwide. Whether they're chasing a ball in the park or curling up by the fireplace, dogs bring joy to our lives in countless ways. -------------------------------------------------------------------------------- /ch5/index_cache/default__vector_store.json: -------------------------------------------------------------------------------- 1 | {"embedding_dict": {"855a4bd9-ed6b-4306-a571-752f33700cfc": [-0.006990846712142229, 0.03620882332324982, -0.015574716962873936, -0.008744540624320507, -0.004139811731874943, 0.017366014420986176, -0.012915118597447872, -0.04939058795571327, -0.0010554640321061015, -0.01520551834255457, 0.00724723469465971, 0.024503856897354126, -0.025584105402231216, 0.016408832743763924, -0.0027125852648168802, 0.007842054590582848, 0.021960487589240074, 0.014932038262486458, 0.015396954491734505, -0.0041329748928546906, -0.008457385934889317, 0.009619678370654583, -0.012340810149908066, -0.004033837933093309, -0.03527899086475372, 0.002825395902618766, 0.029481202363967896, -0.018733417615294456, -0.0065020001493394375, -0.007213049568235874, 0.02201518416404724, -0.006143056787550449, -0.018473610281944275, -0.009585493244230747, -0.019129963591694832, 0.006970335263758898, 0.011739152483642101, -0.021495571359992027, 0.01127423532307148, -0.0022681793197989464, 0.014002203941345215, 0.003804798237979412, -0.0030903301667422056, 
-0.017393363639712334, 0.0015135438879951835, 0.018788114190101624, 0.012710008770227432, -0.0291803739964962, -0.014535491354763508, 0.012327135540544987, 0.010207661427557468, 0.006006316747516394, -0.012333972379565239, 0.0020596503745764494, -0.01140413898974657, 0.0037227540742605925, -0.005746509879827499, 0.009708559140563011, -0.005172200966626406, 0.010864014737308025, 0.009571819566190243, -0.007103657349944115, -0.00814972072839737, -0.022630514577031136, -0.012580105103552341, -0.018446262925863266, -0.015383280813694, 0.018131760880351067, -0.00477223563939333, 0.006683181039988995, 0.005661047529429197, 0.018131760880351067, -0.012744193896651268, 0.0015827686293050647, 0.01706518605351448, -0.020032450556755066, -0.03347402065992355, -0.01460386160761118, 0.011944263242185116, -0.013790256343781948, 0.007554900366812944, -0.043592799454927444, 0.028277888894081116, 0.02790869027376175, 0.01754377782344818, 0.0037945425137877464, 0.0006721639074385166, 0.03210661560297012, -0.012156210839748383, -0.033884238451719284, 0.00640628207474947, 0.022753581404685974, 0.0059276907704770565, 0.015232866629958153, 0.002774118212983012, 0.02682844176888466, 0.01498673390597105, 0.03380219638347626, 0.018692394718527794, -0.008402690291404724, -0.011821196414530277, 0.013393709436058998, -0.014002203941345215, 0.0008777016191743314, -0.03355606272816658, -0.009298338554799557, 0.027252336964011192, -0.023874852806329727, -0.008655658923089504, 0.004064604640007019, -0.019608555361628532, 0.020907588303089142, -0.021796399727463722, -0.0599469356238842, -0.007110494188964367, -0.010556349530816078, 0.04706600308418274, -0.024189354851841927, 5.9343143220758066e-05, 0.0177625622600317, -0.007671129424124956, 0.025392668321728706, 0.024818358942866325, -0.016190048307180405, 0.009872647933661938, -0.0022476683370769024, 0.007732662372291088, -0.021604962646961212, 0.015848197042942047, -0.009982040151953697, 0.04643699899315834, 0.010590534657239914, 
0.01408424787223339, 0.002321166219189763, -0.003644128330051899, 0.036072082817554474, -0.014111596159636974, -0.007623270153999329, -0.015930242836475372, 0.009694885462522507, 0.026172088459134102, 0.027443772181868553, 0.007828380912542343, -0.01973162218928337, 0.0026784001383930445, -0.023970570415258408, 0.022097228094935417, -0.011034940369427204, -0.01336636207997799, -0.020305931568145752, 0.015410629101097584, -0.0158618725836277, -0.005435425788164139, -0.00724723469465971, -0.004919231403619051, 0.027963386848568916, 0.0017118173418566585, -0.0018494122195988894, 0.019362423568964005, -0.023997917771339417, 0.007766847498714924, 0.03530633822083473, -0.003194594755768776, 0.0043107373639941216, -0.002768990583717823, 0.027238663285970688, 0.03339197486639023, -0.005377311259508133, -0.020319605246186256, -0.006341330241411924, -0.011151169426739216, 0.020210212096571922, -0.04881627857685089, 0.014795297756791115, -0.006621647626161575, -0.0014349182602018118, 0.03251683712005615, 0.02026490867137909, -0.0056439549662172794, -0.012457039207220078, -0.01947181485593319, -0.012915118597447872, 0.011869056150317192, 0.05559859424829483, -0.024845708161592484, -0.032462142407894135, 0.04033838212490082, -0.020729824900627136, 0.02524225413799286, -0.002984356600791216, 0.012901444919407368, 0.03637291118502617, 0.0008524901350028813, 0.002380989957600832, -0.596843957901001, -0.014289358630776405, -0.011014428921043873, -0.028059104457497597, 0.008737703785300255, 0.006102034822106361, -0.0180770643055439, 0.008935976773500443, -0.007766847498714924, 0.030301645398139954, 0.006751550827175379, -0.005035460460931063, 0.0223570354282856, 0.01467223186045885, -0.0010058956686407328, -0.03038368932902813, 0.00603366456925869, 0.0013554379111155868, -0.012921956367790699, 0.006915639154613018, 0.0022408312652260065, -0.0009016311960294843, -0.009496612474322319, -0.011752826161682606, -0.014521816745400429, 0.03150495886802673, 0.008778725750744343, 
0.006546440534293652, 0.023204823955893517, 0.0380684919655323, -0.04170578345656395, -0.011793849058449268, 0.011520368047058582, -0.011916914954781532, 0.043018490076065063, -0.027990734204649925, -0.006139638368040323, 0.014248336665332317, 0.02915302664041519, 0.026308828964829445, -0.03016490489244461, -0.0009110320825129747, 0.0061054532416164875, 0.007089983206242323, -0.0009118866873905063, -0.022603167220950127, 0.02733438089489937, 0.006078105419874191, 0.007124168332666159, -0.014631208963692188, -0.004095371346920729, 0.02449018321931362, 0.019143639132380486, -0.007657455280423164, 0.0031279337126761675, -0.002124601975083351, 0.03675578534603119, -0.01947181485593319, 0.007855729199945927, -0.009229968301951885, -0.00553456274792552, 0.004833768587559462, -0.02572084590792656, -0.0405845120549202, -9.598526230547577e-05, -0.005148271098732948, -0.013954345136880875, -0.012039980851113796, -0.00795828364789486, -0.02273990772664547, -0.011130657978355885, 0.005127760116010904, -0.01954018510878086, -0.01706518605351448, 0.009264153428375721, 0.02749846875667572, 0.004943160805851221, 0.018008694052696228, -0.011650271713733673, 0.030903302133083344, 0.0005696087027899921, 0.0059482017531991005, -0.03670109063386917, -0.010474304668605328, 0.040201641619205475, 0.008525756187736988, -0.038615453988313675, 0.0293444637209177, 0.03120413050055504, -0.01576615311205387, 0.011110147461295128, 0.006642158608883619, 0.006307145114988089, -0.03927180543541908, -0.01487734168767929, 0.02048369310796261, -0.014631208963692188, -0.0017827512929216027, -0.005914017092436552, -0.021550267934799194, -0.00868984404951334, -0.001522944774478674, -5.245271313469857e-05, 0.028305236250162125, 0.03834197297692299, 0.008313808590173721, -0.011390464380383492, -0.00036535292747430503, 0.003671476384624839, -0.019321400672197342, -0.029453855007886887, -0.004109045024961233, -0.030930649489164352, -0.010330727323889732, -0.011281072162091732, -0.02271256037056446, 
0.02403894066810608, 0.008395852521061897, 0.014344055205583572, 0.015533694997429848, 0.04621821269392967, -0.002979228738695383, -0.009270990267395973, -0.012600616551935673, 0.016805380582809448, -0.003139898646622896, 0.030875954777002335, -0.03232540190219879, 0.0030629821121692657, 0.012053655460476875, -0.006922476459294558, -0.0007166045252233744, 0.03867014870047569, -0.03637291118502617, -0.01122637651860714, 0.009845299646258354, 0.008894954808056355, -0.003377484856173396, 0.025037143379449844, 0.005370474420487881, -0.033282581716775894, 0.0024305584374815226, 0.017735213041305542, -0.02533797360956669, -0.01043328270316124, -0.04392097517848015, -0.026595983654260635, 0.007424996700137854, -0.004642332438379526, -0.0041945078410208225, -0.013646678999066353, -0.02382015623152256, -0.013058695942163467, 0.019403444603085518, 0.008204416371881962, -0.016477202996611595, 0.008224927820265293, -0.03691987320780754, -0.022794604301452637, -0.018801787868142128, -0.014043225906789303, 0.032462142407894135, -0.020005101338028908, 0.007999305613338947, 0.0012528827646747231, 0.01290828175842762, 0.00044013274600729346, 0.03733009472489357, 0.013318502344191074, -0.02312278002500534, -0.008183905854821205, 0.01480897143483162, -0.024941425770521164, -0.0055106328800320625, -0.002550206147134304, -0.01908894255757332, -0.027771949768066406, -0.013263806700706482, -0.009346197359263897, -0.002991193439811468, 0.022821951657533646, 0.0001621654228074476, 0.01655924692749977, 0.005975550040602684, 0.004091952927410603, 0.020196538418531418, 0.013236458413302898, -0.003702242858707905, 0.017051512375473976, 0.0006507982616312802, 0.016791705042123795, 0.022316012531518936, 0.00012701889500021935, 0.0164908766746521, 0.0022442496847361326, 0.03440385311841965, -0.018788114190101624, 0.004902138840407133, 0.006211427040398121, -0.0007054943707771599, -0.014070574194192886, 0.008047165349125862, -0.004420129582285881, -0.04085799306631088, 0.016285765916109085, 
-0.028059104457497597, 0.01978631690144539, -0.026650680229067802, 0.00795828364789486, -0.012286113575100899, 0.017625821754336357, -0.022575819864869118, -0.03672843798995018, 0.01884280890226364, 0.009660700336098671, 0.007486530113965273, -0.015301236882805824, 0.0050696455873548985, 0.001061446382664144, 0.011048614047467709, 0.007308767642825842, 0.006655832752585411, -0.0035313176922500134, 0.017625821754336357, 0.01757112517952919, 0.003596269292756915, 0.02552940882742405, 0.019294053316116333, -0.02216559834778309, -0.031040042638778687, 0.020565737038850784, 0.007233560550957918, 0.008915466256439686, -0.008450549095869064, 0.01191007811576128, 0.014371402561664581, 0.019799992442131042, -0.02201518416404724, 0.03656435012817383, -0.011841707862913609, -0.005144852679222822, -0.001209296751767397, 0.010440119542181492, -0.020948609337210655, 0.018665047362446785, 0.022288665175437927, 0.028688110411167145, 0.019129963591694832, -0.006184078752994537, 0.007370300590991974, -0.026951508596539497, 0.01520551834255457, -0.016955794766545296, -0.01549267303198576, 0.024749990552663803, -0.016190048307180405, 0.007787358481436968, 0.0052268970757722855, 0.016313115134835243, 0.013776582665741444, 0.01947181485593319, -0.026760071516036987, 0.011308420449495316, 0.024271398782730103, 0.01699681580066681, -0.02877015434205532, -0.019485488533973694, -0.029125679284334183, -0.02252112329006195, 0.013325340114533901, -0.009004347026348114, -0.01056318636983633, -0.002628831658512354, -0.007144679315388203, -0.028524020686745644, 0.015301236882805824, -0.00868984404951334, -0.009318850003182888, -0.004597891587764025, 0.03407567739486694, -0.040393076837062836, -0.0003738991799764335, 0.023519327864050865, 0.013202273286879063, -0.01319543644785881, 0.00983162596821785, -0.0046730986796319485, -0.005353381857275963, 0.01824115216732025, 0.022849299013614655, -0.008101860992610455, 0.01699681580066681, 0.009975203312933445, -0.026965182274580002, 
-0.005014949478209019, -0.0002589518844615668, 0.02223396860063076, 0.004064604640007019, 0.00038800053880549967, 0.0018425751477479935, 0.0015596937155351043, 0.006727621424943209, -0.04006490111351013, -0.007971958257257938, 0.04744887351989746, -0.01937609724700451, -0.0018665047828108072, -0.019704272970557213, 0.005620025098323822, -0.010652067139744759, 0.003921027295291424, -0.03038368932902813, -0.01709253340959549, 0.016955794766545296, 0.015998611226677895, -0.0031621188391000032, 0.013831278309226036, 0.018418915569782257, 0.03259888291358948, 0.010330727323889732, -0.0014297905145213008, -0.020593084394931793, -0.021796399727463722, 0.016354136168956757, 0.02641822025179863, 0.026691701263189316, 0.009510286152362823, -0.006768643390387297, -0.007650618441402912, -0.000351892551407218, -0.01579350233078003, -0.02654128707945347, -0.011978448368608952, -0.022780928760766983, 0.0011101601412519813, -0.02711559645831585, 0.00421160040423274, -0.00994101818650961, 0.0478864423930645, 0.002331421710550785, -0.004761979915201664, -0.0339389368891716, -0.0033535552211105824, 0.0026886556297540665, 0.0062421937473118305, -0.01718825288116932, -0.015451651066541672, 0.023368913680315018, 0.036072082817554474, 0.005705487914383411, 0.017147229984402657, -0.014371402561664581, 0.013619331642985344, -0.02191946655511856, -0.009079554118216038, 0.019923057407140732, 0.033829543739557266, 0.019813666120171547, -0.01306553278118372, 0.0264865905046463, 0.03650965169072151, -0.0005213223048485816, 0.02898893877863884, -0.01874709129333496, 0.003204850247129798, 0.03459528833627701, -0.0010546093108132482, 0.005862739402800798, -0.017612148076295853, 0.011363117024302483, -0.030274296179413795, 0.02449018321931362, -0.012928793206810951, -0.007527552079409361, 0.005363637115806341, 0.019581208005547523, -0.018200131133198738, -0.036017388105392456, 0.029098330065608025, -0.0015460196882486343, -0.001059737172909081, -0.002543369075283408, -0.02937181107699871, 
-0.01675068400800228, -0.001104177674278617, -0.023109106346964836, 0.01725662313401699, -0.014125270769000053, 0.0021827167365700006, 0.014945711940526962, -0.01324329525232315, 0.015998611226677895, -0.05494224280118942, 0.02290399558842182, 0.017024165019392967, -0.007315604481846094, -0.02280827797949314, -0.002798047848045826, -0.0020305931102484465, 0.007096820045262575, -0.0033193700946867466, -0.010064084082841873, 0.0033415905199944973, 0.00793093629181385, -0.006108871661126614, 0.005992642603814602, 0.013940670527517796, -0.02127678692340851, -0.007513877935707569, 0.006693436298519373, 0.005062808748334646, 0.017817256972193718, -0.01718825288116932, 0.02182374708354473, -0.009072717279195786, -0.008826584555208683, -0.01295614056289196, -0.014904689975082874, 0.007356626912951469, 0.02360137179493904, -0.018829135224223137, 0.006259285844862461, -0.0009469264186918736, -0.012422854080796242, -0.014644883573055267, -0.007595922332257032, -0.022780928760766983, -0.004543195478618145, 0.016326788812875748, -0.016066981479525566, 0.009318850003182888, 0.0323527492582798, -0.010515326634049416, -0.0031621188391000032, 0.005274755880236626, -0.010946058668196201, 0.0028031757101416588, 0.009756418876349926, -0.004980764351785183, -0.01469957921653986, 0.013680864125490189, 0.018282175064086914, -0.003958630841225386, -0.022466426715254784, 0.012997163459658623, -0.011834871023893356, 0.023669742047786713, 0.007062635384500027, 0.00908639095723629, 0.01824115216732025, -0.001063155592419207, -0.019362423568964005, -0.016477202996611595, 0.003485167631879449, 0.009606004692614079, 0.019102616235613823, 0.0013015965232625604, -0.008006143383681774, -0.016518225893378258, 0.00010303593444405124, 0.004601310007274151, 0.012156210839748383, -0.019745295867323875, 0.008628311567008495, 0.00258439127355814, -0.010139291174709797, -0.0169694684445858, -0.021782726049423218, -0.02093493565917015, -0.03136821836233139, -0.02980937995016575, 0.003232198301702738, 
0.002569007920101285, 0.033227887004613876, 0.015643088147044182, -0.015055104158818722, -0.010693089105188847, -0.013475754298269749, -0.004307318478822708, -0.01122637651860714, -0.010207661427557468, -0.005308941006660461, 0.01908894255757332, 0.004509010352194309, 0.017407037317752838, -0.007513877935707569, 0.01346207968890667, 0.003379194065928459, 0.016094330698251724, 0.005982386879622936, 0.0005174764664843678, -0.029672639444470406, -0.028524020686745644, -0.005414914805442095, -0.007753173355013132, 0.023710763081908226, -0.014904689975082874, -0.02660965733230114, -0.0006435338873416185, 0.0021827167365700006, 0.0177625622600317, 0.02680109441280365, -0.022466426715254784, -0.048570144921541214, -0.0038970978930592537, 0.01354412455111742, -0.020223885774612427, -0.009496612474322319, -0.019430793821811676, -0.027963386848568916, 0.04228009283542633, -0.008204416371881962, 0.025201233103871346, 0.018637700006365776, 0.02816849574446678, 0.0020767429377883673, 0.007445508148521185, -0.005435425788164139, 0.011246887966990471, -0.024244051426649094, 0.001717799692414701, -0.030821258202195168, -0.01944446749985218, 0.03828727826476097, 0.019362423568964005, 0.012388668954372406, 0.01419364009052515, -0.013004000298678875, -0.00948293786495924, -0.003423634683713317, -0.016600269824266434, -0.0007965121185407043, 0.012327135540544987, -0.01785827986896038, 0.00019570952281355858, -0.023546675220131874, -0.008724029175937176, -0.031696394085884094, -0.018596677109599113, 0.016764357686042786, 0.0008785562822595239, 0.010905036702752113, -0.0399555079638958, -0.016162700951099396, 0.002406628802418709, -0.02350565232336521, 0.05023837834596634, -0.004262878093868494, 0.01655924692749977, 0.02144087478518486, -0.008313808590173721, -0.010652067139744759, 0.005261082202196121, -0.02857871726155281, 0.0010896490421146154, 0.04364749416708946, 0.034239765256643295, 0.005182456225156784, 0.00729509349912405, -0.001924619311466813, -0.0003452691889833659, 
0.015410629101097584, -0.03216131404042244, 0.025324298068881035, 0.01836421899497509, -0.007910424843430519, -0.02961794286966324, 0.011246887966990471, -0.04963671788573265, 0.012210906483232975, 0.001960513647645712, 0.010973406955599785, -0.009503449313342571, 0.02302706241607666, -0.036072082817554474, 0.021290460601449013, -0.015848197042942047, 0.027936037629842758, 0.029864076524972916, 0.018377892673015594, -0.019649578258395195, -0.0006832740618847311, -0.0377403162419796, 0.015561043284833431, 0.013127066195011139, 0.023040736094117165, -0.010857177898287773, 0.0199777539819479, 0.026500266045331955, 0.004885046277195215, -0.0064814891666173935, 0.004249203950166702, 0.01658659614622593, 0.021194742992520332, -0.029836727306246758, 0.006987427826970816, -0.016518225893378258, -0.015848197042942047, 0.0145901869982481, -0.017106208950281143, -0.01814543455839157, -0.006659251172095537, 0.0017810420831665397, -0.008942813612520695, 0.009134250693023205, -0.0048508611507713795, 0.026431895792484283, -0.006252449005842209, -0.00795828364789486, -0.02394322119653225, -0.009865811094641685, -0.015725132077932358, 0.02740275114774704, -0.03620882332324982, -0.030903302133083344, -0.015396954491734505, -0.0071173314936459064, 0.01262112706899643, 0.006337911821901798, 0.001266556791961193, 0.01754377782344818, 0.03754888102412224, -0.033282581716775894, -0.004071441479027271, 0.004488499369472265, 0.019129963591694832, -0.026267806068062782, -0.015506346710026264, -0.004656006116420031, -0.02641822025179863, -0.008416363969445229, -0.003177502192556858, -0.02749846875667572, -0.03514225035905838, -0.0031843390315771103, 0.01111698430031538, -0.018227478489279747, 0.009359871968626976, -0.01145199779421091, 0.01675068400800228, -0.003712498350068927, 0.0054456815123558044, -0.04266296699643135, -0.0011588737834244967, -0.014453447423875332, 0.004519266076385975, 0.023204823955893517, -0.005982386879622936, 0.007199375424534082, -0.010761459358036518, 
-0.020975958555936813, -0.018446262925863266, -0.01788562722504139, -0.01560206525027752, 0.0069190580397844315, -0.013318502344191074, -0.005650791805237532, 0.007712151389569044, -0.024982448667287827, 0.010057247243821621, -0.026760071516036987, 0.024941425770521164, 0.001902399118989706, 0.01498673390597105, 0.01975896954536438, 0.02663700468838215, 0.02451753057539463, -0.017133556306362152, -0.02340993471443653, -0.02591228112578392, -0.02124943770468235, -0.008259112946689129, -0.01605330780148506, -0.010905036702752113, -0.0018835972296074033, 0.026062697172164917, 0.032653577625751495, -0.029918771237134933, -0.02937181107699871, -0.011486182920634747, -0.03784970939159393, -0.041596390306949615, -0.012826237827539444, 0.015643088147044182, 0.02540634386241436, 0.02490040473639965, 0.01211518794298172, 0.029125679284334183, -0.015588391572237015, 0.008238601498305798, -0.018377892673015594, 0.011328931897878647, 0.004365433473140001, 0.012764704413712025, -0.005500377621501684, -0.01982733979821205, -0.010241846553981304, -0.021290460601449013, -0.003797961166128516, -0.015930242836475372, 0.022247642278671265, 0.031696394085884094, 0.01731131784617901, -0.012299787253141403, -0.022780928760766983, 0.0014212442329153419, 0.0037227540742605925, 0.00013909049448557198, -0.006792573258280754, -0.004703865386545658, 0.003454401157796383, 0.01848728582262993, -0.008614636957645416, -0.010645230300724506, 0.0043722703121602535, 0.014562839642167091, 0.0037330095656216145, 0.001636610133573413, -0.022493775933980942, -0.02819584496319294, -0.003804798237979412, -0.024722641333937645, 0.002806594129651785, -0.016928445547819138, -0.03429445996880531, 0.05401240661740303, 0.006895128171890974, -0.011937426403164864, 0.0028544531669467688, 0.03946324437856674, -0.012299787253141403, -0.0026869464199990034, 0.02682844176888466, 0.0004149212618358433, 0.012374995276331902, -0.0011212702374905348, 0.03877954185009003, -0.01398853026330471, 0.001921200891956687, 
-0.002787792356684804, -0.023861177265644073, -0.02514653652906418, 0.014043225906789303, -0.002510893391445279, -0.0065020001493394375, -0.03210661560297012, -0.027580512687563896, 0.013496264815330505, 0.00563711766153574, -0.007377137895673513, 0.0018511214293539524, -0.018036041408777237, 0.022192945703864098, 0.005189293529838324, -0.0035757580772042274, -0.011691293679177761, 0.015561043284833431, 0.02048369310796261, -0.0012759576784446836, 0.01448079477995634, 0.20084410905838013, -0.012682660482823849, 0.01184854470193386, 0.03459528833627701, -0.0003330907493364066, 0.009202620014548302, 0.01915731281042099, 0.0006815647939220071, -0.0031928853131830692, 0.020387975499033928, -0.003201431594789028, 0.009223131462931633, -0.0004815821594092995, -0.0005499522667378187, 0.02413465827703476, -0.01398853026330471, -0.02124943770468235, -0.03533368557691574, -0.024079961702227592, -0.018911179155111313, -0.01865137368440628, -0.0005828554276376963, -0.031067389994859695, -0.022343359887599945, -0.0012016051914542913, 0.013291154988110065, -0.032680924981832504, 0.014426099136471748, 0.009455589577555656, -0.005999479442834854, 0.0006029391661286354, 0.0007529261638410389, -0.00899067334830761, 0.010980243794620037, -0.010091432370245457, -0.009250479750335217, 0.033282581716775894, 0.01675068400800228, 0.026883138343691826, 0.027703579515218735, 0.021645985543727875, -0.03038368932902813, -0.014029552228748798, -0.012429690919816494, 0.02273990772664547, 0.02360137179493904, 0.00948293786495924, -0.0026664354372769594, -0.011096472851932049, 0.009332523681223392, -0.0183915663510561, 0.005982386879622936, 0.02819584496319294, -0.009858973324298859, -0.015041430480778217, -0.011787011288106441, -0.004293644800782204, 0.00842320080846548, 0.016354136168956757, 0.027758276090025902, -0.00763694429770112, 0.02896158955991268, -0.01982733979821205, 0.02639087289571762, -0.006122545804828405, 0.005175619386136532, -0.01963590271770954, -0.018637700006365776, 
0.004485080949962139, -0.010918711312115192, -0.005414914805442095, -0.022589493542909622, -0.007862566038966179, -0.0037569389678537846, -0.015369607135653496, -0.01703783869743347, 0.022370709106326103, -0.0010118780191987753, 0.001960513647645712, 0.044167108833789825, -0.03886158764362335, -0.027074573561549187, -0.003009995212778449, -0.0010640103137120605, 0.0002895047946367413, -0.03590799495577812, 0.0015460196882486343, -0.027853993698954582, -0.012559594586491585, -0.02232968620955944, -0.01769419200718403, -0.019020572304725647, 0.008430037647485733, 0.01211518794298172, -0.0063652596436440945, -0.008662496693432331, -0.007377137895673513, 0.03929915651679039, -0.014289358630776405, -0.028551369905471802, -0.02937181107699871, 0.0580325722694397, 0.016025960445404053, 0.0035073880571871996, 0.003797961166128516, 0.0058866688050329685, -0.010474304668605328, -0.01219723280519247, 0.01262112706899643, 0.008320645429193974, 0.016477202996611595, -0.027539491653442383, -0.012867259792983532, -0.002146822400391102, 0.00014752993592992425, 0.0006221682415343821, 0.00345269194804132, -0.03333728015422821, 0.00692931329831481, 0.0011426359415054321, -0.00022177562641445547, -0.004334666766226292, 0.02165965922176838, 0.007876239717006683, -0.017051512375473976, -0.015711458399891853, -0.029043633490800858, 0.0008204416371881962, -0.012545919977128506, -0.021044328808784485, 0.01087768841534853, -0.022780928760766983, 0.02994612045586109, 0.00300144893117249, -0.0027057481929659843, 0.014945711940526962, -0.009715396910905838, -0.02388852648437023, -0.0016947247786447406, -0.005158526822924614, -0.012347646988928318, 0.014576513320207596, 0.005794369149953127, 0.004495336674153805, 0.034841421991586685, -0.026336176320910454, -0.0008751377463340759, -0.014535491354763508, -0.011185354553163052, -0.05223478376865387, -0.022274991497397423, 0.011739152483642101, 0.0010776843409985304, -0.00868984404951334, 0.016600269824266434, -0.0405845120549202, 
0.024093637242913246, -0.0320245735347271, -0.0010238428367301822, -0.029043633490800858, -0.022493775933980942, 0.0202922560274601, 0.010870851576328278, -0.008894954808056355, 0.018911179155111313, -0.03150495886802673, -0.17262092232704163, 0.04113147407770157, 0.016805380582809448, -0.02122209034860134, 0.013250133022665977, 0.043374013155698776, 0.029508551582694054, 0.0164908766746521, 0.0215639416128397, -0.0015725131379440427, 0.0022169016301631927, -0.016422506421804428, -0.010303379967808723, -0.00637551536783576, -0.0013186889700591564, 0.034868769347667694, 0.024408139288425446, 0.0011870765592902899, 0.03019225224852562, 0.011322095058858395, 0.030903302133083344, -0.03784970939159393, 0.03276297077536583, 0.008156557567417622, -0.013756071217358112, -0.022370709106326103, -0.0033347534481436014, 0.030082860961556435, 0.004061186220496893, -0.027949711307883263, -0.0007195957005023956, 0.007828380912542343, -0.0027946294285357, 0.0006482343887910247, 0.015164496377110481, 0.008628311567008495, 0.023587698116898537, -0.022288665175437927, -0.009776929393410683, -0.0035278990399092436, 0.02711559645831585, -0.004167160019278526, -0.0060234093107283115, -0.024982448667287827, -0.010132454335689545, 0.0037227540742605925, 0.023163802921772003, -0.002996321301907301, 0.02778562344610691, 0.00600289786234498, 0.028934242203831673, -0.03708396106958389, 0.008792399428784847, 0.0010383714688941836, 0.008047165349125862, 0.010487979277968407, -0.01665496453642845, 0.018446262925863266, -0.0032698018476366997, -0.009626515209674835, -0.016285765916109085, -0.032243356108665466, -0.003661220893263817, 0.01646352931857109, -0.0065430221147835255, -0.033638108521699905, -0.029426507651805878, 0.009270990267395973, 0.0013366361381486058, 0.0019434212008491158, 0.008737703785300255, -0.027853993698954582, 0.0005768730188719928, -0.020702477544546127, 0.009045368991792202, -0.006019990425556898, -0.03795909881591797, -0.02163231186568737, -0.010925548151135445, 
0.016668640077114105, -0.01843258924782276, 0.018788114190101624, 0.01065890397876501, 0.0001713526580715552, -0.018473610281944275, 0.002083580009639263, 0.003579176729544997, 0.021304134279489517, -0.009223131462931633, -0.02194681391119957, 0.023013388738036156, 0.004755143076181412, -0.020182864740490913, -0.019239356741309166, 0.01829584874212742, -0.00036043880390934646, 0.000244209571974352, -0.011431487277150154, -0.009715396910905838, -0.023450957611203194, 0.040420424193143845, 0.00690538389608264, -0.031040042638778687, 0.015383280813694, 0.034567940980196, 0.011793849058449268, -0.02216559834778309, 0.012368157505989075, 0.047968488186597824, 0.01716090366244316, -0.03000081703066826, 0.015396954491734505, 0.022028857842087746, 0.0024664527736604214, 0.010822992771863937, 0.03429445996880531, 0.004420129582285881, -0.03248948976397514, -0.00015297818754334003, 0.005975550040602684, 0.024968774989247322, -0.008382178843021393, -0.010638393461704254, 0.00888128113001585, -0.014644883573055267, -0.037412140518426895, -0.10217233002185822, -0.026281481608748436, 0.006713947281241417, 0.012552756816148758, -0.005182456225156784, -0.006669506896287203, -0.025474712252616882, 0.016545573249459267, 0.012039980851113796, 0.03713865950703621, 0.006724203005433083, -0.005257663317024708, 0.003681731875985861, 0.020346952602267265, 0.025707172229886055, 0.009359871968626976, -0.004816676024347544, -0.018323196098208427, -0.01167078223079443, 0.04958202317357063, -0.010378587059676647, -0.013660353608429432, -0.018883831799030304, -0.0018665047828108072, 0.007643781136721373, 0.013844952918589115, -0.04110412672162056, 0.006170405074954033, -0.0003950512036681175, 0.0185556560754776, -0.0020203376188874245, -0.0016425926005467772, 0.010180313140153885, -0.01420731469988823, 0.022534796968102455, 0.0010930675780400634, 0.0035278990399092436, -0.018993224948644638, 0.011506694369018078, -0.03839666768908501, 0.002835651393979788, 0.041186168789863586, 
0.020182864740490913, -0.025775542482733727, 0.011773337610065937, 2.1525909687625244e-05, -0.011335768736898899, 0.03735744208097458, 0.014644883573055267, -0.010200824588537216, -0.01874709129333496, 0.00041876710020005703, -0.03054777719080448, 0.018596677109599113, 0.01420731469988823, 0.0022852718830108643, 0.010570023208856583, 0.010788807645440102, 0.019143639132380486, 0.005390985403209925, -0.037986449897289276, -0.015355932526290417, -0.012210906483232975, 0.03019225224852562, 0.014795297756791115, 0.033638108521699905, -0.02956324815750122, -0.006194334477186203, 0.030875954777002335, 0.017106208950281143, -0.009729070588946342, 0.00837534200400114, -0.02280827797949314, 0.026472916826605797, 0.005329451989382505, -0.00033244979567825794, -0.0185556560754776, 0.002331421710550785, 0.001037516864016652, -0.023423608392477036, -0.005914017092436552, -0.018405240029096603, -0.009660700336098671, 0.002989484230056405, 0.03990081325173378, 0.010570023208856583, -0.001583623350597918, -0.013981693424284458, 0.012600616551935673, -0.04271766170859337, 0.027730926871299744, 0.04394832253456116, 0.009995713829994202, -0.012894608080387115, 0.009667537175118923, 0.009018020704388618, 0.02350565232336521, 0.00919578317552805, -0.0018254827009513974, -0.02197416126728058, -0.04583533853292465, -0.023833829909563065, -0.06393975019454956, 0.03391158953309059, 0.0018562491750344634, -0.010788807645440102, -0.0008302698261104524, 0.0008405253756791353, -0.004799583461135626, -0.0055892588570714, 0.007575411349534988, 0.013448406010866165, -0.010788807645440102, -0.0012810854241251945, -0.02340993471443653, -0.0014152618823572993, -0.007876239717006683, -0.01624474488198757, -0.0028117219917476177, -0.004102208185940981, -0.0007695913664065301, -0.00954447127878666, -0.013325340114533901, -0.005910598207265139, 0.01725662313401699, 0.0022613422479480505, -0.007397648878395557, 0.03073921427130699, -0.015465324744582176, 0.0283872801810503, -0.01591656729578972, 
-0.015369607135653496, 0.038615453988313675, -0.010399097576737404, 0.007616433314979076, 0.023970570415258408, -0.014822646044194698, 0.009872647933661938, -0.009899996221065521, 0.011725478805601597, 0.0215639416128397, 0.008101860992610455, 0.009701722301542759, -0.0317237451672554, -0.004208181984722614, -0.019321400672197342, -0.002314329147338867, 0.011998958885669708, -0.01877444051206112, 0.00954447127878666, 0.01500040851533413, -0.005432007368654013, 0.04047512263059616, 0.0039757234044373035, -0.01658659614622593, -0.04925384745001793, -0.001497306046076119, -0.029317114502191544, -0.006149893626570702, -0.001469957991503179, 0.0035108064766973257, -0.020825544372200966, 0.06481488794088364, -0.009441915899515152, -0.0006623357185162604, -0.006751550827175379, 0.004857698455452919, -0.003399705048650503, -0.01874709129333496, 0.024148331955075264, -0.014453447423875332, -0.02252112329006195, -0.0264865905046463, -0.03634556382894516, -0.002986065810546279, 0.017707865685224533, 0.018090737983584404, 0.02701987884938717, -0.01306553278118372, 0.02280827797949314, 0.000530723191332072, 0.028469325974583626, 0.009188946336507797, -0.023163802921772003, 3.998050669906661e-05, 0.019417118281126022, 0.025925956666469574, -0.0006593444850295782, -0.009667537175118923, 0.010713600553572178, 0.001285358564928174, 0.01934874802827835, 0.006249030586332083, -0.0008533447980880737, 0.007602759171277285, -0.013667190447449684, 0.013339013792574406, -0.02127678692340851, -0.0056268624030053616, -0.002808303339406848, 0.016094330698251724, 0.02023756131529808, -0.015848197042942047, -0.00892913993448019, -0.01795399747788906, -0.019799992442131042, -0.02937181107699871, 0.0017451477469876409, -0.024462835863232613, -0.029727336019277573, 0.008874443359673023, 0.01982733979821205, -0.010850341059267521, 0.015725132077932358, 0.0063037266954779625, 0.029098330065608025, -0.033282581716775894, 0.03073921427130699, -0.011616086587309837, 0.0028612902387976646, 
-0.01135628018528223, 0.03793175145983696, 0.019239356741309166, 0.011616086587309837, 0.037248048931360245, -0.0017169450875371695, -0.006529347971081734, -0.010898199863731861, 0.01636781170964241, 0.0002360906364629045, 0.03339197486639023, 0.013865463435649872, -0.028688110411167145, 0.007691640406847, -0.010235009714961052, -0.014152618125081062, -0.013400547206401825, -0.019964080303907394, -0.010993918403983116, 0.014289358630776405, 0.04561655595898628, 0.0738944411277771, 0.020032450556755066, -0.02768990583717823, 0.02432609535753727, -0.01246387604624033, 0.029836727306246758, 0.03815053775906563, -0.018446262925863266, -0.014289358630776405, -0.01072043739259243, 0.04430384933948517, -0.0013178343651816249, -0.018227478489279747, -0.03259888291358948, -0.002762153511866927, 0.007937773130834103, -0.004560288041830063, 0.0063857706263661385, -0.003616780275478959, 0.012867259792983532, 0.02483203448355198, 0.004098789766430855, 0.03153230622410774, 0.013386872597038746, -0.03733009472489357, -0.011205865070223808, 0.03817788511514664, 0.0063242376782000065, -0.010344401933252811, -0.027963386848568916, 0.00932568684220314, -0.0020869984291493893, -0.033255234360694885, -0.031477611511945724, -0.0013306537875905633, 0.0013050149427726865, -0.007178864441812038, 0.01303818542510271, 0.0320245735347271, -0.0017964253202080727, -0.007445508148521185, -0.01078197080641985, -0.0035108064766973257, -0.023013388738036156, -0.009489774703979492, 0.0024117566645145416, -0.010405934415757656, -0.009004347026348114, -0.03251683712005615]}, "text_id_to_ref_doc_id": {"855a4bd9-ed6b-4306-a571-752f33700cfc": "6759f6b2-5179-4b00-b040-7c0209841548"}, "metadata_dict": {"855a4bd9-ed6b-4306-a571-752f33700cfc": {"file_path": "data\\sample.md", "file_name": "sample.md", "file_type": null, "file_size": 3554, "creation_date": "2024-02-13", "last_modified_date": "2024-02-16", "last_accessed_date": "2024-02-16", "_node_type": "TextNode", "document_id": 
"6759f6b2-5179-4b00-b040-7c0209841548", "doc_id": "6759f6b2-5179-4b00-b040-7c0209841548", "ref_doc_id": "6759f6b2-5179-4b00-b040-7c0209841548"}}} -------------------------------------------------------------------------------- /ch5/index_cache/docstore.json: -------------------------------------------------------------------------------- 1 | {"docstore/metadata": {"6759f6b2-5179-4b00-b040-7c0209841548": {"doc_hash": "f8d8537fdbb8cd34eefa11586ef934eccb5e108d3ce69e94830cf116d469547f"}, "855a4bd9-ed6b-4306-a571-752f33700cfc": {"doc_hash": "f8d8537fdbb8cd34eefa11586ef934eccb5e108d3ce69e94830cf116d469547f", "ref_doc_id": "6759f6b2-5179-4b00-b040-7c0209841548"}}, "docstore/data": {"855a4bd9-ed6b-4306-a571-752f33700cfc": {"__data__": {"id_": "855a4bd9-ed6b-4306-a571-752f33700cfc", "embedding": null, "metadata": {"file_path": "data\\sample.md", "file_name": "sample.md", "file_type": null, "file_size": 3554, "creation_date": "2024-02-13", "last_modified_date": "2024-02-16", "last_accessed_date": "2024-02-16"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "6759f6b2-5179-4b00-b040-7c0209841548", "node_type": "4", "metadata": {"file_path": "data\\sample.md", "file_name": "sample.md", "file_type": null, "file_size": 3554, "creation_date": "2024-02-13", "last_modified_date": "2024-02-16", "last_accessed_date": "2024-02-16"}, "hash": "f8d8537fdbb8cd34eefa11586ef934eccb5e108d3ce69e94830cf116d469547f", "class_name": "RelatedNodeInfo"}}, "text": "An h1 header\n============\n\nParagraphs are separated by a blank line.\n\n2nd paragraph. *Italic*, **bold**, and `monospace`. 
Itemized lists\nlook like:\n\n * this one\n * that one\n * the other one\n\nNote that --- not considering the asterisk --- the actual text\ncontent starts at 4-columns in.\n\n> Block quotes are\n> written like so.\n>\n> They can span multiple paragraphs,\n> if you like.\n\nUse 3 dashes for an em-dash. Use 2 dashes for ranges (ex., \"it's all\nin chapters 12--14\"). Three dots ... will be converted to an ellipsis.\nUnicode is supported. \u263a\n\n\n\nAn h2 header\n------------\n\nHere's a numbered list:\n\n 1. first item\n 2. second item\n 3. third item\n\nNote again how the actual text starts at 4 columns in (4 characters\nfrom the left side). Here's a code sample:\n\n # Let me re-iterate ...\n for i in 1 .. 10 { do-something(i) }\n\nAs you probably guessed, indented 4 spaces. By the way, instead of\nindenting the block, you can use delimited blocks, if you like:\n\n~~~\ndefine foobar() {\n print \"Welcome to flavor country!\";\n}\n~~~\n\n(which makes copying & pasting easier). You can optionally mark the\ndelimited block for Pandoc to syntax highlight it:\n\n~~~python\nimport time\n# Quick, count to ten!\nfor i in range(10):\n # (but not *too* quick)\n time.sleep(0.5)\n print i\n~~~\n\n\n\n### An h3 header ###\n\nNow a nested list:\n\n 1. First, get these ingredients:\n\n * carrots\n * celery\n * lentils\n\n 2. Boil some water.\n\n 3. Dump everything in the pot and follow\n this algorithm:\n\n find wooden spoon\n uncover pot\n stir\n cover pot\n balance wooden spoon precariously on pot handle\n wait 10 minutes\n goto first step (or shut off burner when done)\n\n Do not bump wooden spoon or it will fall.\n\nNotice again how text always lines up on 4-space indents (including\nthat last line which continues item 3 above).\n\nHere's a link to [a website](http://foo.bar), to a [local\ndoc](local-doc.html), and to a [section heading in the current\ndoc](#an-h2-header). 
Here's a footnote [^1].\n\n[^1]: Footnote text goes here.\n\nTables can look like this:\n\nsize material color\n---- ------------ ------------\n9 leather brown\n10 hemp canvas natural\n11 glass transparent\n\nTable: Shoes, their sizes, and what they're made of\n\n(The above is the caption for the table.) Pandoc also supports\nmulti-line tables:\n\n-------- -----------------------\nkeyword text\n-------- -----------------------\nred Sunsets, apples, and\n other red or reddish\n things.\n\ngreen Leaves, grass, frogs\n and other things it's\n not easy being.\n-------- -----------------------\n\nA horizontal rule follows.\n\n***\n\nHere's a definition list:\n\napples\n : Good for making applesauce.\noranges\n : Citrus!\ntomatoes\n : There's no \"e\" in tomatoe.\n\nAgain, text is indented 4 spaces. (Put a blank line between each\nterm/definition pair to spread things out more.)\n\nHere's a \"line block\":\n\n| Line one\n| Line too\n| Line tree\n\nand images can be specified like so:\n\n![example image](example-image.jpg \"An exemplary image\")\n\nInline math equations go in like so: $\\omega = d\\phi / dt$. 
Display\nmath should get its own line and be put in in double-dollarsigns:\n\n$$I = \\int \\rho R^{2} dV$$\n\nAnd note that you can backslash-escape any punctuation characters\nwhich you wish to be displayed literally, ex.: \\`foo\\`, \\*bar\\*, etc.", "start_char_idx": 0, "end_char_idx": 3396, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n", "class_name": "TextNode"}, "__type__": "1"}}, "docstore/ref_doc_info": {"6759f6b2-5179-4b00-b040-7c0209841548": {"node_ids": ["855a4bd9-ed6b-4306-a571-752f33700cfc"], "metadata": {"file_path": "data\\sample.md", "file_name": "sample.md", "file_type": null, "file_size": 3554, "creation_date": "2024-02-13", "last_modified_date": "2024-02-16", "last_accessed_date": "2024-02-16"}}}} -------------------------------------------------------------------------------- /ch5/index_cache/graph_store.json: -------------------------------------------------------------------------------- 1 | {"graph_dict": {}} -------------------------------------------------------------------------------- /ch5/index_cache/image__vector_store.json: -------------------------------------------------------------------------------- 1 | {"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}} -------------------------------------------------------------------------------- /ch5/index_cache/index_store.json: -------------------------------------------------------------------------------- 1 | {"index_store/data": {"2070ba32-91af-4bb9-9765-ed6da7ffa1e8": {"__type__": "vector_store", "__data__": "{\"index_id\": \"2070ba32-91af-4bb9-9765-ed6da7ffa1e8\", \"summary\": null, \"nodes_dict\": {\"855a4bd9-ed6b-4306-a571-752f33700cfc\": \"855a4bd9-ed6b-4306-a571-752f33700cfc\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}} -------------------------------------------------------------------------------- /ch5/sample_ChromaDB.py: 
-------------------------------------------------------------------------------- 1 | import chromadb 2 | from llama_index.vector_stores.chroma import ChromaVectorStore 3 | from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext 4 | 5 | db = chromadb.PersistentClient(path="chroma_database") 6 | chroma_collection = db.get_or_create_collection( 7 | "my_chroma_store" 8 | ) 9 | 10 | vector_store = ChromaVectorStore( 11 | chroma_collection=chroma_collection 12 | ) 13 | storage_context = StorageContext.from_defaults( 14 | vector_store=vector_store 15 | ) 16 | 17 | documents = SimpleDirectoryReader("files").load_data() 18 | index = VectorStoreIndex.from_documents( 19 | documents=documents, 20 | storage_context=storage_context 21 | ) 22 | 23 | #the following part displays the entire contents of the ChromaDB collection 24 | results = chroma_collection.get() 25 | print(results) 26 | 27 | ''' We can use the next part to rebuild the Index from the ChromaDB in future sessions 28 | index = VectorStoreIndex.from_vector_store( 29 | vector_store=vector_store, 30 | storage_context=storage_context 31 | ) 32 | ''' 33 | 34 | -------------------------------------------------------------------------------- /ch5/sample_ComposableGraph.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import ComposableGraph, SimpleDirectoryReader, TreeIndex, SummaryIndex 2 | 3 | documents = SimpleDirectoryReader("files").load_data() 4 | index1 = TreeIndex.from_documents([documents[0]]) 5 | index2 = TreeIndex.from_documents([documents[1]]) 6 | summary1 = "A short introduction to ancient Rome" 7 | summary2 = "Some facts about dogs" 8 | 9 | graph = ComposableGraph.from_indices( 10 | SummaryIndex, [index1, index2], 11 | index_summaries=[summary1, summary2] 12 | ) 13 | query_engine = graph.as_query_engine() 14 | 15 | response = query_engine.query("What can you tell me?") 16 | print(response) 17 | 
-------------------------------------------------------------------------------- /ch5/sample_DocumentSummaryIndex.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import DocumentSummaryIndex, SimpleDirectoryReader 2 | 3 | documents = SimpleDirectoryReader("files").load_data() 4 | index = DocumentSummaryIndex.from_documents( 5 | documents, 6 | show_progress=True 7 | ) 8 | 9 | summary1 = index.get_document_summary(documents[0].doc_id) 10 | summary2 = index.get_document_summary(documents[1].doc_id) 11 | print("\nSummary of the first document:" + summary1) 12 | print("\nSummary of the second document:" + summary2) -------------------------------------------------------------------------------- /ch5/sample_KeywordTableIndex.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import KeywordTableIndex, SimpleDirectoryReader 2 | documents = SimpleDirectoryReader("files").load_data() 3 | index = KeywordTableIndex.from_documents(documents) 4 | query_engine = index.as_query_engine() 5 | response = query_engine.query("What famous buildings were in ancient Rome?") 6 | print(response) 7 | -------------------------------------------------------------------------------- /ch5/sample_KnowledgeGraphIndex.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import KnowledgeGraphIndex, SimpleDirectoryReader 2 | documents = SimpleDirectoryReader("files").load_data() 3 | index = KnowledgeGraphIndex.from_documents(documents, max_triplets_per_chunk=2, use_async=True) 4 | query_engine = index.as_query_engine() 5 | response = query_engine.query("Tell me about dogs.") 6 | print(response) 7 | -------------------------------------------------------------------------------- /ch5/sample_SummaryIndex.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import 
SummaryIndex, SimpleDirectoryReader 2 | documents = SimpleDirectoryReader("files").load_data() 3 | index = SummaryIndex.from_documents(documents) 4 | query_engine = index.as_query_engine() 5 | response = query_engine.query("How many documents have you loaded?") 6 | print(response) 7 | -------------------------------------------------------------------------------- /ch5/sample_TreeIndex.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import TreeIndex, SimpleDirectoryReader 2 | documents = SimpleDirectoryReader("files").load_data() 3 | index = TreeIndex.from_documents(documents) 4 | query_engine = index.as_query_engine() 5 | response = query_engine.query("Tell me about dogs") 6 | print(response) 7 | -------------------------------------------------------------------------------- /ch5/sample_VectorStoreIndex.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import VectorStoreIndex, SimpleDirectoryReader 2 | documents = SimpleDirectoryReader("files").load_data() 3 | index = VectorStoreIndex.from_documents(documents) 4 | print("Index created successfully!") 5 | -------------------------------------------------------------------------------- /ch5/sample_cost_estimation1.py: -------------------------------------------------------------------------------- 1 | import tiktoken 2 | from llama_index.core import TreeIndex, SimpleDirectoryReader, Settings 3 | from llama_index.core.llms.mock import MockLLM 4 | from llama_index.core.callbacks import CallbackManager, TokenCountingHandler 5 | 6 | llm = MockLLM(max_tokens=256) 7 | token_counter = TokenCountingHandler( 8 | tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode 9 | ) 10 | callback_manager = CallbackManager([token_counter]) 11 | 12 | Settings.callback_manager=callback_manager 13 | Settings.llm=llm 14 | 15 | documents = SimpleDirectoryReader("cost_prediction_samples").load_data() 16 | 17 | 
index = TreeIndex.from_documents( 18 | documents=documents, 19 | num_children=2, 20 | show_progress=True) 21 | 22 | print("Total LLM Token Count:", token_counter.total_llm_token_count) 23 | 24 | 25 | -------------------------------------------------------------------------------- /ch5/sample_cost_estimation2.py: -------------------------------------------------------------------------------- 1 | import tiktoken 2 | from llama_index.core import MockEmbedding, VectorStoreIndex, SimpleDirectoryReader, Settings 3 | from llama_index.core.callbacks import CallbackManager, TokenCountingHandler 4 | from llama_index.core.llms.mock import MockLLM 5 | 6 | embed_model = MockEmbedding(embed_dim=1536) 7 | llm = MockLLM(max_tokens=256) 8 | token_counter = TokenCountingHandler( 9 | tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode 10 | ) 11 | callback_manager = CallbackManager([token_counter]) 12 | 13 | Settings.embed_model=embed_model 14 | Settings.llm=llm 15 | Settings.callback_manager=callback_manager 16 | 17 | documents = SimpleDirectoryReader("cost_prediction_samples").load_data() 18 | index = VectorStoreIndex.from_documents( 19 | documents=documents, 20 | show_progress=True) 21 | print("Embedding Token Count:", token_counter.total_embedding_token_count) 22 | 23 | query_engine = index.as_query_engine() 24 | response = query_engine.query("What's the cat's name?") 25 | print("Query LLM Token Count:", token_counter.total_llm_token_count) 26 | print("Query Embedding Token Count:",token_counter.total_embedding_token_count) 27 | 28 | -------------------------------------------------------------------------------- /ch5/sample_local_embedding.py: -------------------------------------------------------------------------------- 1 | from llama_index.embeddings.huggingface import HuggingFaceEmbedding 2 | embedding_model = HuggingFaceEmbedding( 3 | model_name="WhereIsAI/UAE-Large-V1" 4 | ) 5 | embeddings = embedding_model.get_text_embedding( 6 | "The quick brown fox jumps over 
the lazy cat!" 7 | ) 8 | print(embeddings[:15]) 9 | 10 | -------------------------------------------------------------------------------- /ch5/sample_persist.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import VectorStoreIndex, SimpleDirectoryReader 2 | documents = SimpleDirectoryReader("data").load_data() 3 | index = VectorStoreIndex.from_documents(documents) 4 | 5 | index.storage_context.persist(persist_dir="index_cache") 6 | print("Index persisted to disk.") -------------------------------------------------------------------------------- /ch5/sample_persist_reload.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import StorageContext, load_index_from_storage 2 | storage_context = StorageContext.from_defaults(persist_dir="index_cache") 3 | index = load_index_from_storage(storage_context) 4 | 5 | print("Index loaded successfully!") 6 | -------------------------------------------------------------------------------- /ch6/files/others/sample.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Example of a simple HTML page 9 | 10 | 16 | 36 | 42 | 43 | 44 | 45 | 46 | 75 | 76 | 82 |

Example of a simple HTML page

83 | 84 |

Hypertext Markup Language (HTML) is the most common language used to 85 | create documents on the World Wide Web. HTML uses hundreds of different 86 | tags to define a layout for web pages. Most tags require an opening <tag> 87 | and a closing </tag>.

88 | 89 |

Example:  <b>On 90 | a webpage, this sentence would be in bold print.</b>

91 | 92 |

Below is an example of a very simple page:

93 | 94 |

95 | 96 |

 This 97 | is the code used to make the page:

98 | 99 |

<HTML>

100 | 101 |

<HEAD>

102 | 103 |

<TITLE>Your Title Here</TITLE> 104 |

105 | 106 |

</HEAD>

107 | 108 |

<BODY BGCOLOR="FFFFFF"> 109 |

110 | 111 |

<CENTER><IMG SRC="clouds.jpg" 112 | ALIGN="BOTTOM"> </CENTER>

113 | 114 |

<HR>

115 | 116 |

<a href="http://somegreatsite.com">Link 117 | Name</a>

118 | 119 |

is a link to another nifty site

120 | 121 |

<H1>This is a Header</H1>

122 | 123 |

<H2>This is a Medium Header</H2> 124 |

125 | 126 |

Send me mail at <a href="mailto:support@yourcompany.com">

127 | 128 |

support@yourcompany.com</a>.

129 | 130 |

<P> This is a new paragraph!

131 | 132 |

<P> <B>This is a new paragraph!</B> 133 |

134 | 135 |

<BR> <B><I>This is a new 136 | sentence without a paragraph break, in bold italics.</I></B> 137 |

138 | 139 |

<HR>

140 | 141 |

</BODY>

142 | 143 |

</HTML>

144 | 145 |

 

146 | 147 |

 

148 | 149 |

 

150 | 151 | 157 | 158 | 159 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /ch6/files/others/sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "quiz": { 3 | "sport": { 4 | "q1": { 5 | "question": "Which one is correct team name in NBA?", 6 | "options": [ 7 | "New York Bulls", 8 | "Los Angeles Kings", 9 | "Golden State Warriros", 10 | "Huston Rocket" 11 | ], 12 | "answer": "Huston Rocket" 13 | } 14 | }, 15 | "maths": { 16 | "q1": { 17 | "question": "5 + 7 = ?", 18 | "options": [ 19 | "10", 20 | "11", 21 | "12", 22 | "13" 23 | ], 24 | "answer": "12" 25 | }, 26 | "q2": { 27 | "question": "12 - 8 = ?", 28 | "options": [ 29 | "1", 30 | "2", 31 | "3", 32 | "4" 33 | ], 34 | "answer": "4" 35 | } 36 | } 37 | } 38 | } -------------------------------------------------------------------------------- /ch6/files/others/sample.md: -------------------------------------------------------------------------------- 1 | An h1 header 2 | ============ 3 | 4 | Paragraphs are separated by a blank line. 5 | 6 | 2nd paragraph. *Italic*, **bold**, and `monospace`. Itemized lists 7 | look like: 8 | 9 | * this one 10 | * that one 11 | * the other one 12 | 13 | Note that --- not considering the asterisk --- the actual text 14 | content starts at 4-columns in. 15 | 16 | > Block quotes are 17 | > written like so. 18 | > 19 | > They can span multiple paragraphs, 20 | > if you like. 21 | 22 | Use 3 dashes for an em-dash. Use 2 dashes for ranges (ex., "it's all 23 | in chapters 12--14"). Three dots ... will be converted to an ellipsis. 24 | Unicode is supported. ☺ 25 | 26 | 27 | 28 | An h2 header 29 | ------------ 30 | 31 | Here's a numbered list: 32 | 33 | 1. first item 34 | 2. second item 35 | 3. third item 36 | 37 | Note again how the actual text starts at 4 columns in (4 characters 38 | from the left side). Here's a code sample: 39 | 40 | # Let me re-iterate ... 41 | for i in 1 .. 
10 { do-something(i) } 42 | 43 | As you probably guessed, indented 4 spaces. By the way, instead of 44 | indenting the block, you can use delimited blocks, if you like: 45 | 46 | ~~~ 47 | define foobar() { 48 | print "Welcome to flavor country!"; 49 | } 50 | ~~~ 51 | 52 | (which makes copying & pasting easier). You can optionally mark the 53 | delimited block for Pandoc to syntax highlight it: 54 | 55 | ~~~python 56 | import time 57 | # Quick, count to ten! 58 | for i in range(10): 59 | # (but not *too* quick) 60 | time.sleep(0.5) 61 | print i 62 | ~~~ 63 | 64 | 65 | 66 | ### An h3 header ### 67 | 68 | Now a nested list: 69 | 70 | 1. First, get these ingredients: 71 | 72 | * carrots 73 | * celery 74 | * lentils 75 | 76 | 2. Boil some water. 77 | 78 | 3. Dump everything in the pot and follow 79 | this algorithm: 80 | 81 | find wooden spoon 82 | uncover pot 83 | stir 84 | cover pot 85 | balance wooden spoon precariously on pot handle 86 | wait 10 minutes 87 | goto first step (or shut off burner when done) 88 | 89 | Do not bump wooden spoon or it will fall. 90 | 91 | Notice again how text always lines up on 4-space indents (including 92 | that last line which continues item 3 above). 93 | 94 | Here's a link to [a website](http://foo.bar), to a [local 95 | doc](local-doc.html), and to a [section heading in the current 96 | doc](#an-h2-header). Here's a footnote [^1]. 97 | 98 | [^1]: Footnote text goes here. 99 | 100 | Tables can look like this: 101 | 102 | size material color 103 | ---- ------------ ------------ 104 | 9 leather brown 105 | 10 hemp canvas natural 106 | 11 glass transparent 107 | 108 | Table: Shoes, their sizes, and what they're made of 109 | 110 | (The above is the caption for the table.) Pandoc also supports 111 | multi-line tables: 112 | 113 | -------- ----------------------- 114 | keyword text 115 | -------- ----------------------- 116 | red Sunsets, apples, and 117 | other red or reddish 118 | things. 
119 | 120 | green Leaves, grass, frogs 121 | and other things it's 122 | not easy being. 123 | -------- ----------------------- 124 | 125 | A horizontal rule follows. 126 | 127 | *** 128 | 129 | Here's a definition list: 130 | 131 | apples 132 | : Good for making applesauce. 133 | oranges 134 | : Citrus! 135 | tomatoes 136 | : There's no "e" in tomatoe. 137 | 138 | Again, text is indented 4 spaces. (Put a blank line between each 139 | term/definition pair to spread things out more.) 140 | 141 | Here's a "line block": 142 | 143 | | Line one 144 | | Line too 145 | | Line tree 146 | 147 | and images can be specified like so: 148 | 149 | ![example image](example-image.jpg "An exemplary image") 150 | 151 | Inline math equations go in like so: $\omega = d\phi / dt$. Display 152 | math should get its own line and be put in in double-dollarsigns: 153 | 154 | $$I = \int \rho R^{2} dV$$ 155 | 156 | And note that you can backslash-escape any punctuation characters 157 | which you wish to be displayed literally, ex.: \`foo\`, \*bar\*, etc. -------------------------------------------------------------------------------- /ch6/files/sample_document1.txt: -------------------------------------------------------------------------------- 1 | In ancient Rome, the city of Rome itself was the heart of the vast Roman Empire. It was known for its grand architecture, including iconic structures like the Colosseum and the Pantheon. The Romans were skilled engineers and builders, creating an extensive network of roads, aqueducts, and bridges that connected their far-reaching territories. The Roman Republic, with its Senate and elected officials, gave rise to the famous Roman legions, which conquered vast lands and brought them under Roman rule. The Roman civilization's influence on art, law, and governance can still be seen in modern societies today. 
-------------------------------------------------------------------------------- /ch6/files/sample_document2.txt: -------------------------------------------------------------------------------- 1 | Many people consider dogs to be their loyal companions. These furry creatures come in various breeds, each with its own unique traits and personalities. From the energetic and playful Labrador Retriever to the dignified and intelligent German Shepherd, there's a dog for every type of person. Dogs have been known to provide comfort, protection, and unwavering love to their owners, making them one of the most beloved pets worldwide. Whether they're chasing a ball in the park or curling up by the fireplace, dogs bring joy to our lives in countless ways. -------------------------------------------------------------------------------- /ch6/sample_bm25_retriever.py: -------------------------------------------------------------------------------- 1 | from llama_index.retrievers.bm25 import BM25Retriever 2 | from llama_index.core.node_parser import SentenceSplitter 3 | from llama_index.core import SimpleDirectoryReader 4 | reader = SimpleDirectoryReader('files') 5 | documents = reader.load_data() 6 | splitter = SentenceSplitter.from_defaults( 7 | chunk_size=60, 8 | chunk_overlap=0, 9 | include_metadata=False 10 | ) 11 | nodes = splitter.get_nodes_from_documents( 12 | documents 13 | ) 14 | 15 | retriever = BM25Retriever.from_defaults( 16 | nodes=nodes, 17 | similarity_top_k=2 18 | ) 19 | response = retriever.retrieve("Who built the Colosseum?") 20 | for node_with_score in response: 21 | print('Text:'+node_with_score.node.text) 22 | print('Score: '+str(node_with_score.score)) 23 | 24 | -------------------------------------------------------------------------------- /ch6/sample_decompose_query_transform.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.indices.query.query_transform.base import DecomposeQueryTransform 2 | 
decompose = DecomposeQueryTransform() 3 | query_bundle = decompose.run( 4 | "Tell me about buildings in ancient Rome" 5 | ) 6 | print(query_bundle.query_str) 7 | -------------------------------------------------------------------------------- /ch6/sample_metadata_filters.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.vector_stores import MetadataFilter, MetadataFilters 2 | from llama_index.core import VectorStoreIndex 3 | from llama_index.core.schema import TextNode 4 | user_departments = {"Alice": "Security", "Bob": "IT"} 5 | 6 | nodes = [ 7 | TextNode( 8 | text=( 9 | "An incident is an accidental or malicious event that has the potential to cause unwanted effects on the security of our IT assets."), 10 | metadata={"department": "Security"}, 11 | ), 12 | TextNode( 13 | text=("An incident is an unexpected interruption or degradation of an IT service."), 14 | metadata={"department": "IT"}, 15 | ) 16 | ] 17 | 18 | def show_report(index, user, query): 19 | user_department = user_departments[user] 20 | filters = MetadataFilters( 21 | filters=[ 22 | MetadataFilter(key="department", value=user_department) 23 | ] 24 | ) 25 | retriever = index.as_retriever(filters=filters) 26 | response = retriever.retrieve(query) 27 | print(f"Response for {user}: {response[0].node.text}") 28 | 29 | 30 | index = VectorStoreIndex(nodes) 31 | query = "What is an incident?" 
32 | show_report(index, "Alice", query) 33 | show_report(index, "Bob", query) 34 | -------------------------------------------------------------------------------- /ch6/sample_openai_question_generator.py: -------------------------------------------------------------------------------- 1 | from llama_index.question_gen.openai import OpenAIQuestionGenerator 2 | from llama_index.core.tools import RetrieverTool, ToolMetadata 3 | from llama_index.core import VectorStoreIndex, SummaryIndex, SimpleDirectoryReader, QueryBundle 4 | 5 | documents = SimpleDirectoryReader("files").load_data() 6 | vector_index = VectorStoreIndex.from_documents([documents[0]]) 7 | summary_index = SummaryIndex.from_documents([documents[1]]) 8 | 9 | vector_tool_metadata = ToolMetadata( 10 | name="Vector Tool", 11 | description="Use this for answering questions about Ancient Rome" 12 | ) 13 | summary_tool_metadata = ToolMetadata( 14 | name="Summary Tool", 15 | description="Use this for answering questions about dogs" 16 | ) 17 | 18 | vector_tool = RetrieverTool( 19 | retriever=vector_index.as_retriever(), 20 | metadata=vector_tool_metadata 21 | ) 22 | summary_tool = RetrieverTool( 23 | retriever=summary_index.as_retriever(), 24 | metadata=summary_tool_metadata 25 | ) 26 | 27 | question_generator = OpenAIQuestionGenerator.from_defaults() 28 | query_bundle = QueryBundle(query_str="Tell me about dogs and Ancient Rome") 29 | sub_questions = question_generator.generate( 30 | tools=[vector_tool.metadata, summary_tool.metadata], 31 | query=query_bundle 32 | ) 33 | 34 | for sub_question in sub_questions: 35 | print(f"{sub_question.tool_name}: {sub_question.sub_question}") 36 | -------------------------------------------------------------------------------- /ch6/sample_retriever_async.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from llama_index.core import KeywordTableIndex 3 | from llama_index.core import SimpleDirectoryReader 4 | 5 | async 
def retrieve(retriever, query, label): 6 | response = await retriever.aretrieve(query) 7 | print(f"{label} retrieved {str(len(response))} nodes") 8 | 9 | async def main(): 10 | reader = SimpleDirectoryReader('files') 11 | documents = reader.load_data() 12 | index = KeywordTableIndex.from_documents(documents) 13 | retriever1 = index.as_retriever( 14 | retriever_mode='default' 15 | ) 16 | retriever2 = index.as_retriever( 17 | retriever_mode='simple' 18 | ) 19 | query = "Where is the Colosseum?" 20 | 21 | await asyncio.gather( 22 | retrieve(retriever1, query, ''), 23 | retrieve(retriever2, query, '') 24 | ) 25 | asyncio.run(main()) 26 | -------------------------------------------------------------------------------- /ch6/sample_retriever_direct.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import SummaryIndex, SimpleDirectoryReader 2 | from llama_index.core.retrievers import SummaryIndexEmbeddingRetriever 3 | 4 | documents = SimpleDirectoryReader("files").load_data() 5 | summary_index = SummaryIndex.from_documents(documents) 6 | retriever = SummaryIndexEmbeddingRetriever( 7 | index=summary_index 8 | ) 9 | result = retriever.retrieve("Tell me about ancient Rome") 10 | print(result[0].text) 11 | -------------------------------------------------------------------------------- /ch6/sample_retriever_from_index1.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import SummaryIndex, SimpleDirectoryReader 2 | 3 | documents = SimpleDirectoryReader("files").load_data() 4 | summary_index = SummaryIndex.from_documents(documents) 5 | retriever = summary_index.as_retriever( 6 | retriever_mode='embedding' 7 | ) 8 | result = retriever.retrieve("Tell me about ancient Rome") 9 | print(result[0].text) 10 | -------------------------------------------------------------------------------- /ch6/sample_retriever_tools.py: 
-------------------------------------------------------------------------------- 1 | from llama_index.core.selectors import PydanticMultiSelector 2 | from llama_index.core.retrievers import RouterRetriever 3 | from llama_index.core.tools import RetrieverTool 4 | from llama_index.core import VectorStoreIndex, SummaryIndex, SimpleDirectoryReader 5 | documents = SimpleDirectoryReader("files").load_data() 6 | 7 | vector_index = VectorStoreIndex.from_documents([documents[0]]) 8 | summary_index = SummaryIndex.from_documents([documents[1]]) 9 | 10 | vector_retriever = vector_index.as_retriever() 11 | summary_retriever = summary_index.as_retriever() 12 | 13 | vector_tool = RetrieverTool.from_defaults( 14 | retriever=vector_retriever, 15 | description="Use this for answering questions about Ancient Rome" 16 | ) 17 | summary_tool = RetrieverTool.from_defaults( 18 | retriever=summary_retriever, 19 | description="Use this for answering questions about dogs" 20 | ) 21 | 22 | retriever = RouterRetriever( 23 | selector=PydanticMultiSelector.from_defaults(), 24 | retriever_tools=[ 25 | vector_tool, 26 | summary_tool 27 | ] 28 | ) 29 | response = retriever.retrieve( 30 | "What can you tell me about the Ancient Rome?" 
31 | ) 32 | for r in response: 33 | print(r.text) 34 | 35 | ''' 36 | #other possible queries 37 | retriever.retrieve("What can you tell me about the Ancient Rome?") 38 | retriever.retrieve("Tell me all you know about dogs") 39 | retriever.retrieve("Tell me about dogs in Ancient Rome") 40 | ''' -------------------------------------------------------------------------------- /ch6/sample_selectors.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.selectors import LLMSingleSelector 2 | options = [ 3 | "option 1 this is good for summarization questions", 4 | "option 2: this is useful for precise definitions", 5 | "option 3: this is useful for comparing concepts", 6 | ] 7 | selector = LLMSingleSelector.from_defaults() 8 | 9 | decision = selector.select( 10 | options, 11 | query="What's the definition of space?" 12 | ).selections[0] 13 | print(decision.index+1) 14 | print(decision.reason) -------------------------------------------------------------------------------- /ch7/files/other/sample_fluffy_the_cat.txt: -------------------------------------------------------------------------------- 1 | In the quaint village of Lavender Hollow, nestled snugly within the embrace of towering trees and rolling hills, a ginger tabby cat named Fluffy resided in a cozy cottage that exuded warmth and tranquility. Fluffy, with her enchanting amber eyes and majestic ginger fur, had become an integral part of the tapestry of life that unfolded within the village. 2 | 3 | It was a beautiful morning, with ethereal rays of sunlight streaming through the curtains, casting a soft golden glow upon Fluffy's slumbering form. As her eyes fluttered open, Fluffy could sense the promise of excitement and adventure that awaited her just outside the window. It was a tranquil Wednesday morning, February 7th, 2024, and the world outside seemed to beckon her with promises of new discoveries and delightful escapades.
4 | 5 | With a graceful stretch, Fluffy emerged from her cozy bed and padded over to the kitchen, where the Joneses, a family known for their unwavering love for animals, awaited her arrival. Mr. and Mrs. Jones, along with their daughter Lucy, had showered Fluffy with love and care ever since she had entered their lives as a tiny, fragile kitten. Lucy, a sprightly 7-year-old with an infectious giggle and a boundless spirit, was Fluffy's cherished companion in mischief and merriment. Together, they embarked on countless adventures, their spirits intertwining like threads in a beautiful tapestry. 6 | 7 | After indulging in a leisurely breakfast of tender morsels, handpicked by Mr. Jones from their vibrant vegetable patch, Fluffy sauntered gracefully through the cat door. The world outside awaited her arrival, a realm ripe for exploration and adventure. The backyard, an oasis of green nestled beside a meandering brook, lay before her like an undiscovered paradise. 8 | 9 | Passing by the vibrant flower beds, where bees hummed with joy amidst the intoxicating scent of lavender and roses, Fluffy made her way towards the beckoning melody of the babbling stream. Its crystalline waters flowed gently, winding through the picturesque landscape like a silver ribbon, whispering secrets of forgotten tales. Fluffy felt a familiar tug at her heart, a longing to immerse herself in the tranquil beauty that awaited her there. 10 | 11 | At the water's edge, Fluffy embarked on her daily ritual of discovery. With nimble paws, she playfully batted at darting fish, observed the graceful dance of dragonflies, and relished in the warmth of sun-kissed rocks. Every day brought new wonders and hidden surprises, as if the stream itself delighted in revealing its secrets to the curious feline explorer. 12 | 13 | As Fluffy ambled along the brook's gentle curve, a splash disrupted the tranquility, drawing her curious gaze downstream. 
There, amidst the shallows, Lucy Jones frolicked in the crystalline waters, her youthful exuberance evident as she eagerly overturned stones in search of aquatic critters. Lucy, with her vibrant personality and infectious laughter, was the perfect companion for Fluffy's more daring escapades. 14 | 15 | A glimmer of silver caught Fluffy's eye, its brilliance beckoning her closer. With cautious steps, she approached the water's edge, her senses attuned to the mysterious allure. Nestled between two mossy stones lay a treasure—a delicate silver pendant in the shape of a crescent moon, suspended from a gossamer chain. Its radiance remained undiminished despite its watery resting place. Fluffy's keen eyes widened in awe as she recognized this delicate piece of jewelry. 16 | 17 | With a deft motion, Fluffy extended her paw, retrieving the pendant from its aqueous sanctuary. Admiring the intricate craftsmanship and the iridescence of tiny diamonds that adorned its surface, Fluffy marveled at the beauty of this lost trinket. Yet, her joy was tempered by a realization—a notion that this exquisite jewel held significance beyond its aesthetic allure. 18 | 19 | A cry of distress echoed down the stream, drawing Fluffy's attention back to Lucy, whose frantic gestures betrayed her anguish. Rocks were upturned, and sandy depths were scrutinized as Lucy desperately searched for something precious—something lost amidst the playful ripples of the brook. 20 | 21 | "It must be hers," Fluffy mused, her feline intuition guiding her actions. With purposeful strides, she approached Lucy, the pendant cradled gently in her jaws. A soft meow escaped her lips as she deposited the treasure at Lucy's feet, a silent offering of solace and reassurance. 22 | 23 | Lucy's eyes widened in disbelief as she beheld the pendant, her expression a mixture of astonishment and gratitude. "My necklace! Fluffy, you found it!" she exclaimed, her voice tinged with relief and elation. 
With trembling fingers, she retrieved the pendant, its familiar weight a comforting reminder of her mother's love and affection. 24 | 25 | In that moment, a bond of camaraderie and trust was forged between cat and child—a testament to the enduring power of friendship and empathy. As Lucy embraced her feline companion with a fierce hug, tears of gratitude glistening in her eyes, Fluffy purred contentedly, her heart aglow with a sense of fulfillment and belonging. 26 | 27 | As the years passed, Fluffy and Lucy would continue their escapades along the idyllic waters of their beloved village stream. The pendant, now lovingly displayed around Lucy's neck, became a symbol of their unbreakable bond and a reminder of the fateful day Fluffy had crossed paths with destiny itself. 28 | 29 | Fluffy, with her adventurous spirit and deep well of wisdom, became a legend in Lavender Hollow. Whenever village children gathered along the stream's edge, they would regale one another with tales of Fluffy's courage and her unwavering loyalty to Lucy. And though Fluffy's physical form eventually returned to the embrace of nature, her spirit, forever intertwined with the gentle stream, continued to inspire every living creature that graced Lavender Hollow. 30 | 31 | And so, the story of Fluffy, the ginger tabby cat who awakened to the call of adventure and wandered alongside the murmuring stream, became etched into the tapestry of the village's history. Her story served as a reminder to all who heard it, that love transcends species, and true friendship can be found in the most unexpected of souls. -------------------------------------------------------------------------------- /ch7/files/sample/sample_document1.txt: -------------------------------------------------------------------------------- 1 | In ancient Rome, the city of Rome itself was the heart of the vast Roman Empire. It was known for its grand architecture, including iconic structures like the Colosseum and the Pantheon. 
The Romans were skilled engineers and builders, creating an extensive network of roads, aqueducts, and bridges that connected their far-reaching territories. The Roman Republic, with its Senate and elected officials, gave rise to the famous Roman legions, which conquered vast lands and brought them under Roman rule. The Roman civilization's influence on art, law, and governance can still be seen in modern societies today. -------------------------------------------------------------------------------- /ch7/files/sample/sample_document2.txt: -------------------------------------------------------------------------------- 1 | Many people consider dogs to be their loyal companions. These furry creatures come in various breeds, each with its own unique traits and personalities. From the energetic and playful Labrador Retriever to the dignified and intelligent German Shepherd, there's a dog for every type of person. Dogs have been known to provide comfort, protection, and unwavering love to their owners, making them one of the most beloved pets worldwide. Whether they're chasing a ball in the park or curling up by the fireplace, dogs bring joy to our lives in countless ways. -------------------------------------------------------------------------------- /ch7/files/sample/sample_document3.txt: -------------------------------------------------------------------------------- 1 | In ancient Athens, the city of Athens itself was the epicenter of the flourishing Athenian civilization. It was renowned for its remarkable achievements in philosophy, politics, and culture. The Athenians were known for their intellectual prowess and creativity, contributing significantly to the fields of philosophy, mathematics, and theater. 2 | 3 | The city of Athens was adorned with magnificent structures, with the Parthenon standing as a testament to their architectural brilliance. This iconic temple dedicated to the goddess Athena showcased the city's dedication to its patron deity. 
The Agora, a bustling marketplace, was the heart of Athenian commerce and social life, where citizens gathered to exchange goods, ideas, and engage in spirited debates. 4 | 5 | Athenian democracy, with its Assembly and Council, laid the foundation for democratic governance. Citizens actively participated in decision-making, and prominent figures like Pericles emerged as influential leaders. The Athenian navy, a formidable force, protected the city-state's interests and expanded its influence across the Aegean Sea. 6 | 7 | The Athenian civilization left an indelible mark on the world of philosophy, with luminaries like Socrates, Plato, and Aristotle shaping the course of human thought. Their philosophical inquiries laid the groundwork for ethical and intellectual exploration that continues to inspire scholars today. 8 | 9 | Theater was another hallmark of ancient Athens, with magnificent amphitheaters like the Theater of Dionysus providing a platform for playwrights like Sophocles and Euripides to showcase their masterpieces. These dramatic works explored profound themes and left a lasting impact on the world of literature and storytelling. 10 | 11 | In the realm of art, Athenian pottery and sculpture displayed exquisite craftsmanship, with pieces like the Discus Thrower capturing the beauty of the human form. The city's cultural achievements in the visual arts, literature, and philosophy continue to resonate in contemporary societies, emphasizing the enduring legacy of ancient Athens. -------------------------------------------------------------------------------- /ch7/files/sample_document1.txt: -------------------------------------------------------------------------------- 1 | In ancient Rome, the city of Rome itself was the heart of the vast Roman Empire. It was known for its grand architecture, including iconic structures like the Colosseum and the Pantheon. 
The Romans were skilled engineers and builders, creating an extensive network of roads, aqueducts, and bridges that connected their far-reaching territories. The Roman Republic, with its Senate and elected officials, gave rise to the famous Roman legions, which conquered vast lands and brought them under Roman rule. The Roman civilization's influence on art, law, and governance can still be seen in modern societies today. -------------------------------------------------------------------------------- /ch7/files/sample_document2.txt: -------------------------------------------------------------------------------- 1 | Many people consider dogs to be their loyal companions. These furry creatures come in various breeds, each with its own unique traits and personalities. From the energetic and playful Labrador Retriever to the dignified and intelligent German Shepherd, there's a dog for every type of person. Dogs have been known to provide comfort, protection, and unwavering love to their owners, making them one of the most beloved pets worldwide. Whether they're chasing a ball in the park or curling up by the fireplace, dogs bring joy to our lives in countless ways. 
-------------------------------------------------------------------------------- /ch7/sample_KeywordNode_Postprocessor.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.postprocessor import KeywordNodePostprocessor 2 | from llama_index.core.schema import TextNode, NodeWithScore 3 | 4 | nodes = [ 5 | TextNode( 6 | text="Entry no: 1, - Attack at Dawn" 7 | ), 8 | TextNode( 9 | text="Entry no: 2, - Go to point Bravo" 10 | ), 11 | TextNode( 12 | text="Entry no: 3, - Roses are Red" 13 | ), 14 | ] 15 | 16 | node_with_score_list = [ 17 | NodeWithScore(node=node) for node in nodes 18 | ] 19 | pp = KeywordNodePostprocessor( 20 | exclude_keywords=["SECRET", "RESTRICTED"] 21 | ) 22 | remaining_nodes = pp.postprocess_nodes( 23 | node_with_score_list 24 | ) 25 | print('Remaining nodes:') 26 | for node_with_score in remaining_nodes: 27 | node = node_with_score.node 28 | print(f"Text: {node.text}") 29 | -------------------------------------------------------------------------------- /ch7/sample_Langchain_Output_Parser.py: -------------------------------------------------------------------------------- 1 | from langchain.output_parsers import StructuredOutputParser, ResponseSchema 2 | from llama_index.core.output_parsers import LangchainOutputParser 3 | from llama_index.llms.openai import OpenAI 4 | from llama_index.core.schema import TextNode 5 | from llama_index.core import VectorStoreIndex 6 | from pydantic import BaseModel 7 | from typing import List 8 | 9 | nodes = [ 10 | TextNode( 11 | text="Roses have vibrant colors and smell nice."), 12 | TextNode( 13 | text="Oak trees are tall and have green leaves."), 14 | ] 15 | 16 | schemas = [ 17 | ResponseSchema( 18 | name="answer", 19 | description=( 20 | "answer to the user's question" 21 | ) 22 | ), 23 | ResponseSchema( 24 | name="source", 25 | description=( 26 | "the source text used to answer the user's question, " 27 | "should be a quote from the original prompt." 
28 | ) 29 | ) 30 | ] 31 | 32 | lc_parser = StructuredOutputParser.from_response_schemas(schemas) 33 | output_parser = LangchainOutputParser(lc_parser) 34 | 35 | llm = OpenAI(output_parser=output_parser) 36 | 37 | index = VectorStoreIndex(nodes=nodes) 38 | query_engine = index.as_query_engine(llm=llm) 39 | response = query_engine.query( 40 | "Are oak trees small? yes or no", 41 | ) 42 | print(response) 43 | -------------------------------------------------------------------------------- /ch7/sample_Metadata_Replacement_Postprocessor.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.postprocessor import MetadataReplacementPostProcessor 2 | from llama_index.core.schema import TextNode, NodeWithScore 3 | 4 | nodes = [ 5 | TextNode( 6 | text="Article 1", 7 | metadata={"summary": "Summary of article 1"} 8 | ), 9 | TextNode( 10 | text="Article 2", 11 | metadata={"summary": "Summary of article 2"} 12 | ), 13 | ] 14 | 15 | node_with_score_list = [ 16 | NodeWithScore(node=node) for node in nodes 17 | ] 18 | pp = MetadataReplacementPostProcessor( 19 | target_metadata_key="summary" 20 | ) 21 | processed_nodes = pp.postprocess_nodes( 22 | node_with_score_list 23 | ) 24 | for node_with_score in processed_nodes: 25 | print(f"Replaced Text: {node_with_score.node.text}") 26 | -------------------------------------------------------------------------------- /ch7/sample_Router_Query_Engine.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.tools import QueryEngineTool 2 | from llama_index.core.query_engine import RouterQueryEngine 3 | from llama_index.core.selectors import PydanticMultiSelector 4 | from llama_index.core import SummaryIndex, SimpleDirectoryReader 5 | from llama_index.core.extractors import TitleExtractor 6 | 7 | documents = SimpleDirectoryReader("files").load_data() 8 | 9 | title_extractor = TitleExtractor() 10 | for doc in documents: 11 | 
title_metadata = title_extractor.extract([doc]) 12 | doc.metadata.update(title_metadata[0]) 13 | 14 | indexes = [] 15 | query_engines = [] 16 | tools = [] 17 | 18 | for doc in documents: 19 | document_title = doc.metadata['document_title'] 20 | index = SummaryIndex.from_documents([doc]) 21 | query_engine = index.as_query_engine( 22 | response_mode="tree_summarize", 23 | use_async=True, 24 | ) 25 | tool = QueryEngineTool.from_defaults( 26 | query_engine=query_engine, 27 | description=f"Contains data about {document_title}", 28 | ) 29 | indexes.append(index) 30 | query_engines.append(query_engine) 31 | tools.append(tool) 32 | 33 | qe = RouterQueryEngine( 34 | selector=PydanticMultiSelector.from_defaults(), 35 | query_engine_tools=tools 36 | ) 37 | 38 | response = qe.query( 39 | "Tell me about Rome and dogs" 40 | ) 41 | print(response) 42 | -------------------------------------------------------------------------------- /ch7/sample_Similarity_Postprocessor.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.postprocessor import SimilarityPostprocessor 2 | from llama_index.core import VectorStoreIndex, SimpleDirectoryReader 3 | 4 | reader = SimpleDirectoryReader('files/other') 5 | documents = reader.load_data() 6 | index = VectorStoreIndex.from_documents(documents) 7 | retriever = index.as_retriever(retriever_mode='default') 8 | nodes = retriever.retrieve( 9 | "What did Fluffy found in the gentle stream?" 
10 | ) 11 | 12 | print('Initial nodes:') 13 | for node in nodes: 14 | print(f"Node: {node.node_id} - Score: {node.score}") 15 | 16 | pp = SimilarityPostprocessor( 17 | nodes=nodes, 18 | similarity_cutoff=0.86 19 | ) 20 | 21 | remaining_nodes = pp.postprocess_nodes(nodes) 22 | print('Remaining nodes:') 23 | for node in remaining_nodes: 24 | print(f"Node: {node.node_id} - Score: {node.score}") 25 | -------------------------------------------------------------------------------- /ch7/sample_SubQuestion_Query_Engine.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.tools import QueryEngineTool 2 | from llama_index.core.query_engine import RouterQueryEngine 3 | from llama_index.core.query_engine import SubQuestionQueryEngine 4 | from llama_index.core.selectors import PydanticMultiSelector 5 | from llama_index.core.extractors import TitleExtractor 6 | from llama_index.core import SummaryIndex, SimpleDirectoryReader 7 | 8 | documents = SimpleDirectoryReader("files/sample").load_data() 9 | title_extractor = TitleExtractor() 10 | for doc in documents: 11 | title_metadata = title_extractor.extract([doc]) 12 | doc.metadata.update(title_metadata[0]) 13 | 14 | indexes = [] 15 | query_engines = [] 16 | tools = [] 17 | 18 | for doc in documents: 19 | document_title = doc.metadata['document_title'] 20 | file_name = doc.metadata['file_name'] 21 | index = SummaryIndex.from_documents([doc]) 22 | query_engine = index.as_query_engine( 23 | response_mode="tree_summarize", 24 | use_async=True, 25 | ) 26 | tool = QueryEngineTool.from_defaults( 27 | query_engine=query_engine, 28 | name=file_name, 29 | description=f"Contains data about {document_title}", 30 | ) 31 | indexes.append(index) 32 | query_engines.append(query_engine) 33 | tools.append(tool) 34 | 35 | qe = SubQuestionQueryEngine.from_defaults( 36 | query_engine_tools=tools, 37 | use_async=True 38 | ) 39 | 40 | response = qe.query( 41 | "Compare buildings from ancient 
Athens and ancient Rome" 42 | ) 43 | print(response) 44 | 45 | -------------------------------------------------------------------------------- /ch7/sample_low_level_query_engine.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.retrievers import SummaryIndexEmbeddingRetriever 2 | from llama_index.core.postprocessor import SimilarityPostprocessor 3 | from llama_index.core.query_engine import RetrieverQueryEngine 4 | from llama_index.core import SummaryIndex, SimpleDirectoryReader, get_response_synthesizer 5 | 6 | documents = SimpleDirectoryReader("files").load_data() 7 | index = SummaryIndex.from_documents(documents) 8 | 9 | retriever = SummaryIndexEmbeddingRetriever( 10 | index=index, 11 | similarity_top_k=3, 12 | ) 13 | response_synthesizer = get_response_synthesizer( 14 | response_mode="tree_summarize", 15 | verbose=True 16 | ) 17 | pp = SimilarityPostprocessor(similarity_cutoff=0.7) 18 | 19 | query_engine = RetrieverQueryEngine( 20 | retriever=retriever, 21 | response_synthesizer=response_synthesizer, 22 | node_postprocessors=[pp] 23 | ) 24 | response = query_engine.query( 25 | "Enumerate iconic buildings in ancient Rome" 26 | ) 27 | print(response) 28 | -------------------------------------------------------------------------------- /ch7/sample_response_synthesizer.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.schema import TextNode, NodeWithScore 2 | from llama_index.core import get_response_synthesizer 3 | 4 | nodes = [ 5 | TextNode(text= 6 | "The town square clock was built in 1895" 7 | ), 8 | TextNode(text= 9 | "A turquoise parrot lives in the Amazon" 10 | ), 11 | TextNode(text= 12 | "A rare orchid blooms only at midnight" 13 | ), 14 | ] 15 | 16 | node_with_score_list = [NodeWithScore(node=node) for node in nodes] 17 | synth = get_response_synthesizer( 18 | response_mode="refine", 19 | use_async=False, 20 | streaming=False, 21 | ) 
22 | 23 | response = synth.synthesize( 24 | "When was the clock built?", 25 | nodes=node_with_score_list 26 | ) 27 | print(response) -------------------------------------------------------------------------------- /ch8/files/database/employees.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Data-Driven-Applications-with-LlamaIndex/3acc296df6e8a9c8c5aafe15b589c69458215e3f/ch8/files/database/employees.db -------------------------------------------------------------------------------- /ch8/files/sample_document1.txt: -------------------------------------------------------------------------------- 1 | In ancient Rome, the city of Rome itself was the heart of the vast Roman Empire. It was known for its grand architecture, including iconic structures like the Colosseum and the Pantheon. The Romans were skilled engineers and builders, creating an extensive network of roads, aqueducts, and bridges that connected their far-reaching territories. The Roman Republic, with its Senate and elected officials, gave rise to the famous Roman legions, which conquered vast lands and brought them under Roman rule. The Roman civilization's influence on art, law, and governance can still be seen in modern societies today. -------------------------------------------------------------------------------- /ch8/files/sample_document2.txt: -------------------------------------------------------------------------------- 1 | Many people consider dogs to be their loyal companions. These furry creatures come in various breeds, each with its own unique traits and personalities. From the energetic and playful Labrador Retriever to the dignified and intelligent German Shepherd, there's a dog for every type of person. Dogs have been known to provide comfort, protection, and unwavering love to their owners, making them one of the most beloved pets worldwide. 
Whether they're chasing a ball in the park or curling up by the fireplace, dogs bring joy to our lives in countless ways. -------------------------------------------------------------------------------- /ch8/other/sample_document1.txt: -------------------------------------------------------------------------------- 1 | In ancient Rome, the city of Rome itself was the heart of the vast Roman Empire. It was known for its grand architecture, including iconic structures like the Colosseum and the Pantheon. The Romans were skilled engineers and builders, creating an extensive network of roads, aqueducts, and bridges that connected their far-reaching territories. The Roman Republic, with its Senate and elected officials, gave rise to the famous Roman legions, which conquered vast lands and brought them under Roman rule. The Roman civilization's influence on art, law, and governance can still be seen in modern societies today. -------------------------------------------------------------------------------- /ch8/other/sample_document2.txt: -------------------------------------------------------------------------------- 1 | Many people consider dogs to be their loyal companions. These furry creatures come in various breeds, each with its own unique traits and personalities. From the energetic and playful Labrador Retriever to the dignified and intelligent German Shepherd, there's a dog for every type of person. Dogs have been known to provide comfort, protection, and unwavering love to their owners, making them one of the most beloved pets worldwide. Whether they're chasing a ball in the park or curling up by the fireplace, dogs bring joy to our lives in countless ways. -------------------------------------------------------------------------------- /ch8/other/sample_document3.txt: -------------------------------------------------------------------------------- 1 | In ancient Athens, the city of Athens itself was the epicenter of the flourishing Athenian civilization. 
It was renowned for its remarkable achievements in philosophy, politics, and culture. The Athenians were known for their intellectual prowess and creativity, contributing significantly to the fields of philosophy, mathematics, and theater. 2 | 3 | The city of Athens was adorned with magnificent structures, with the Parthenon standing as a testament to their architectural brilliance. This iconic temple dedicated to the goddess Athena showcased the city's dedication to its patron deity. The Agora, a bustling marketplace, was the heart of Athenian commerce and social life, where citizens gathered to exchange goods, ideas, and engage in spirited debates. 4 | 5 | Athenian democracy, with its Assembly and Council, laid the foundation for democratic governance. Citizens actively participated in decision-making, and prominent figures like Pericles emerged as influential leaders. The Athenian navy, a formidable force, protected the city-state's interests and expanded its influence across the Aegean Sea. 6 | 7 | The Athenian civilization left an indelible mark on the world of philosophy, with luminaries like Socrates, Plato, and Aristotle shaping the course of human thought. Their philosophical inquiries laid the groundwork for ethical and intellectual exploration that continues to inspire scholars today. 8 | 9 | Theater was another hallmark of ancient Athens, with magnificent amphitheaters like the Theater of Dionysus providing a platform for playwrights like Sophocles and Euripides to showcase their masterpieces. These dramatic works explored profound themes and left a lasting impact on the world of literature and storytelling. 10 | 11 | In the realm of art, Athenian pottery and sculpture displayed exquisite craftsmanship, with pieces like the Discus Thrower capturing the beauty of the human form. The city's cultural achievements in the visual arts, literature, and philosophy continue to resonate in contemporary societies, emphasizing the enduring legacy of ancient Athens. 
-------------------------------------------------------------------------------- /ch8/sample_agentrunner.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.agent import AgentRunner 2 | from llama_index.agent.openai import OpenAIAgentWorker 3 | from llama_index.tools.database import DatabaseToolSpec 4 | 5 | db_tools = DatabaseToolSpec(uri="sqlite:///files//database//employees.db") 6 | tools = db_tools.to_tool_list() 7 | 8 | step_engine = OpenAIAgentWorker.from_tools( 9 | tools, 10 | verbose=True 11 | ) 12 | 13 | agent = AgentRunner(step_engine) 14 | task_input = ( 15 | "Find the highest paid HR employee and write " 16 | "them an email announcing a bonus" 17 | ) 18 | 19 | task = agent.create_task(task_input) 20 | step_output = agent.run_step(task.task_id) 21 | 22 | while not step_output.is_last: 23 | step_output = agent.run_step(task.task_id) 24 | 25 | response = agent.finalize_response(task.task_id) 26 | print(response) 27 | -------------------------------------------------------------------------------- /ch8/sample_chat_memory.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.storage.chat_store import SimpleChatStore 2 | from llama_index.core.chat_engine import SimpleChatEngine 3 | from llama_index.core.memory import ChatMemoryBuffer 4 | 5 | try: 6 | chat_store = SimpleChatStore.from_persist_path( 7 | persist_path="chat_memory.json" 8 | ) 9 | except FileNotFoundError: 10 | chat_store = SimpleChatStore() 11 | 12 | 13 | memory = ChatMemoryBuffer.from_defaults( 14 | token_limit=2000, 15 | chat_store=chat_store, 16 | chat_store_key="user_X" 17 | ) 18 | 19 | chat_engine = SimpleChatEngine.from_defaults(memory=memory) 20 | while True: 21 | user_message = input("You: ") 22 | if user_message.lower() == 'exit': 23 | print("Exiting chat...") 24 | break 25 | response = chat_engine.chat(user_message) 26 | print(f"Chatbot: {response}") 27 | 28 |
chat_store.persist(persist_path="chat_memory.json") 29 | -------------------------------------------------------------------------------- /ch8/sample_condense_question.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import VectorStoreIndex, SimpleDirectoryReader 2 | from llama_index.core.chat_engine import CondenseQuestionChatEngine 3 | from llama_index.core.llms import ChatMessage 4 | 5 | documents = SimpleDirectoryReader("files").load_data() 6 | index = VectorStoreIndex.from_documents(documents) 7 | query_engine=index.as_query_engine() 8 | chat_history = [ 9 | ChatMessage( 10 | role="user", 11 | content="Arch of Constantine is a famous " 12 | "building in Rome" 13 | ), 14 | ChatMessage( 15 | role="user", 16 | content="The Pantheon should not be " 17 | "regarded as a famous building" 18 | ), 19 | ] 20 | 21 | chat_engine = CondenseQuestionChatEngine.from_defaults( 22 | query_engine=query_engine, 23 | chat_history=chat_history 24 | ) 25 | response = chat_engine.chat( 26 | "What are two of the most famous structures in ancient Rome?" 27 | ) 28 | print(response) 29 | -------------------------------------------------------------------------------- /ch8/sample_context_chat_engine.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import VectorStoreIndex, SimpleDirectoryReader 2 | docs = SimpleDirectoryReader(input_dir="files").load_data() 3 | index = VectorStoreIndex.from_documents(docs) 4 | chat_engine = index.as_chat_engine( 5 | chat_mode="context", 6 | system_prompt=( 7 | "You’re a chatbot, able to talk about " 8 | "general topics, as well as answering specific " 9 | "questions about ancient Rome."
10 | ), 11 | ) 12 | chat_engine.chat_repl() 13 | -------------------------------------------------------------------------------- /ch8/sample_function_tool.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.tools import FunctionTool 2 | 3 | def calculate_average(*values): 4 | """ 5 | Calculates the average of the provided values. 6 | """ 7 | return sum(values) / len(values) 8 | average_tool = FunctionTool.from_defaults( 9 | fn=calculate_average 10 | ) 11 | -------------------------------------------------------------------------------- /ch8/sample_llmcompiler_agent.py: -------------------------------------------------------------------------------- 1 | from llama_index.tools.database import DatabaseToolSpec 2 | from llama_index.packs.agents_llm_compiler import LLMCompilerAgentPack 3 | 4 | db_tools = DatabaseToolSpec(uri="sqlite:///files//database//employees.db") 5 | agent = LLMCompilerAgentPack(db_tools.to_tool_list()) 6 | 7 | response = agent.run( 8 | "List the HR department employee " 9 | "with the highest salary " 10 | ) 11 | 12 | # Only the import was changed; 
the prior download step is no longer needed 13 | # Worth mentioning: pip install llama-index-packs-agents-llm-compiler -------------------------------------------------------------------------------- /ch8/sample_loandandsearch_toolspec.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.tools.tool_spec.load_and_search.base import LoadAndSearchToolSpec 2 | from llama_index.tools.database import DatabaseToolSpec 3 | from llama_index.agent.openai import OpenAIAgent 4 | from llama_index.llms.openai import OpenAI 5 | 6 | db_tools = DatabaseToolSpec(uri="sqlite:///files//database//employees.db") 7 | tool_list = db_tools.to_tool_list() 8 | tools=LoadAndSearchToolSpec.from_defaults(tool_list[0]).to_tool_list() 9 | 10 | llm = OpenAI(model="gpt-4") 11 | agent = OpenAIAgent.from_tools( 12 | tools=tools, 13 | llm=llm, 14 | verbose=True 15 | ) 16 | response = agent.chat( 17 | "Who has the highest salary in the Employees table?") 18 | print(response) 19 | -------------------------------------------------------------------------------- /ch8/sample_ondemandloader_tool.py: -------------------------------------------------------------------------------- 1 | from llama_index.agent.openai import OpenAIAgent 2 | from llama_index.core.tools.ondemand_loader_tool import OnDemandLoaderTool 3 | from llama_index.readers.wikipedia import WikipediaReader 4 | 5 | tool = OnDemandLoaderTool.from_defaults( 6 | WikipediaReader(), 7 | name="WikipediaReader", 8 | description="args: {'pages': [],'query_str': }" 9 | ) 10 | 11 | agent = OpenAIAgent.from_tools( 12 | tools=[tool], 13 | verbose=True 14 | ) 15 | response = agent.chat( 16 | "What were some famous buildings in ancient Rome?") 17 | print(response) 18 | -------------------------------------------------------------------------------- /ch8/sample_openai_agent.py: -------------------------------------------------------------------------------- 1 | from llama_index.tools.database import
DatabaseToolSpec 2 | from llama_index.core.tools import FunctionTool 3 | from llama_index.agent.openai import OpenAIAgent 4 | from llama_index.llms.openai import OpenAI 5 | 6 | def write_text_to_file(text, filename): 7 | """ 8 | Writes the text to a file with the specified filename. 9 | Args: 10 | text (str): The text to be written to the file. 11 | filename (str): File name to write the text into. 12 | Returns: None 13 | """ 14 | with open(filename, 'w', encoding='utf-8') as file: 15 | file.write(text) 16 | 17 | save_tool = FunctionTool.from_defaults(fn=write_text_to_file) 18 | db_tools = DatabaseToolSpec(uri="sqlite:///files//database//employees.db") 19 | tools = [save_tool]+db_tools.to_tool_list() 20 | 21 | llm = OpenAI(model="gpt-4") 22 | agent = OpenAIAgent.from_tools( 23 | tools=tools, 24 | llm=llm, 25 | verbose=True, 26 | max_function_calls=20 27 | ) 28 | 29 | response = agent.chat( 30 | "For each IT department employee with a salary lower " 31 | "than the average organization salary, write an email, " 32 | "announcing a 10% raise and then save all emails into " 33 | "a file called 'emails.txt'") 34 | print(response) 35 | -------------------------------------------------------------------------------- /ch8/sample_simple_chat_engine.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.chat_engine import SimpleChatEngine 2 | chat_engine = SimpleChatEngine.from_defaults() 3 | chat_engine.chat_repl() 4 | -------------------------------------------------------------------------------- /ch8/sample_simple_chat_engine_custom_LLM.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.chat_engine import SimpleChatEngine 2 | from llama_index.llms.openai import OpenAI 3 | 4 | llm = OpenAI(temperature=0.8, model="gpt-4") 5 | chat_engine = SimpleChatEngine.from_defaults(llm=llm) 6 | chat_engine.chat_repl() 7 |
-------------------------------------------------------------------------------- /ch9/files/database/employees.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Data-Driven-Applications-with-LlamaIndex/3acc296df6e8a9c8c5aafe15b589c69458215e3f/ch9/files/database/employees.db -------------------------------------------------------------------------------- /ch9/files/sample_document1.txt: -------------------------------------------------------------------------------- 1 | In ancient Rome, the city of Rome itself was the heart of the vast Roman Empire. It was known for its grand architecture, including iconic structures like the Colosseum and the Pantheon. The Romans were skilled engineers and builders, creating an extensive network of roads, aqueducts, and bridges that connected their far-reaching territories. The Roman Republic, with its Senate and elected officials, gave rise to the famous Roman legions, which conquered vast lands and brought them under Roman rule. The Roman civilization's influence on art, law, and governance can still be seen in modern societies today. -------------------------------------------------------------------------------- /ch9/files/sample_document2.txt: -------------------------------------------------------------------------------- 1 | Many people consider dogs to be their loyal companions. These furry creatures come in various breeds, each with its own unique traits and personalities. From the energetic and playful Labrador Retriever to the dignified and intelligent German Shepherd, there's a dog for every type of person. Dogs have been known to provide comfort, protection, and unwavering love to their owners, making them one of the most beloved pets worldwide. Whether they're chasing a ball in the park or curling up by the fireplace, dogs bring joy to our lives in countless ways. 
-------------------------------------------------------------------------------- /ch9/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.26.1 2 | aiohttp==3.9.3 3 | aiosignal==1.3.1 4 | altair==5.2.0 5 | annotated-types==0.6.0 6 | anyio==4.3.0 7 | appdirs==1.4.4 8 | arize-phoenix==3.4.1 9 | asgiref==3.7.2 10 | attrs==23.2.0 11 | backoff==2.2.1 12 | bcrypt==4.1.2 13 | beautifulsoup4==4.12.3 14 | bitsandbytes==0.42.0 15 | blinker==1.7.0 16 | bs4==0.0.2 17 | build==1.0.3 18 | cachetools==5.3.3 19 | certifi==2024.2.2 20 | cffi==1.16.0 21 | charset-normalizer==3.3.2 22 | chroma-hnswlib==0.7.3 23 | chromadb==0.4.24 24 | chromedriver-autoinstaller==0.6.4 25 | click==8.1.7 26 | colorama==0.4.6 27 | coloredlogs==15.0.1 28 | cssselect==1.2.0 29 | Cython==0.29.37 30 | dataclasses-json==0.6.4 31 | datasets==2.17.1 32 | ddsketch==2.0.4 33 | deepeval==0.20.77 34 | Deprecated==1.2.14 35 | dill==0.3.8 36 | dirtyjson==1.0.8 37 | distro==1.9.0 38 | execnet==2.0.2 39 | fastapi==0.110.0 40 | feedfinder2==0.0.4 41 | feedparser==6.0.11 42 | filelock==3.13.1 43 | flatbuffers==23.5.26 44 | frozenlist==1.4.1 45 | fsspec==2023.10.0 46 | gitdb==4.0.11 47 | GitPython==3.1.42 48 | google-auth==2.28.1 49 | googleapis-common-protos==1.62.0 50 | graphql-core==3.2.3 51 | greenlet==3.0.3 52 | grpcio==1.62.0 53 | h11==0.14.0 54 | hdbscan==0.8.33 55 | html2text==2020.1.16 56 | httpcore==1.0.4 57 | httptools==0.6.1 58 | httpx==0.25.2 59 | huggingface-hub==0.20.3 60 | humanfriendly==10.0 61 | idna==3.6 62 | importlib-metadata==6.11.0 63 | importlib_resources==6.1.2 64 | iniconfig==2.0.0 65 | jieba3k==0.35.1 66 | Jinja2==3.1.2 67 | joblib==1.3.2 68 | jsonpatch==1.33 69 | jsonpointer==2.4 70 | jsonschema==4.21.1 71 | jsonschema-specifications==2023.12.1 72 | kubernetes==29.0.0 73 | langchain==0.1.9 74 | langchain-community==0.0.24 75 | langchain-core==0.1.28 76 | langchain-openai==0.0.8 77 | langsmith==0.1.10 78 | llama-hub==0.0.79.post1 
79 | llama-index==0.10.3 80 | llama-index-agent-openai==0.1.5 81 | llama-index-callbacks-arize-phoenix==0.1.4 82 | llama-index-callbacks-deepeval==0.1.2 83 | llama-index-cli==0.1.7 84 | llama-index-core==0.10.13 85 | llama-index-embeddings-mistralai==0.1.3 86 | llama-index-embeddings-openai==0.1.6 87 | llama-index-indices-managed-llama-cloud==0.1.3 88 | llama-index-legacy==0.9.48 89 | llama-index-llms-huggingface==0.1.3 90 | llama-index-llms-neutrino==0.1.4 91 | llama-index-llms-openai==0.1.6 92 | llama-index-llms-openai-like==0.1.3 93 | llama-index-multi-modal-llms-openai==0.1.4 94 | llama-index-packs-arize-phoenix-query-engine==0.1.4 95 | llama-index-packs-evaluator-benchmarker==0.1.3 96 | llama-index-packs-rag-evaluator==0.1.3 97 | llama-index-packs-zephyr-query-engine==0.1.2 98 | llama-index-program-evaporate==0.1.2 99 | llama-index-program-openai==0.1.4 100 | llama-index-question-gen-openai==0.1.3 101 | llama-index-readers-file==0.1.6 102 | llama-index-readers-llama-parse==0.1.3 103 | llama-index-readers-web==0.1.6 104 | llama-index-vector-stores-chroma==0.1.4 105 | llama-parse==0.3.4 106 | llamaindex-py-client==0.1.13 107 | llvmlite==0.42.0 108 | lxml==5.1.0 109 | markdown-it-py==3.0.0 110 | MarkupSafe==2.1.3 111 | marshmallow==3.21.0 112 | mdurl==0.1.2 113 | mistralai==0.0.11 114 | mmh3==4.1.0 115 | monotonic==1.6 116 | mpmath==1.3.0 117 | multidict==6.0.5 118 | multiprocess==0.70.16 119 | mypy-extensions==1.0.0 120 | nest-asyncio==1.6.0 121 | networkx==3.2.1 122 | newspaper3k==0.2.8 123 | nltk==3.8.1 124 | numba==0.59.0 125 | numpy==1.26.4 126 | oauthlib==3.2.2 127 | onnxruntime==1.17.1 128 | openai==1.12.0 129 | openinference-instrumentation-langchain==0.1.11 130 | openinference-instrumentation-llama-index==1.1.0 131 | openinference-instrumentation-openai==0.1.3 132 | openinference-semantic-conventions==0.1.4 133 | opentelemetry-api==1.23.0 134 | opentelemetry-exporter-otlp==1.23.0 135 | opentelemetry-exporter-otlp-proto-common==1.23.0 136 | 
opentelemetry-exporter-otlp-proto-grpc==1.23.0 137 | opentelemetry-exporter-otlp-proto-http==1.23.0 138 | opentelemetry-instrumentation==0.44b0 139 | opentelemetry-instrumentation-asgi==0.44b0 140 | opentelemetry-instrumentation-fastapi==0.44b0 141 | opentelemetry-proto==1.23.0 142 | opentelemetry-sdk==1.23.0 143 | opentelemetry-semantic-conventions==0.44b0 144 | opentelemetry-util-http==0.44b0 145 | orjson==3.9.15 146 | outcome==1.3.0.post0 147 | overrides==7.7.0 148 | packaging==23.2 149 | pandas==2.2.1 150 | pillow==10.2.0 151 | playwright==1.41.2 152 | pluggy==1.4.0 153 | portalocker==2.8.2 154 | posthog==3.4.2 155 | protobuf==4.25.1 156 | psutil==5.9.8 157 | pulsar-client==3.4.0 158 | pyaml==23.12.0 159 | pyarrow==15.0.0 160 | pyarrow-hotfix==0.6 161 | pyasn1==0.5.1 162 | pyasn1-modules==0.3.0 163 | pycparser==2.21 164 | pydantic==2.6.3 165 | pydantic_core==2.16.3 166 | pydeck==0.8.1b0 167 | pyee==11.0.1 168 | Pygments==2.17.2 169 | PyMuPDF==1.23.25 170 | PyMuPDFb==1.23.22 171 | pynndescent==0.5.11 172 | pypdf==4.0.2 173 | PyPika==0.48.9 174 | pyproject_hooks==1.0.0 175 | pyreadline3==3.4.1 176 | pysbd==0.3.4 177 | PySocks==1.7.1 178 | pytest==8.0.2 179 | pytest-xdist==3.5.0 180 | python-dateutil==2.8.2 181 | python-dotenv==1.0.1 182 | pytz==2024.1 183 | pywin32==306 184 | PyYAML==6.0.1 185 | ragas==0.1.3 186 | referencing==0.33.0 187 | regex==2023.12.25 188 | requests==2.31.0 189 | requests-file==2.0.0 190 | requests-oauthlib==1.3.1 191 | retrying==1.3.4 192 | rich==13.7.1 193 | rpds-py==0.18.0 194 | rsa==4.9 195 | safetensors==0.4.2 196 | scikit-learn==1.2.2 197 | scipy==1.12.0 198 | selenium==4.18.1 199 | sentry-sdk==1.40.6 200 | sgmllib3k==1.0.0 201 | six==1.16.0 202 | smmap==5.0.1 203 | sniffio==1.3.1 204 | sortedcontainers==2.4.0 205 | soupsieve==2.5 206 | SQLAlchemy==2.0.27 207 | starlette==0.36.3 208 | strawberry-graphql==0.208.2 209 | streamlit==1.31.1 210 | sympy==1.12 211 | tabulate==0.9.0 212 | tenacity==8.2.3 213 | threadpoolctl==3.3.0 214 | 
tiktoken==0.6.0 215 | tinysegmenter==0.3 216 | tldextract==5.1.1 217 | tokenizers==0.15.2 218 | toml==0.10.2 219 | toolz==0.12.1 220 | torch==2.2.1+cu121 221 | torchaudio==2.2.1+cu121 222 | torchvision==0.17.1+cu121 223 | tornado==6.4 224 | tqdm==4.66.2 225 | transformers==4.38.1 226 | trio==0.24.0 227 | trio-websocket==0.11.1 228 | typer==0.9.0 229 | typing-inspect==0.9.0 230 | typing_extensions==4.10.0 231 | tzdata==2024.1 232 | tzlocal==5.2 233 | umap-learn==0.5.5 234 | urllib3==2.2.1 235 | uvicorn==0.27.1 236 | validators==0.22.0 237 | watchdog==4.0.0 238 | watchfiles==0.21.0 239 | websocket-client==1.7.0 240 | websockets==12.0 241 | wrapt==1.16.0 242 | wsproto==1.2.0 243 | xxhash==3.4.1 244 | yarl==1.9.4 245 | zipp==3.17.0 246 | -------------------------------------------------------------------------------- /ch9/sample_eval_phoenix.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import ( 2 | SimpleDirectoryReader, 3 | VectorStoreIndex, 4 | set_global_handler 5 | ) 6 | import phoenix as px 7 | 8 | px.launch_app() 9 | set_global_handler("arize_phoenix") 10 | 11 | documents = SimpleDirectoryReader('files').load_data() 12 | index = VectorStoreIndex.from_documents(documents) 13 | qe = index.as_query_engine() 14 | response1 = qe.query("Tell me about ancient Rome") 15 | response2 = qe.query("Where is the Colosseum?") 16 | print(str(response1)+"\n"+str(response2)) 17 | 18 | # EVALUATION PART 19 | # adapted from the examples available on the official Phoenix documentation: https://docs.arize.com/phoenix/ 20 | 21 | from phoenix.session.evaluation import ( 22 | get_qa_with_reference, 23 | get_retrieved_documents 24 | ) 25 | from phoenix.trace import DocumentEvaluations, SpanEvaluations 26 | from phoenix.experimental.evals import ( 27 | HallucinationEvaluator, 28 | QAEvaluator, 29 | RelevanceEvaluator, 30 | OpenAIModel, 31 | run_evals 32 | ) 33 | model = OpenAIModel(model="gpt-4-turbo-preview") 34 | 35 | 
retrieved_documents_df = get_retrieved_documents(px.Client()) 36 | queries_df = get_qa_with_reference(px.Client()) 37 | 38 | hallucination_evaluator = HallucinationEvaluator(model) 39 | qa_correctness_evaluator = QAEvaluator(model) 40 | relevance_evaluator = RelevanceEvaluator(model) 41 | 42 | hallucination_eval_df, qa_correctness_eval_df = run_evals( 43 | dataframe=queries_df, 44 | evaluators=[hallucination_evaluator, qa_correctness_evaluator], 45 | provide_explanation=True, 46 | ) 47 | relevance_eval_df = run_evals( 48 | dataframe=retrieved_documents_df, 49 | evaluators=[relevance_evaluator], 50 | provide_explanation=True, 51 | )[0] 52 | 53 | px.Client().log_evaluations( 54 | SpanEvaluations( 55 | eval_name="Hallucination", 56 | dataframe=hallucination_eval_df), 57 | SpanEvaluations( 58 | eval_name="QA Correctness", 59 | dataframe=qa_correctness_eval_df), 60 | DocumentEvaluations( 61 | eval_name="Relevance", 62 | dataframe=relevance_eval_df), 63 | ) 64 | 65 | input("Press to exit...") 66 | -------------------------------------------------------------------------------- /ch9/sample_llama_packs_zephyr.py: -------------------------------------------------------------------------------- 1 | from llama_index.packs.zephyr_query_engine import ZephyrQueryEnginePack 2 | from llama_index.core import SimpleDirectoryReader 3 | reader = SimpleDirectoryReader('files') 4 | documents = reader.load_data() 5 | zephyr_qe = ZephyrQueryEnginePack(documents) 6 | response=zephyr_qe.run( 7 | "Enumerate famous buildings in ancient Rome" 8 | ) 9 | print(response) -------------------------------------------------------------------------------- /ch9/sample_lm_studio_LLM.py: -------------------------------------------------------------------------------- 1 | from llama_index.llms.openai import OpenAI 2 | llm = OpenAI( 3 | api_base='http://localhost:1234/v1', 4 | temperature=0.7 5 | ) 6 | print(llm.complete('Who is Lionel Messi?')) 7 | 
-------------------------------------------------------------------------------- /ch9/sample_neutrino.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.llms import ChatMessage 2 | from llama_index.llms.neutrino import Neutrino 3 | 4 | llm = Neutrino( 5 | api_key="", 6 | router="" 7 | ) 8 | 9 | while True: 10 | user_message = input("Ask a question: ") 11 | if user_message.lower() == 'exit': 12 | print("Exiting chat...") 13 | break 14 | response = llm.complete(user_message) 15 | print(f"LLM answer: {response}") 16 | print(f"Answered by: {response.raw['model']}") 17 | 18 | 19 | ''' 20 | To globally configure the llm, use Settings like this: 21 | 22 | from llama_index.core import Settings 23 | Settings.llm = llm 24 | ''' -------------------------------------------------------------------------------- /ch9/sample_tracing_phoenix2.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import ( 2 | SimpleDirectoryReader, 3 | VectorStoreIndex, 4 | set_global_handler 5 | ) 6 | import phoenix as px 7 | 8 | px.launch_app() 9 | set_global_handler("arize_phoenix") 10 | 11 | documents = SimpleDirectoryReader('files').load_data() 12 | index = VectorStoreIndex.from_documents(documents) 13 | qe = index.as_query_engine() 14 | response = qe.query("Tell me about ancient Rome") 15 | print(response) 16 | 17 | input("Press to exit...") 18 | --------------------------------------------------------------------------------