├── main.ipynb └── router.ipynb /main.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from llama_index import download_loader, ServiceContext, VectorStoreIndex\n", 10 | "from dotenv import load_dotenv, find_dotenv\n", 11 | "from llama_index.llms import OpenAI\n", 12 | "import openai\n", 13 | "from llama_index.embeddings import OpenAIEmbedding\n", 14 | "from llama_index.embeddings.openai import OpenAIEmbeddingModelType\n", 15 | "import os\n", 16 | "\n", 17 | "_ = load_dotenv(find_dotenv()) # read local .env file\n", 18 | "openai.api_key = os.environ['OPENAI_API_KEY']\n", 19 | "\n", 20 | "WikipediaReader = download_loader(\"WikipediaReader\")\n", 21 | "\n", 22 | "loader = WikipediaReader()\n", 23 | "pages = ['Nicolas_Cage', 'The_Best_of_Times_(1981_film)', 'Leonardo DiCaprio']\n", 24 | "documents = loader.load_data(pages=pages, auto_suggest=False, redirect = False)\n", 25 | "llm = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n", 26 | "gpt3 = OpenAI(temperature=0, model=\"text-davinci-003\")\n", 27 | "\n", 28 | "embed_model = OpenAIEmbedding(model= OpenAIEmbeddingModelType.TEXT_EMBED_ADA_002)\n", 29 | "\n", 30 | "service_context_gpt3 = ServiceContext.from_defaults(llm=gpt3, chunk_size = 256, chunk_overlap=0, embed_model=embed_model)\n", 31 | "\n", 32 | "index = VectorStoreIndex.from_documents(documents, service_context=service_context_gpt3)\n", 33 | "\n", 34 | "retriever = index.as_retriever(similarity_top_k=3)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# The response from original prompt\n", 44 | "from llama_index.prompts import PromptTemplate\n", 45 | "\n", 46 | "template = (\n", 47 | "\"We have provided context information below. 
\\n\"\n", 48 | " \"---------------------\\n\"\n", 49 | " \"{context_str}\"\n", 50 | " \"\\n---------------------\\n\"\n", 51 | " \"Given this information, please answer the question: {query_str}\\n\"\n", 52 | " \"Don't give an answer unless it is supported by the context above.\\n\"\n", 53 | ")\n", 54 | "\n", 55 | "qa_template = PromptTemplate(template)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "### Query 1" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 4, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "The context does not provide information about who directed the pilot that marked the acting debut of Nicolas Cage.\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "question = \"Who directed the pilot that marked the acting debut of Nicolas Cage?\"\n", 80 | "\n", 81 | "contexts = retriever.retrieve(question)\n", 82 | "\n", 83 | "# you can create text prompt (for completion API)\n", 84 | "context_list = [n.get_content() for n in contexts]\n", 85 | "\n", 86 | "prompt = qa_template.format(context_str=\"\\n\\n\".join(context_list), query_str=question)\n", 87 | "\n", 88 | "response = llm.complete(prompt)\n", 89 | "print(str(response))" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 5, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "['It blew my mind. I was like, \\'That\\'s what I want to do\\'.\"At age 15, he tried to convince his uncle, Francis Ford Coppola, to give him a screen test, telling him \"I\\'ll show you acting.\" His outburst was met with \"silence in the car\". By this stage of his career, Coppola had already directed Marlon Brando, Al Pacino, Gene Hackman and Robert De Niro. Although early in his career Cage appeared in some of his uncle\\'s films, he changed his name to Nicolas Cage to avoid the appearance of nepotism as Coppola\\'s nephew. 
His choice of name was inspired by the Marvel Comics superhero Luke Cage and composer John Cage.\\n\\n\\n== Career ==\\n\\n\\n=== 1981–1988: Early work and breakthrough ===\\nCage made his acting debut in the 1981 television pilot The Best of Times, which was never picked up by ABC. His film debut followed in 1982, with a minor role as an unnamed co-worker of Judge Reinhold\\'s character in the coming-of-age film Fast Times at Ridgemont High, having originally auditioned for Reinhold\\'s part.',\n", 101 | " 'Nicolas Kim Coppola (born January 7, 1964), known by his stage name Nicolas Cage, is an American actor and film producer. He is the recipient of various accolades, including an Academy Award, a Screen Actors Guild Award, and a Golden Globe Award. Known for his versatility as an actor, his participation in various film genres has gained him a cult following.Born into the Coppola family, Cage began his career in films such as Fast Times at Ridgemont High (1982) and Valley Girl (1983), as well various films by his uncle Francis Ford Coppola such as Rumble Fish (1983), The Cotton Club (1984), and Peggy Sue Got Married (1986). He earned critical success for his roles in Moonstruck (1987) and Raising Arizona (1987), and earned an Academy Award for Best Actor for his performance in the dramatic film Leaving Las Vegas (1995). He received another Academy Award nomination for his performance as twins Charlie and Donald Kaufman in the comedy-drama film Adaptation (2002).',\n", 102 | " 'His outburst was met with \"silence in the car\". By this stage of his career, Coppola had already directed Marlon Brando, Al Pacino, Gene Hackman and Robert De Niro. Although early in his career Cage appeared in some of his uncle\\'s films, he changed his name to Nicolas Cage to avoid the appearance of nepotism as Coppola\\'s nephew. 
His choice of name was inspired by the Marvel Comics superhero Luke Cage and composer John Cage.\\n\\n\\n== Career ==\\n\\n\\n=== 1981–1988: Early work and breakthrough ===\\nCage made his acting debut in the 1981 television pilot The Best of Times, which was never picked up by ABC. His film debut followed in 1982, with a minor role as an unnamed co-worker of Judge Reinhold\\'s character in the coming-of-age film Fast Times at Ridgemont High, having originally auditioned for Reinhold\\'s part. His experience on the film was marred by cast members endlessly quoting his uncle\\'s films, which inspired him to change his name.Cage\\'s first starring role came opposite Deborah Foreman in the romantic comedy Valley Girl (1983), in which he played a punk who falls in love with the titular valley girl, a plot loosely inspired by Romeo and Juliet.']" 103 | ] 104 | }, 105 | "execution_count": 5, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "context_list" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "### Query 2" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 21, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "name": "stdout", 128 | "output_type": "stream", 129 | "text": [ 130 | "Based on the provided context, both Nicolas Cage and Leonardo DiCaprio had different educational experiences. DiCaprio attended the Los Angeles Center for Enriched Studies for four years and later the Seeds Elementary School before enrolling at John Marshall High School. However, he dropped out of high school and eventually earned a general equivalency diploma. 
On the other hand, there is no specific information provided about Nicolas Cage's education in the given context.\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "question = \"Compare the education received by Nicolas Cage and Leonardo DiCaprio.\"\n", 136 | "\n", 137 | "contexts = retriever.retrieve(question)\n", 138 | "\n", 139 | "# you can create text prompt (for completion API)\n", 140 | "context_list = [n.get_content() for n in contexts]\n", 141 | "\n", 142 | "prompt = qa_template.format(context_str=\"\\n\\n\".join(context_list), query_str=question)\n", 143 | "\n", 144 | "response = llm.complete(prompt)\n", 145 | "print(str(response))" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 22, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "['He has described his parents as \"bohemian in every sense of the word\" and as \"the people I trust the most in the world\". DiCaprio has stated that he grew up poor in a neighborhood plagued with prostitution, crime and violence. Attending the Los Angeles Center for Enriched Studies for four years and later the Seeds Elementary School, he later enrolled at the John Marshall High School. DiCaprio disliked public school and wanted to audition for acting jobs instead. He dropped out of high school later, eventually earning a general equivalency diploma.As a child, DiCaprio wanted to become either a marine biologist or an actor. He eventually favored the latter; he liked impersonating characters and imitating people, and enjoyed seeing their reactions to his acting. According to DiCaprio, his interest in performing began at the age of two when he went onto the stage at a performance festival and danced spontaneously to a positive response from the crowd. 
He was also motivated to learn acting when Farrar\\'s appearance in a television commercial earned him $50,000.',\n", 157 | " 'Nicolas Kim Coppola (born January 7, 1964), known by his stage name Nicolas Cage, is an American actor and film producer. He is the recipient of various accolades, including an Academy Award, a Screen Actors Guild Award, and a Golden Globe Award. Known for his versatility as an actor, his participation in various film genres has gained him a cult following.Born into the Coppola family, Cage began his career in films such as Fast Times at Ridgemont High (1982) and Valley Girl (1983), as well various films by his uncle Francis Ford Coppola such as Rumble Fish (1983), The Cotton Club (1984), and Peggy Sue Got Married (1986). He earned critical success for his roles in Moonstruck (1987) and Raising Arizona (1987), and earned an Academy Award for Best Actor for his performance in the dramatic film Leaving Las Vegas (1995). He received another Academy Award nomination for his performance as twins Charlie and Donald Kaufman in the comedy-drama film Adaptation (2002).',\n", 158 | " \"DiCaprio has stated that he grew up poor in a neighborhood plagued with prostitution, crime and violence. Attending the Los Angeles Center for Enriched Studies for four years and later the Seeds Elementary School, he later enrolled at the John Marshall High School. DiCaprio disliked public school and wanted to audition for acting jobs instead. He dropped out of high school later, eventually earning a general equivalency diploma.As a child, DiCaprio wanted to become either a marine biologist or an actor. He eventually favored the latter; he liked impersonating characters and imitating people, and enjoyed seeing their reactions to his acting. According to DiCaprio, his interest in performing began at the age of two when he went onto the stage at a performance festival and danced spontaneously to a positive response from the crowd. 
He was also motivated to learn acting when Farrar's appearance in a television commercial earned him $50,000. DiCaprio has said in interviews that his first television appearance was in the children's series Romper Room, and that he was dismissed from the show for being disruptive. The show's host has denied that any children were removed from the show in this way.\"]" 159 | ] 160 | }, 161 | "execution_count": 22, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "context_list" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "# Multi-Step Query" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 23, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "import logging\n", 184 | "\n", 185 | "# Set logging level to WARNING to suppress INFO and DEBUG messages\n", 186 | "logging.basicConfig(level=logging.WARNING)\n", 187 | "logging.getLogger(\"httpx\").setLevel(logging.WARNING)\n", 188 | "\n", 189 | "from llama_index.indices.query.query_transform.base import (\n", 190 | " StepDecomposeQueryTransform,\n", 191 | ")\n", 192 | "from llama_index.query_engine.multistep_query_engine import (\n", 193 | " MultiStepQueryEngine,\n", 194 | ")\n", 195 | "\n", 196 | "# gpt-3\n", 197 | "step_decompose_transform_gpt3 = StepDecomposeQueryTransform(\n", 198 | " llm=gpt3, verbose=True\n", 199 | ")\n", 200 | "index_summary = \"Used to answer questions\"\n", 201 | "\n", 202 | "query_engine = index.as_query_engine(service_context=service_context_gpt3)\n", 203 | "\n", 204 | "query_engine = MultiStepQueryEngine(\n", 205 | " query_engine=query_engine,\n", 206 | " query_transform=step_decompose_transform_gpt3,\n", 207 | " index_summary=index_summary,\n", 208 | " num_steps=2\n", 209 | ")" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "### Query 1" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 
221 | "execution_count": 25, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "name": "stdout", 226 | "output_type": "stream", 227 | "text": [ 228 | "\u001b[1;3;33m> Current query: Who directed the pilot that marked the acting debut of Nicolas Cage?\n", 229 | "\u001b[0m\u001b[1;3;38;5;200m> New query: Who directed the pilot that marked the acting debut of Nicolas Cage?\n", 230 | "\u001b[0m\u001b[1;3;33m> Current query: Who directed the pilot that marked the acting debut of Nicolas Cage?\n", 231 | "\u001b[0m\u001b[1;3;38;5;200m> New query: Who directed the Best of Times pilot that marked the acting debut of Nicolas Cage?\n", 232 | "\u001b[0mDon Mischer directed the pilot that marked the acting debut of Nicolas Cage.\n", 233 | "[(' Who directed the pilot that marked the acting debut of Nicolas Cage?', ' The Best of Times pilot that marked the acting debut of Nicolas Cage was not directed by anyone in the Coppola family. It was directed by Rod Amateau.'), (' Who directed the Best of Times pilot that marked the acting debut of Nicolas Cage?', ' Don Mischer directed the Best of Times pilot that marked the acting debut of Nicolas Cage.')]\n" 234 | ] 235 | } 236 | ], 237 | "source": [ 238 | "response_gpt3 = query_engine.query(\n", 239 | " \"Who directed the pilot that marked the acting debut of Nicolas Cage?\",\n", 240 | ")\n", 241 | "print(str(response_gpt3))\n", 242 | "sub_qa_q1 = response_gpt3.metadata[\"sub_qa\"]\n", 243 | "tuples = [(t[0], t[1].response) for t in sub_qa_q1]\n", 244 | "print(tuples)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 26, 250 | "metadata": {}, 251 | "outputs": [ 252 | { 253 | "data": { 254 | "text/plain": [ 255 | "[(' Who directed the pilot that marked the acting debut of Nicolas Cage?',\n", 256 | " Response(response=' The Best of Times pilot that marked the acting debut of Nicolas Cage was not directed by anyone in the Coppola family. 
It was directed by Rod Amateau.', source_nodes=[NodeWithScore(node=TextNode(id_='669fa13f-f82f-45d2-a216-f34704b4faf5', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={: RelatedNodeInfo(node_id='90329e81-b998-4852-9749-1e0578a1c0f9', node_type=, metadata={}, hash='bd8d403a56276363f7c740bd2bf0063238d599ff367d16c35a896ace089a6588'), : RelatedNodeInfo(node_id='5f20a4a6-f50d-41fb-9926-cee921f4c96f', node_type=, metadata={}, hash='7ff2dfa6446c234ed67fc726db050efbb3306e952c8920e12d9bbf6d8836ed79'), : RelatedNodeInfo(node_id='c45eea83-3dce-467f-ba41-0eee51210e0a', node_type=, metadata={}, hash='3b8df75613a3c41579eb78d7b5338a4916187c9bdbd55efc5c53c87d31abd0ad')}, hash='06377fc11f3f02bbb8f159c545209f9b6231f5c16fa14c5839cd9356d5d02cff', text='It blew my mind. I was like, \\'That\\'s what I want to do\\'.\"At age 15, he tried to convince his uncle, Francis Ford Coppola, to give him a screen test, telling him \"I\\'ll show you acting.\" His outburst was met with \"silence in the car\". By this stage of his career, Coppola had already directed Marlon Brando, Al Pacino, Gene Hackman and Robert De Niro. Although early in his career Cage appeared in some of his uncle\\'s films, he changed his name to Nicolas Cage to avoid the appearance of nepotism as Coppola\\'s nephew. His choice of name was inspired by the Marvel Comics superhero Luke Cage and composer John Cage.\\n\\n\\n== Career ==\\n\\n\\n=== 1981–1988: Early work and breakthrough ===\\nCage made his acting debut in the 1981 television pilot The Best of Times, which was never picked up by ABC. 
His film debut followed in 1982, with a minor role as an unnamed co-worker of Judge Reinhold\\'s character in the coming-of-age film Fast Times at Ridgemont High, having originally auditioned for Reinhold\\'s part.', start_char_idx=3330, end_char_idx=4326, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.8327656080541779), NodeWithScore(node=TextNode(id_='096056f1-b770-4504-ba9d-45ba60f27718', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={: RelatedNodeInfo(node_id='90329e81-b998-4852-9749-1e0578a1c0f9', node_type=, metadata={}, hash='bd8d403a56276363f7c740bd2bf0063238d599ff367d16c35a896ace089a6588'), : RelatedNodeInfo(node_id='05af8e04-8aa8-4a44-b74f-37e8b15f5905', node_type=, metadata={}, hash='0144e0f063f964c00b07bfe140e2d95582c5a462b2e50e3bc6dc6ebd51fc45c2')}, hash='3cf5fc4e01c79e88435e9b9c837fda9d4b1a4b50bc8f3a29423b1c4a2d8c5e7a', text='Nicolas Kim Coppola (born January 7, 1964), known by his stage name Nicolas Cage, is an American actor and film producer. He is the recipient of various accolades, including an Academy Award, a Screen Actors Guild Award, and a Golden Globe Award. Known for his versatility as an actor, his participation in various film genres has gained him a cult following.Born into the Coppola family, Cage began his career in films such as Fast Times at Ridgemont High (1982) and Valley Girl (1983), as well various films by his uncle Francis Ford Coppola such as Rumble Fish (1983), The Cotton Club (1984), and Peggy Sue Got Married (1986). He earned critical success for his roles in Moonstruck (1987) and Raising Arizona (1987), and earned an Academy Award for Best Actor for his performance in the dramatic film Leaving Las Vegas (1995). 
He received another Academy Award nomination for his performance as twins Charlie and Donald Kaufman in the comedy-drama film Adaptation (2002).', start_char_idx=0, end_char_idx=975, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.8279893879887947)], metadata={'669fa13f-f82f-45d2-a216-f34704b4faf5': {}, '096056f1-b770-4504-ba9d-45ba60f27718': {}})),\n", 257 | " (' Who directed the Best of Times pilot that marked the acting debut of Nicolas Cage?',\n", 258 | " Response(response=' Don Mischer directed the Best of Times pilot that marked the acting debut of Nicolas Cage.', source_nodes=[NodeWithScore(node=TextNode(id_='cfef3e14-7f7f-491a-8168-e5ca6299d6e2', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={: RelatedNodeInfo(node_id='6c86dbde-efd6-4117-9258-f64c73a62534', node_type=, metadata={}, hash='84a244582f6571b967f09245a321158101fb6f7cb4f8dab32f017ce4bdbf01d2'), : RelatedNodeInfo(node_id='49622e0e-64e5-45eb-9b8c-c85c768172d8', node_type=, metadata={}, hash='a1b7d81cf1c91e865f6e64fa6470f93104aa96ecee8e5cb56c293bdb3f67d8f6'), : RelatedNodeInfo(node_id='376e3853-d8d6-4696-80ee-0a5de6046ce8', node_type=, metadata={}, hash='698dac9f11606cbb260290f9b8ba1edb05097a3ede30a8f23ac01ebcb497ef56')}, hash='84a244582f6571b967f09245a321158101fb6f7cb4f8dab32f017ce4bdbf01d2', text=\"The Best of Times is a 1981 television pilot episode directed by Don Mischer that was never picked up as a series. 
It marked the acting debuts of Nicolas Cage and Crispin Glover.\\n\\n\\n== Plot ==\\nA variety show about life as a teenager as seen through the eyes of eight actual teenagers (Crispin, Julie, Jill, Nicolas, Kevin, Lisa, David and Janet) who perform skits, songs and dances that relate their views between childhood and adulthood.\\n\\n\\n== Cast ==\\nCrispin Glover as Crispin\\nJill Schoelen as Jill\\nNicolas Cage as Nicolas (credited as Nicolas Coppola)\\nJulie Piekarski as Julie\\nKevin Cortes as Kevin\\nLisa Hope Ross as Lisa\\nDavid Rambo as David\\nJanet Robin as Janet\\nJackie Mason as Mr. O'Reilly\\nBetty Glover as Crispin's Mother\\n\\n\\n== External links ==\\nThe Best of Times at IMDb\", start_char_idx=0, end_char_idx=775, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.8675111072720709), NodeWithScore(node=TextNode(id_='c45eea83-3dce-467f-ba41-0eee51210e0a', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={: RelatedNodeInfo(node_id='90329e81-b998-4852-9749-1e0578a1c0f9', node_type=, metadata={}, hash='bd8d403a56276363f7c740bd2bf0063238d599ff367d16c35a896ace089a6588'), : RelatedNodeInfo(node_id='669fa13f-f82f-45d2-a216-f34704b4faf5', node_type=, metadata={}, hash='06377fc11f3f02bbb8f159c545209f9b6231f5c16fa14c5839cd9356d5d02cff'), : RelatedNodeInfo(node_id='bb679bc1-3d84-4cec-93f1-7d89cefc9a8d', node_type=, metadata={}, hash='9f4f8269ee0bf96c60de9cdc4b480ab75cff194ea53c683a95a259eaf948255b')}, hash='3b8df75613a3c41579eb78d7b5338a4916187c9bdbd55efc5c53c87d31abd0ad', text='His outburst was met with \"silence in the car\". By this stage of his career, Coppola had already directed Marlon Brando, Al Pacino, Gene Hackman and Robert De Niro. Although early in his career Cage appeared in some of his uncle\\'s films, he changed his name to Nicolas Cage to avoid the appearance of nepotism as Coppola\\'s nephew. 
His choice of name was inspired by the Marvel Comics superhero Luke Cage and composer John Cage.\\n\\n\\n== Career ==\\n\\n\\n=== 1981–1988: Early work and breakthrough ===\\nCage made his acting debut in the 1981 television pilot The Best of Times, which was never picked up by ABC. His film debut followed in 1982, with a minor role as an unnamed co-worker of Judge Reinhold\\'s character in the coming-of-age film Fast Times at Ridgemont High, having originally auditioned for Reinhold\\'s part. His experience on the film was marred by cast members endlessly quoting his uncle\\'s films, which inspired him to change his name.Cage\\'s first starring role came opposite Deborah Foreman in the romantic comedy Valley Girl (1983), in which he played a punk who falls in love with the titular valley girl, a plot loosely inspired by Romeo and Juliet.', start_char_idx=3515, end_char_idx=4674, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.8232731616148251)], metadata={'cfef3e14-7f7f-491a-8168-e5ca6299d6e2': {}, 'c45eea83-3dce-467f-ba41-0eee51210e0a': {}}))]" 259 | ] 260 | }, 261 | "execution_count": 26, 262 | "metadata": {}, 263 | "output_type": "execute_result" 264 | } 265 | ], 266 | "source": [ 267 | "sub_qa_q1" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": {}, 273 | "source": [ 274 | "### Query 2" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 10, 280 | "metadata": {}, 281 | "outputs": [ 282 | { 283 | "name": "stdout", 284 | "output_type": "stream", 285 | "text": [ 286 | "\u001b[1;3;33m> Current query: Compare the education received by Nicolas Cage and Leonardo DiCaprio.\n", 287 | "\u001b[0m\u001b[1;3;38;5;200m> New query: What educational institutions did Nicolas Cage and Leonardo DiCaprio attend?\n", 288 | "\u001b[0m\u001b[1;3;33m> Current query: Compare the education received by Nicolas Cage and Leonardo DiCaprio.\n", 289 | 
"\u001b[0m\u001b[1;3;38;5;200m> New query: What educational institutions did Nicolas Cage attend?\n", 290 | "\u001b[0m\u001b[1;3;33m> Current query: Compare the education received by Nicolas Cage and Leonardo DiCaprio.\n", 291 | "\u001b[0m\u001b[1;3;38;5;200m> New query: What type of education did Nicolas Cage and Leonardo DiCaprio receive?\n", 292 | "\u001b[0mNicolas Cage received education in the field of theater, film, and television at UCLA School of Theater, Film and Television. On the other hand, Leonardo DiCaprio attended the Los Angeles Center for Enriched Studies, Seeds Elementary School, and John Marshall High School. However, DiCaprio dropped out of high school and later earned a general equivalency diploma.\n", 293 | "[(' What educational institutions did Nicolas Cage and Leonardo DiCaprio attend?', ' Leonardo DiCaprio attended the Los Angeles Center for Enriched Studies for four years and later the Seeds Elementary School, and then John Marshall High School. Nicolas Cage is not mentioned in the context information.'), (' What educational institutions did Nicolas Cage attend?', ' Nicolas Cage attended UCLA School of Theater, Film and Television.'), (' What type of education did Nicolas Cage and Leonardo DiCaprio receive?', ' Nicolas Cage attended Fast Times at Ridgemont High (1982) and Valley Girl (1983), as well various films by his uncle Francis Ford Coppola such as Rumble Fish (1983), The Cotton Club (1984), and Peggy Sue Got Married (1986). Leonardo DiCaprio attended the Los Angeles Center for Enriched Studies for four years and later the Seeds Elementary School, he later enrolled at the John Marshall High School. 
DiCaprio dropped out of high school later, eventually earning a general equivalency diploma.')]\n" 294 | ] 295 | } 296 | ], 297 | "source": [ 298 | "response_gpt3 = query_engine.query(\n", 299 | " \"Compare the education received by Nicolas Cage and Leonardo DiCaprio.\",\n", 300 | ")\n", 301 | "print(str(response_gpt3))\n", 302 | "sub_qa = response_gpt3.metadata[\"sub_qa\"]\n", 303 | "tuples = [(t[0], t[1].response) for t in sub_qa]\n", 304 | "print(tuples)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 11, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "data": { 314 | "text/plain": [ 315 | "(' What educational institutions did Nicolas Cage attend?',\n", 316 | " Response(response=' Nicolas Cage attended UCLA School of Theater, Film and Television.', source_nodes=[NodeWithScore(node=TextNode(id_='347ca8de-0eb4-4215-9f05-32671276ce99', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={: RelatedNodeInfo(node_id='a9d91101-72a6-4a53-bb67-80fbcf63cd6f', node_type=, metadata={}, hash='bd8d403a56276363f7c740bd2bf0063238d599ff367d16c35a896ace089a6588'), : RelatedNodeInfo(node_id='f4a7a9b7-cff1-4090-befb-77415c87c448', node_type=, metadata={}, hash='0144e0f063f964c00b07bfe140e2d95582c5a462b2e50e3bc6dc6ebd51fc45c2')}, hash='3cf5fc4e01c79e88435e9b9c837fda9d4b1a4b50bc8f3a29423b1c4a2d8c5e7a', text='Nicolas Kim Coppola (born January 7, 1964), known by his stage name Nicolas Cage, is an American actor and film producer. He is the recipient of various accolades, including an Academy Award, a Screen Actors Guild Award, and a Golden Globe Award. 
Known for his versatility as an actor, his participation in various film genres has gained him a cult following.Born into the Coppola family, Cage began his career in films such as Fast Times at Ridgemont High (1982) and Valley Girl (1983), as well various films by his uncle Francis Ford Coppola such as Rumble Fish (1983), The Cotton Club (1984), and Peggy Sue Got Married (1986). He earned critical success for his roles in Moonstruck (1987) and Raising Arizona (1987), and earned an Academy Award for Best Actor for his performance in the dramatic film Leaving Las Vegas (1995). He received another Academy Award nomination for his performance as twins Charlie and Donald Kaufman in the comedy-drama film Adaptation (2002).', start_char_idx=0, end_char_idx=975, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.8251487835811767), NodeWithScore(node=TextNode(id_='161c6223-3558-40a8-bfab-9f304fd169da', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={: RelatedNodeInfo(node_id='a9d91101-72a6-4a53-bb67-80fbcf63cd6f', node_type=, metadata={}, hash='bd8d403a56276363f7c740bd2bf0063238d599ff367d16c35a896ace089a6588'), : RelatedNodeInfo(node_id='bd54fa34-968f-4e1b-820d-e9c434359f4a', node_type=, metadata={}, hash='4c0887661554366c7ad3c5c2d04d99898e31081f4b7d5a690ffa60b76d5a791a'), : RelatedNodeInfo(node_id='57a5d4f8-f191-4abf-9adb-905e1e85a65a', node_type=, metadata={}, hash='7ff2dfa6446c234ed67fc726db050efbb3306e952c8920e12d9bbf6d8836ed79')}, hash='34607fa7fcaff127a7d22d4fc1c7a5a24b64fd072ad320ae5c68429bd5b5215b', text='He aspired to act from an early age and also attended UCLA School of Theater, Film and Television. His first non-cinematic acting experience was in a school production of Golden Boy. He said he started acting because he \"wanted to be James Dean. I saw him in Rebel Without a Cause, East of Eden. 
Nothing affected me—no rock song, no classical music—the way Dean affected me in Eden. It blew my mind. I was like, \\'That\\'s what I want to do\\'.\"At age 15, he tried to convince his uncle, Francis Ford Coppola, to give him a screen test, telling him \"I\\'ll show you acting.\" His outburst was met with \"silence in the car\". By this stage of his career, Coppola had already directed Marlon Brando, Al Pacino, Gene Hackman and Robert De Niro. Although early in his career Cage appeared in some of his uncle\\'s films, he changed his name to Nicolas Cage to avoid the appearance of nepotism as Coppola\\'s nephew. His choice of name was inspired by the Marvel Comics superhero Luke Cage and composer John Cage.\\n\\n\\n== Career ==', start_char_idx=2947, end_char_idx=3957, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.818231078207671)], metadata={'347ca8de-0eb4-4215-9f05-32671276ce99': {}, '161c6223-3558-40a8-bfab-9f304fd169da': {}}))" 317 | ] 318 | }, 319 | "execution_count": 11, 320 | "metadata": {}, 321 | "output_type": "execute_result" 322 | } 323 | ], 324 | "source": [ 325 | "sub_qa[1]" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "# Sub Question Query Engine" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 3, 338 | "metadata": {}, 339 | "outputs": [ 340 | { 341 | "name": "stdout", 342 | "output_type": "stream", 343 | "text": [ 344 | "**********\n", 345 | "Trace: index_construction\n", 346 | " |_node_parsing -> 0.116351 seconds\n", 347 | " |_chunking -> 0.041108 seconds\n", 348 | " |_chunking -> 0.000479 seconds\n", 349 | " |_chunking -> 0.051968 seconds\n", 350 | " |_embedding -> 1.981157 seconds\n", 351 | " |_embedding -> 1.945409 seconds\n", 352 | " |_embedding -> 1.949405 seconds\n", 353 | " |_embedding -> 0.707027 seconds\n", 354 | "**********\n" 355 | ] 356 | } 357 | ], 358 | "source": [ 359 | "from 
llama_index import VectorStoreIndex, SimpleDirectoryReader\n", 360 | "from llama_index.tools import QueryEngineTool, ToolMetadata\n", 361 | "from llama_index.query_engine import SubQuestionQueryEngine\n", 362 | "from llama_index.callbacks import CallbackManager, LlamaDebugHandler\n", 363 | "from llama_index import ServiceContext\n", 364 | "\n", 365 | "llama_debug = LlamaDebugHandler(print_trace_on_end=True)\n", 366 | "callback_manager = CallbackManager([llama_debug])\n", 367 | "service_context = ServiceContext.from_defaults(\n", 368 | " callback_manager=callback_manager,chunk_size=256, chunk_overlap=0\n", 369 | ")\n", 370 | "# build index and query engine\n", 371 | "vector_query_engine = VectorStoreIndex.from_documents(\n", 372 | " documents, use_async=False, service_context=service_context\n", 373 | ").as_query_engine(similarity_top_k=5)\n", 374 | "# setup base query engine as tool\n", 375 | "query_engine_tools = [\n", 376 | " QueryEngineTool(\n", 377 | " query_engine=vector_query_engine,\n", 378 | " metadata=ToolMetadata(\n", 379 | " name=\"Sub-question query engine\",\n", 380 | " description=\"Questions about actors\",\n", 381 | " ),\n", 382 | " ),\n", 383 | "]\n", 384 | "\n", 385 | "query_engine = SubQuestionQueryEngine.from_defaults(\n", 386 | " query_engine_tools=query_engine_tools,\n", 387 | " service_context=service_context,\n", 388 | " use_async= False\n", 389 | ")" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": {}, 395 | "source": [ 396 | "### Query 1" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": 4, 402 | "metadata": {}, 403 | "outputs": [ 404 | { 405 | "name": "stdout", 406 | "output_type": "stream", 407 | "text": [ 408 | "Generated 1 sub questions.\n", 409 | "\u001b[1;3;38;2;237;90;200m[Sub-question query engine] Q: Who directed the pilot that marked the acting debut of Nicolas Cage?\n", 410 | "\u001b[0m\u001b[1;3;38;2;237;90;200m[Sub-question query engine] A: The pilot that marked the acting 
debut of Nicolas Cage was directed by an unknown director as the name of the director is not mentioned in the given context.\n", 411 | "\u001b[0m**********\n", 412 | "Trace: query\n", 413 | " |_query -> 4.874696 seconds\n", 414 | " |_llm -> 2.056079 seconds\n", 415 | " |_sub_question -> 1.997166 seconds\n", 416 | " |_query -> 1.996899 seconds\n", 417 | " |_retrieve -> 0.353177 seconds\n", 418 | " |_embedding -> 0.335905 seconds\n", 419 | " |_synthesize -> 1.643538 seconds\n", 420 | " |_templating -> 1.5e-05 seconds\n", 421 | " |_llm -> 1.638972 seconds\n", 422 | " |_synthesize -> 0.820649 seconds\n", 423 | " |_templating -> 1.9e-05 seconds\n", 424 | " |_llm -> 0.818867 seconds\n", 425 | "**********\n", 426 | "The director of the pilot that marked the acting debut of Nicolas Cage is unknown as the name of the director is not mentioned in the given context.\n", 427 | "Sub Question 0: Who directed the pilot that marked the acting debut of Nicolas Cage?\n", 428 | "Answer: The pilot that marked the acting debut of Nicolas Cage was directed by an unknown director as the name of the director is not mentioned in the given context.\n", 429 | "====================================\n" 430 | ] 431 | } 432 | ], 433 | "source": [ 434 | "response = query_engine.query(\n", 435 | " \"Who directed the pilot that marked the acting debut of Nicolas Cage?\"\n", 436 | ")\n", 437 | "\n", 438 | "print(response)\n", 439 | "\n", 440 | "# iterate through sub_question items captured in SUB_QUESTION event\n", 441 | "from llama_index.callbacks.schema import CBEventType, EventPayload\n", 442 | "\n", 443 | "for i, (start_event, end_event) in enumerate(\n", 444 | " llama_debug.get_event_pairs(CBEventType.SUB_QUESTION)\n", 445 | "):\n", 446 | " qa_pair = end_event.payload[EventPayload.SUB_QUESTION]\n", 447 | " print(\"Sub Question \" + str(i) + \": \" + qa_pair.sub_q.sub_question.strip())\n", 448 | " print(\"Answer: \" + qa_pair.answer.strip())\n", 449 | " 
print(\"====================================\")" 450 | ] 451 | }, 452 | { 453 | "cell_type": "markdown", 454 | "metadata": {}, 455 | "source": [ 456 | "### Query 2" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": 14, 462 | "metadata": {}, 463 | "outputs": [ 464 | { 465 | "name": "stdout", 466 | "output_type": "stream", 467 | "text": [ 468 | "Generated 2 sub questions.\n", 469 | "\u001b[1;3;38;2;237;90;200m[leo and nic] Q: What is the education of Nicolas Cage?\n", 470 | "\u001b[0m\u001b[1;3;38;2;237;90;200m[leo and nic] A: Nicolas Cage attended UCLA School of Theater, Film and Television.\n", 471 | "\u001b[0m\u001b[1;3;38;2;90;149;237m[leo and nic] Q: What is the education of Leonardo DiCaprio?\n", 472 | "\u001b[0m\u001b[1;3;38;2;90;149;237m[leo and nic] A: Leonardo DiCaprio attended the Los Angeles Center for Enriched Studies for four years and later the Seeds Elementary School. He later enrolled at the John Marshall High School but dropped out to pursue acting. 
He eventually earned a general equivalency diploma.\n", 473 | "\u001b[0m**********\n", 474 | "Trace: query\n", 475 | " |_query -> 6.600185 seconds\n", 476 | " |_llm -> 1.713314 seconds\n", 477 | " |_sub_question -> 1.408598 seconds\n", 478 | " |_query -> 1.408146 seconds\n", 479 | " |_retrieve -> 0.414056 seconds\n", 480 | " |_embedding -> 0.382935 seconds\n", 481 | " |_synthesize -> 0.993901 seconds\n", 482 | " |_templating -> 1.7e-05 seconds\n", 483 | " |_llm -> 0.987334 seconds\n", 484 | " |_sub_question -> 1.800564 seconds\n", 485 | " |_query -> 1.79982 seconds\n", 486 | " |_retrieve -> 0.342561 seconds\n", 487 | " |_embedding -> 0.326615 seconds\n", 488 | " |_synthesize -> 1.457079 seconds\n", 489 | " |_templating -> 2.5e-05 seconds\n", 490 | " |_llm -> 1.453263 seconds\n", 491 | " |_synthesize -> 1.676812 seconds\n", 492 | " |_templating -> 1.7e-05 seconds\n", 493 | " |_llm -> 1.674988 seconds\n", 494 | "**********\n", 495 | "Nicolas Cage attended UCLA School of Theater, Film and Television, while Leonardo DiCaprio attended the Los Angeles Center for Enriched Studies, Seeds Elementary School, and later John Marshall High School before dropping out to pursue acting. 
DiCaprio eventually earned a general equivalency diploma.\n" 496 | ] 497 | } 498 | ], 499 | "source": [ 500 | "# Reuse the handler already attached to query_engine: a freshly created LlamaDebugHandler never receives this query's events\n", 501 | "llama_debug.flush_event_logs()  # drop events recorded by earlier queries\n", 502 | "\n", 503 | "response = query_engine.query(\n", 504 | " \"Compare the education received by Nicolas Cage and Leonardo DiCaprio.\"\n", 505 | ")\n", 506 | "\n", 507 | "print(response)\n", 508 | "\n", 509 | "# iterate through sub_question items captured in SUB_QUESTION event\n", 510 | "from llama_index.callbacks.schema import CBEventType, EventPayload\n", 511 | "\n", 512 | "for i, (start_event, end_event) in enumerate(\n", 513 | " llama_debug.get_event_pairs(CBEventType.SUB_QUESTION)\n", 514 | "):\n", 515 | " qa_pair = end_event.payload[EventPayload.SUB_QUESTION]\n", 516 | " print(\"Sub Question \" + str(i) + \": \" + qa_pair.sub_q.sub_question.strip())\n", 517 | " print(\"Answer: \" + qa_pair.answer.strip())\n", 518 | " print(\"====================================\")" 519 | ] 520 | }, 521 | { 522 | "cell_type": "markdown", 523 | "metadata": {}, 524 | "source": [ 525 | "# HyDE Query Transform" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": 15, 531 | "metadata": {}, 532 | "outputs": [], 533 | "source": [ 534 | "from llama_index.indices.query.query_transform import HyDEQueryTransform\n", 535 | "from llama_index.query_engine.transform_query_engine import (\n", 536 | " TransformQueryEngine,\n", 537 | ")\n", 538 | "\n", 539 | "index = VectorStoreIndex.from_documents(documents, service_context=service_context_gpt3)\n", 540 | "hyde_base_query_engine = index.as_query_engine(similarity_top_k=5)  # own name, so query_engine stays the SubQuestionQueryEngine used above\n", 541 | "\n", 542 | "hyde = HyDEQueryTransform(include_original=True)\n", 543 | "hyde_query_engine = TransformQueryEngine(hyde_base_query_engine, hyde)" 544 | ] 545 | }, 546 | { 547 | "cell_type": "markdown", 548 | "metadata": {}, 549 | "source": [ 550 | "### Query 1" 551 | ] 552 | }, 553 | { 554 | "cell_type": "code", 555 | 
"execution_count": 16, 556 | "metadata": {}, 557 | "outputs": [ 558 | { 559 | "name": "stdout", 560 | "output_type": "stream", 561 | "text": [ 562 | " The Best of Times, the television pilot that marked the acting debut of Nicolas Cage, was not picked up by ABC.\n" 563 | ] 564 | } 565 | ], 566 | "source": [ 567 | "response = hyde_query_engine.query(\"Who directed the pilot that marked the acting debut of Nicolas Cage?\")\n", 568 | "print(response)" 569 | ] 570 | }, 571 | { 572 | "cell_type": "markdown", 573 | "metadata": {}, 574 | "source": [ 575 | "In this example, HyDE improves output quality significantly, by hallucinating.\n", 576 | "Hypothetical Document Embeddings (HyDE) query transform.\n", 577 | "\n", 578 | "It uses an LLM to generate hypothetical answer(s) to a given query,\n", 579 | "and use the resulting documents as embedding strings.\n", 580 | "\n", 581 | "As described in [Precise Zero-Shot Dense Retrieval without Relevance Labels]\n", 582 | "(https://arxiv.org/abs/2212.10496)\n" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": 17, 588 | "metadata": {}, 589 | "outputs": [ 590 | { 591 | "data": { 592 | "text/plain": [ 593 | "'The pilot that marked the acting debut of Nicolas Cage was directed by his uncle, Francis Ford Coppola. This significant moment in Cage\\'s career occurred in 1981 when he was cast in the television movie \"Best of Times.\" Coppola, a renowned filmmaker himself, took on the role of director for this project, showcasing his support and belief in his nephew\\'s talent. The pilot served as a stepping stone for Cage, propelling him into the world of acting and setting the stage for his future success in the industry. 
With Coppola\\'s guidance, Cage was able to make a memorable first impression, laying the foundation for his illustrious career as one of Hollywood\\'s most versatile and acclaimed actors.'" 594 | ] 595 | }, 596 | "execution_count": 17, 597 | "metadata": {}, 598 | "output_type": "execute_result" 599 | } 600 | ], 601 | "source": [ 602 | "query_bundle = hyde(\"Who directed the pilot that marked the acting debut of Nicolas Cage?\")\n", 603 | "hyde_doc = query_bundle.embedding_strs[0]\n", 604 | "hyde_doc" 605 | ] 606 | }, 607 | { 608 | "cell_type": "markdown", 609 | "metadata": {}, 610 | "source": [ 611 | "### Query 2" 612 | ] 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": 18, 617 | "metadata": {}, 618 | "outputs": [ 619 | { 620 | "name": "stdout", 621 | "output_type": "stream", 622 | "text": [ 623 | "\n", 624 | "Nicolas Cage attended UCLA School of Theater, Film and Television and had his first non-cinematic acting experience in a school production of Golden Boy. Leonardo DiCaprio attended the Los Angeles Center for Enriched Studies for four years and later the Seeds Elementary School, and later enrolled at the John Marshall High School. DiCaprio disliked public school and wanted to audition for acting jobs instead. He dropped out of high school later, eventually earning a general equivalency diploma.\n" 625 | ] 626 | } 627 | ], 628 | "source": [ 629 | "response = hyde_query_engine.query(\"Compare the education received by Nicolas Cage and Leonardo DiCaprio.\")\n", 630 | "print(response)" 631 | ] 632 | }, 633 | { 634 | "cell_type": "code", 635 | "execution_count": 19, 636 | "metadata": {}, 637 | "outputs": [ 638 | { 639 | "data": { 640 | "text/plain": [ 641 | "\"Nicolas Cage and Leonardo DiCaprio, two renowned actors in Hollywood, have both achieved great success in their careers. However, when it comes to their education, they have taken different paths. 
\\n\\nNicolas Cage, born Nicolas Kim Coppola, comes from a family deeply rooted in the entertainment industry. Despite his family's background, Cage decided to pursue a formal education in acting. He attended the prestigious Beverly Hills High School, known for its strong performing arts program. During his time there, Cage honed his acting skills and participated in various school productions. After graduating, he continued his education at the American Conservatory Theater in San Francisco, where he further refined his craft. Cage's dedication to his education undoubtedly played a significant role in shaping his acting abilities and contributed to his successful career.\\n\\nOn the other hand, Leonardo DiCaprio's educational journey took a different route. Born in Los Angeles, DiCaprio grew up in a modest family. He attended Los Angeles Center for Enriched Studies, a public magnet school known for its rigorous academic curriculum. Although DiCaprio showed an early interest in acting, he did not receive formal training during his high school years. Instead, he focused on his studies and graduated with honors. DiCaprio's passion for acting led him to pursue a career in the entertainment industry without a formal education in the field.\\n\\nDespite their differing educational backgrounds, both Cage and DiCaprio have achieved immense success in their acting careers. Cage's formal training has allowed him to showcase his versatility in various roles, earning him critical acclaim and an Academy Award for Best Actor. DiCaprio, on the other hand, has relied on his natural talent and dedication to his craft to become one of the most respected actors of his generation, also winning an Academy Award for Best Actor.\\n\\nIn conclusion, while Nicolas Cage pursued a formal education in acting, attending renowned institutions, Leonardo DiCaprio relied on his natural talent and dedication to his craft. 
Both actors have proven that success in the entertainment industry can be achieved through different educational paths, showcasing the importance of passion and hard work in pursuing one's dreams.\"" 642 | ] 643 | }, 644 | "execution_count": 19, 645 | "metadata": {}, 646 | "output_type": "execute_result" 647 | } 648 | ], 649 | "source": [ 650 | "query_bundle = hyde(\"Compare the education received by Nicolas Cage and Leonardo DiCaprio.\")\n", 651 | "hyde_doc = query_bundle.embedding_strs[0]\n", 652 | "hyde_doc" 653 | ] 654 | } 655 | ], 656 | "metadata": { 657 | "kernelspec": { 658 | "display_name": "llama-test", 659 | "language": "python", 660 | "name": "python3" 661 | }, 662 | "language_info": { 663 | "codemirror_mode": { 664 | "name": "ipython", 665 | "version": 3 666 | }, 667 | "file_extension": ".py", 668 | "mimetype": "text/x-python", 669 | "name": "python", 670 | "nbconvert_exporter": "python", 671 | "pygments_lexer": "ipython3", 672 | "version": "3.10.13" 673 | } 674 | }, 675 | "nbformat": 4, 676 | "nbformat_minor": 2 677 | } 678 | -------------------------------------------------------------------------------- /router.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import logging\n", 10 | "import sys\n", 11 | "\n", 12 | "import nest_asyncio\n", 13 | "from llama_index import (\n", 14 | " VectorStoreIndex,\n", 15 | " ServiceContext,\n", 16 | " download_loader,\n", 17 | ")\n", 18 | "from llama_index.callbacks import CallbackManager, LlamaDebugHandler\n", 19 | "from llama_index.embeddings import OpenAIEmbedding\n", 20 | "from llama_index.embeddings.openai import OpenAIEmbeddingModelType\n", 21 | "from llama_index.indices.query.query_transform.base import StepDecomposeQueryTransform\n", 22 | "from llama_index.llms import OpenAI\n", 23 | "from llama_index.query_engine import 
SubQuestionQueryEngine, RouterQueryEngine, MultiStepQueryEngine\n", 24 | "from llama_index.selectors.pydantic_selectors import PydanticSingleSelector\n", 25 | "from llama_index.tools import QueryEngineTool, ToolMetadata\n", 26 | "\n", 27 | "# Set the logging level for openai to ERROR to suppress informational messages\n", 28 | "logging.getLogger('openai').setLevel(logging.ERROR)\n", 29 | "logging.getLogger('requests').setLevel(logging.ERROR)\n", 30 | "logging.getLogger('urllib3').setLevel(logging.ERROR)\n", 31 | "logging.basicConfig(level=logging.WARNING)\n", 32 | "logging.getLogger(\"httpx\").setLevel(logging.WARNING)\n", 33 | "\n", 34 | "nest_asyncio.apply()\n", 35 | "\n", 36 | "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n", 37 | "logging.getLogger().handlers = []\n", 38 | "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 7, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "**********\n", 51 | "Trace: index_construction\n", 52 | " |_node_parsing -> 0.088957 seconds\n", 53 | " |_chunking -> 0.029358 seconds\n", 54 | " |_chunking -> 0.000334 seconds\n", 55 | " |_chunking -> 0.038476 seconds\n", 56 | " |_embedding -> 1.885996 seconds\n", 57 | " |_embedding -> 3.587934 seconds\n", 58 | " |_embedding -> 1.537834 seconds\n", 59 | " |_embedding -> 0.648983 seconds\n", 60 | "**********\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "# load documents\n", 66 | "WikipediaReader = download_loader(\"WikipediaReader\")\n", 67 | "\n", 68 | "loader = WikipediaReader()\n", 69 | "pages = ['Nicolas_Cage', 'The_Best_of_Times_(1981_film)', 'Leonardo DiCaprio']\n", 70 | "documents = loader.load_data(pages=pages, auto_suggest=False, redirect = False)\n", 71 | "\n", 72 | "# initialize service context (set chunk size)\n", 73 | "gpt3 = OpenAI(temperature=0, model=\"text-davinci-003\")\n", 74 | "\n", 75 | 
"embed_model = OpenAIEmbedding(model= OpenAIEmbeddingModelType.TEXT_EMBED_ADA_002)\n", 76 | "\n", 77 | "service_context_gpt3 = ServiceContext.from_defaults(llm=gpt3, chunk_size = 256, chunk_overlap=0, embed_model=embed_model)\n", 78 | "\n", 79 | "\n", 80 | "# simple retriever\n", 81 | "simple_index = VectorStoreIndex.from_documents(documents, service_context=service_context_gpt3)\n", 82 | "simple_query_engine = simple_index.as_query_engine()\n", 83 | "\n", 84 | "\n", 85 | "simple_tool = QueryEngineTool.from_defaults(\n", 86 | " query_engine=simple_query_engine,\n", 87 | " description=\"Useful when the query is relatively straightforward and can be answered with direct information retrieval, without the need for complex transformations.\",\n", 88 | ")\n", 89 | "\n", 90 | "# multi-step query\n", 91 | "step_decompose_transform_gpt3 = StepDecomposeQueryTransform(\n", 92 | " llm=gpt3, verbose=True\n", 93 | ")\n", 94 | "index_summary = \"Breaks down the initial query\"\n", 95 | "\n", 96 | "multi_step_query_engine = simple_index.as_query_engine(service_context=service_context_gpt3)\n", 97 | "\n", 98 | "multi_step_query_engine = MultiStepQueryEngine(\n", 99 | " query_engine=multi_step_query_engine,\n", 100 | " query_transform=step_decompose_transform_gpt3,\n", 101 | " index_summary=index_summary,\n", 102 | ")\n", 103 | "\n", 104 | "multi_step_tool = QueryEngineTool.from_defaults(\n", 105 | " query_engine=multi_step_query_engine,\n", 106 | " description=\"Useful when complex or multifaceted information needs are present, and a single query isn't sufficient to fully understand or retrieve the necessary information. 
This approach is especially beneficial in environments where the context evolves with each interaction or where the information is layered and requires iterative exploration.\",\n", 107 | ")\n", 108 | "\n", 109 | "# sub-question query engine\n", 110 | "llama_debug = LlamaDebugHandler(print_trace_on_end=True)\n", 111 | "callback_manager = CallbackManager([llama_debug])\n", 112 | "\n", 113 | "service_context_sub_question = ServiceContext.from_defaults(\n", 114 | " callback_manager=callback_manager,chunk_size=256, chunk_overlap=0\n", 115 | ")\n", 116 | "\n", 117 | "vector_query_engine_sub_question = VectorStoreIndex.from_documents(\n", 118 | " documents, use_async=False, service_context=service_context_sub_question\n", 119 | ").as_query_engine(similarity_top_k=5)\n", 120 | "\n", 121 | "query_engine_tools = [\n", 122 | " QueryEngineTool(\n", 123 | " query_engine=vector_query_engine_sub_question,\n", 124 | " metadata=ToolMetadata(\n", 125 | " name=\"Sub-question query engine\",\n", 126 | " description=\"Questions about actors\",\n", 127 | " ),\n", 128 | " ),\n", 129 | "]\n", 130 | "\n", 131 | "query_engine_sub_question = SubQuestionQueryEngine.from_defaults(\n", 132 | " query_engine_tools=query_engine_tools,\n", 133 | " service_context=service_context_sub_question,\n", 134 | " use_async= False\n", 135 | ")\n", 136 | "\n", 137 | "sub_question_tool = QueryEngineTool.from_defaults(\n", 138 | " query_engine=query_engine_sub_question,\n", 139 | " description=\"Useful when complex questions can be effectively broken down into simpler sub-questions, each of which can be answered independently. 
For example, if you have to compare two or more things.\",\n", 140 | ")\n", 141 | "\n", 142 | "# Route each query to a single engine; the selector picks it from the tool descriptions above\n", 143 | "query_engine = RouterQueryEngine(\n", 144 | " selector=PydanticSingleSelector.from_defaults(),\n", 145 | " query_engine_tools=[\n", 146 | " simple_tool,\n", 147 | " multi_step_tool,\n", 148 | " sub_question_tool,\n", 149 | " ],\n", 150 | ")" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 11, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "name": "stdout", 160 | "output_type": "stream", 161 | "text": [ 162 | "Selecting query engine 0: The query is relatively straightforward and can be answered with direct information retrieval..\n", 163 | " Nicolas Cage is an American actor and film producer.\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "response_1 = query_engine.query(\"What is Nicolas Cage's profession?\")\n", 169 | "print(str(response_1))" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 23, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "text/plain": [ 180 | "'The query is relatively straightforward and can be answered with direct information retrieval.'" 181 | ] 182 | }, 183 | "execution_count": 23, 184 | "metadata": {}, 185 | "output_type": "execute_result" 186 | } 187 | ], 188 | "source": [ 189 | "response_1.metadata[\"selector_result\"].selections[0].reason" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 12, 195 | "metadata": {}, 196 | "outputs": [ 197 | { 198 | "name": "stdout", 199 | "output_type": "stream", 200 | "text": [ 201 | "Selecting query engine 2: The question requires comparing the education received by two individuals, which can be effectively broken down into simpler sub-questions..\n", 202 | "Generated 2 sub questions.\n", 203 | "\u001b[1;3;38;2;237;90;200m[Sub-question query engine] Q: What is the education of Nicolas Cage?\n", 204 | "\u001b[0m\u001b[1;3;38;2;237;90;200m[Sub-question query engine] A: Nicolas Cage 
attended UCLA School of Theater, Film and Television.\n", 205 | "\u001b[0m\u001b[1;3;38;2;90;149;237m[Sub-question query engine] Q: What is the education of Leonardo DiCaprio?\n", 206 | "\u001b[0m\u001b[1;3;38;2;90;149;237m[Sub-question query engine] A: Leonardo DiCaprio attended the Los Angeles Center for Enriched Studies for four years and later the Seeds Elementary School. He later enrolled at the John Marshall High School, but dropped out later and eventually earned a general equivalency diploma.\n", 207 | "\u001b[0mNicolas Cage received his education at UCLA School of Theater, Film and Television, while Leonardo DiCaprio attended the Los Angeles Center for Enriched Studies, Seeds Elementary School, and John Marshall High School before earning a general equivalency diploma.\n" 208 | ] 209 | } 210 | ], 211 | "source": [ 212 | "response_2 = query_engine.query(\"Compare the education received by Nicolas Cage and Leonardo DiCaprio.\")\n", 213 | "print(str(response_2))" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 21, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/plain": [ 224 | "'The question requires comparing the education received by two individuals, which can be effectively broken down into simpler sub-questions.'" 225 | ] 226 | }, 227 | "execution_count": 21, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "response_2.metadata[\"selector_result\"].selections[0].reason" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 13, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 | "text": [ 245 | "Selecting query engine 1: The question requires complex or multifaceted information retrieval, as it involves identifying the director of a specific pilot episode and the acting debut of Nicolas Cage..\n", 246 | "\u001b[1;3;33m> Current query: Who directed the pilot that marked 
the acting debut of Nicolas Cage?\n", 247 | "\u001b[0m\u001b[1;3;38;5;200m> New query: What was the name of the pilot that marked the acting debut of Nicolas Cage?\n", 248 | "\u001b[0m\u001b[1;3;33m> Current query: Who directed the pilot that marked the acting debut of Nicolas Cage?\n", 249 | "\u001b[0m\u001b[1;3;38;5;200m> New query: Who directed The Best of Times?\n", 250 | "\u001b[0m\u001b[1;3;33m> Current query: Who directed the pilot that marked the acting debut of Nicolas Cage?\n", 251 | "\u001b[0m\u001b[1;3;38;5;200m> New query: Who was Don Mischer?\n", 252 | "\u001b[0mDon Mischer\n" 253 | ] 254 | } 255 | ], 256 | "source": [ 257 | "response_3 = query_engine.query(\"Who directed the pilot that marked the acting debut of Nicolas Cage?\")\n", 258 | "print(str(response_3))" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 24, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "data": { 268 | "text/plain": [ 269 | "'The question requires complex or multifaceted information retrieval, as it involves identifying the director of a specific pilot episode and the acting debut of Nicolas Cage.'" 270 | ] 271 | }, 272 | "execution_count": 24, 273 | "metadata": {}, 274 | "output_type": "execute_result" 275 | } 276 | ], 277 | "source": [ 278 | "response_3.metadata[\"selector_result\"].selections[0].reason" 279 | ] 280 | } 281 | ], 282 | "metadata": { 283 | "kernelspec": { 284 | "display_name": "mira-9B0geXr7-py3.10", 285 | "language": "python", 286 | "name": "python3" 287 | }, 288 | "language_info": { 289 | "codemirror_mode": { 290 | "name": "ipython", 291 | "version": 3 292 | }, 293 | "file_extension": ".py", 294 | "mimetype": "text/x-python", 295 | "name": "python", 296 | "nbconvert_exporter": "python", 297 | "pygments_lexer": "ipython3", 298 | "version": "3.10.13" 299 | } 300 | }, 301 | "nbformat": 4, 302 | "nbformat_minor": 2 303 | } 304 | --------------------------------------------------------------------------------