├── README.md
├── dataframe_memory_agent.ipynb
├── deci-7b
    ├── DeciLM_7b_chatbot.ipynb
    └── cricket.pdf
├── groq_mxbai+matryoshka
    ├── essay.txt
    └── groq+mixbread+matryoshka.ipynb
├── groq_mxbai
    ├── essay.txt
    └── groq+mixbread.ipynb
├── llama_index_slides
    ├── Explainable AI.pptx
    └── llamapack_slides.ipynb
├── llamaparse_langchain
    ├── llama3+llamaparse+Groq+Mixedbread.ipynb
    └── sahib-cv_can-flowcv.pdf
└── llamindex-raptors
    ├── Llamaindex-Raptor_Semantic.ipynb
    └── iPhone.pdf


/README.md:
--------------------------------------------------------------------------------
1 | # llm-learning


--------------------------------------------------------------------------------
/dataframe_memory_agent.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "9fa0d751",
  7 |    "metadata": {},
  8 |    "outputs": [
  9 |     {
 10 |      "name": "stderr",
 11 |      "output_type": "stream",
 12 |      "text": [
 13 |       "/Users/sahibpreetsingh/miniconda3/envs/sahib_lit/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed).\n",
 14 |       "  from pandas.core import (\n"
 15 |      ]
 16 |     }
 17 |    ],
 18 |    "source": [
 19 |     "from langchain.agents import AgentExecutor, create_openai_functions_agent\n",
 20 |     "from langchain_core.messages import AIMessage, HumanMessage\n",
 21 |     "from langchain_core.prompts import MessagesPlaceholder\n",
 22 |     "from langchain_experimental.agents.agent_toolkits.pandas.base import _get_functions_single_prompt\n",
 23 |     "from langchain_experimental.tools.python.tool import PythonAstREPLTool\n",
 24 |     "from langchain_openai import ChatOpenAI\n",
 25 |     "import pandas as pd"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 2,
 31 |    "id": "9be5c678",
 32 |    "metadata": {},
 33 |    "outputs": [
 34 |     {
 35 |      "name": "stdout",
 36 |      "output_type": "stream",
 37 |      "text": [
 38 |       "  Jersey Color Team Name Country Name\n",
 39 |       "0          Red    Team A    Country X\n",
 40 |       "1         Blue    Team B    Country Y\n",
 41 |       "2        Green    Team C    Country Z\n",
 42 |       "3       Yellow    Team D    Country W\n",
 43 |       "4        Black    Team E    Country V\n"
 44 |      ]
 45 |     }
 46 |    ],
 47 |    "source": [
 48 |     "# Create a temporary DataFrame with sample data\n",
 49 |     "data = {\n",
 50 |     "    \"Jersey Color\": [\"Red\", \"Blue\", \"Green\", \"Yellow\", \"Black\"],\n",
 51 |     "    \"Team Name\": [\"Team A\", \"Team B\", \"Team C\", \"Team D\", \"Team E\"],\n",
 52 |     "    \"Country Name\": [\"Country X\", \"Country Y\", \"Country Z\", \"Country W\", \"Country V\"]\n",
 53 |     "}\n",
 54 |     "\n",
 55 |     "df = pd.DataFrame(data)\n",
 56 |     "\n",
 57 |     "# Display the temporary DataFrame\n",
 58 |     "print(df)"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 3,
 64 |    "id": "cdb6ba7a",
 65 |    "metadata": {},
 66 |    "outputs": [],
 67 |    "source": [
 68 |     "# This is a hack: patch the generated prompt so it also accepts a \"chat_history\" variable\n",
 69 |     "prompt = _get_functions_single_prompt(df)\n",
 70 |     "prompt.input_variables.append(\"chat_history\")\n",
 71 |     "prompt.messages.insert(1, MessagesPlaceholder(variable_name=\"chat_history\"))"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 4,
 77 |    "id": "3d7de7d9",
 78 |    "metadata": {},
 79 |    "outputs": [],
 80 |    "source": [
 81 |     "\n",
 82 |     "\n",
 83 |     "tools = [PythonAstREPLTool( locals={\"df\": df},description = \"\"\"\n",
 84 |     "A Python shell. Use this to execute python commands. Input should be a valid python command. \n",
 85 |     "Try and break the problem in steps and solve the problem step by step.\n",
 86 |     "\"\"\")]\n",
 87 |     "\n",
 88 |     "chat_model = ChatOpenAI(model=\"gpt-4-0613\")\n",
 89 |     "agent = create_openai_functions_agent(chat_model, tools, prompt)\n",
 90 |     "agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 5,
 96 |    "id": "ea9fd331",
 97 |    "metadata": {},
 98 |    "outputs": [],
 99 |    "source": [
100 |     "chat_history = []\n",
101 |     "def memory_maker(query):\n",
102 |     "    # Log the PREVIOUS turn: the global `response` is the last invoke() result, and its\n",
103 |     "    # \"input\" is the question that produced \"output\"; `query` is kept for call compatibility.\n",
104 |     "    chat_history.extend([\n",
105 |     "        HumanMessage(content=response[\"input\"]),\n",
106 |     "        AIMessage(content=response[\"output\"]),\n",
107 |     "    ])\n",
108 |     "    return chat_history"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 6,
114 |    "id": "bea93399",
115 |    "metadata": {},
116 |    "outputs": [
117 |     {
118 |      "name": "stdout",
119 |      "output_type": "stream",
120 |      "text": [
121 |       "\n",
122 |       "\n",
123 |       "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
124 |       "\u001b[32;1m\u001b[1;3m\n",
125 |       "Invoking: `python_repl_ast` with `{'query': 'df.shape[0]'}`\n",
126 |       "\n",
127 |       "\n",
128 |       "\u001b[0m\u001b[36;1m\u001b[1;3m5\u001b[0m\u001b[32;1m\u001b[1;3mThe dataframe has 5 rows.\u001b[0m\n",
129 |       "\n",
130 |       "\u001b[1m> Finished chain.\u001b[0m\n",
131 |       "The dataframe has 5 rows.\n"
132 |      ]
133 |     }
134 |    ],
135 |    "source": [
136 |     "\n",
137 |     "\n",
138 |     "query = \"how many rows are in the dataframe?\"\n",
139 |     "\n",
140 |     "response = agent_executor.invoke({\"input\": query, \"chat_history\": chat_history})\n",
141 |     "print(response[\"output\"])"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": 7,
147 |    "id": "50bcff04",
148 |    "metadata": {},
149 |    "outputs": [],
150 |    "source": [
151 |     "query='which team has blue jersey'"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 8,
157 |    "id": "b3846a05",
158 |    "metadata": {},
159 |    "outputs": [
160 |     {
161 |      "name": "stdout",
162 |      "output_type": "stream",
163 |      "text": [
164 |       "\n",
165 |       "\n",
166 |       "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
167 |       "\u001b[32;1m\u001b[1;3m\n",
168 |       "Invoking: `python_repl_ast` with `{'query': \"df[df['Jersey Color'] == 'Blue']['Team Name']\"}`\n",
169 |       "\n",
170 |       "\n",
171 |       "\u001b[0m\u001b[36;1m\u001b[1;3m1    Team B\n",
172 |       "Name: Team Name, dtype: object\u001b[0m\u001b[32;1m\u001b[1;3mThe team with a blue jersey is Team B.\u001b[0m\n",
173 |       "\n",
174 |       "\u001b[1m> Finished chain.\u001b[0m\n",
175 |       "The team with a blue jersey is Team B.\n"
176 |      ]
177 |     }
178 |    ],
179 |    "source": [
180 |     "\n",
181 |     "\n",
182 |     "# query = \"What were we talking about?\"\n",
183 |     "response = agent_executor.invoke({\"input\": query, \"chat_history\": memory_maker(query)})\n",
184 |     "print(response[\"output\"])"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": 9,
190 |    "id": "0dd1430d",
191 |    "metadata": {},
192 |    "outputs": [],
193 |    "source": [
194 |     "query=\"and what's the country name of it?\""
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 10,
200 |    "id": "f7e6ca89",
201 |    "metadata": {},
202 |    "outputs": [
203 |     {
204 |      "name": "stdout",
205 |      "output_type": "stream",
206 |      "text": [
207 |       "\n",
208 |       "\n",
209 |       "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
210 |       "\u001b[32;1m\u001b[1;3m\n",
211 |       "Invoking: `python_repl_ast` with `{'query': \"df.loc[df['Jersey Color'] == 'Blue', 'Country Name'].values[0]\"}`\n",
212 |       "\n",
213 |       "\n",
214 |       "\u001b[0m\u001b[36;1m\u001b[1;3mCountry Y\u001b[0m\u001b[32;1m\u001b[1;3mThe country of the team with a blue jersey is Country Y.\u001b[0m\n",
215 |       "\n",
216 |       "\u001b[1m> Finished chain.\u001b[0m\n",
217 |       "The country of the team with a blue jersey is Country Y.\n"
218 |      ]
219 |     }
220 |    ],
221 |    "source": [
222 |     "response = agent_executor.invoke({\"input\": query, \"chat_history\": memory_maker(query)})\n",
223 |     "print(response[\"output\"])"
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "code",
228 |    "execution_count": null,
229 |    "id": "a94781a0",
230 |    "metadata": {},
231 |    "outputs": [],
232 |    "source": []
233 |   }
234 |  ],
235 |  "metadata": {
236 |   "kernelspec": {
237 |    "display_name": "Python 3 (ipykernel)",
238 |    "language": "python",
239 |    "name": "python3"
240 |   },
241 |   "language_info": {
242 |    "codemirror_mode": {
243 |     "name": "ipython",
244 |     "version": 3
245 |    },
246 |    "file_extension": ".py",
247 |    "mimetype": "text/x-python",
248 |    "name": "python",
249 |    "nbconvert_exporter": "python",
250 |    "pygments_lexer": "ipython3",
251 |    "version": "3.10.13"
252 |   }
253 |  },
254 |  "nbformat": 4,
255 |  "nbformat_minor": 5
256 | }
257 | 


--------------------------------------------------------------------------------
/deci-7b/cricket.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sahibpreetsingh12/llm-learning/09fcf0c127ba8d474765cd0ea292eaddf7185b22/deci-7b/cricket.pdf


--------------------------------------------------------------------------------
/groq_mxbai+matryoshka/essay.txt:
--------------------------------------------------------------------------------
 1 | The 2022 FIFA World Cup was the 22nd FIFA World Cup, the world championship for national football teams organized by FIFA. It took place in Qatar from 20 November to 18 December 2022, after the country was awarded the hosting rights in 2010. It was the first World Cup to be held in the Arab world and Muslim world, and the second held entirely in Asia after the 2002 tournament in South Korea and Japan.[A]
 2 | 
 3 | This tournament was the last with 32 participating teams, with the number of teams being increased to 48 for the 2026 edition. To avoid the extremes of Qatar's hot climate,[B] the event was held in November and December instead of during the traditional months of May, June, or July.[C] It was held over a reduced time frame of 29 days with 64 matches played in eight venues across five cities. Qatar entered the event—their first World Cup—automatically as the host's national team, alongside 31 teams determined by the qualification process.
 4 | 
 5 | Argentina were crowned the champions after winning the final against the title holder France 4–2 on penalties following a 3–3 draw after extra time. It was Argentina's third title and their first since 1986, as well as being the first nation from outside of Europe to win the tournament since 2002. French player Kylian Mbappé became the first player to score a hat-trick in a World Cup final since Geoff Hurst in the 1966 final and won the Golden Boot as he scored the most goals (eight) during the tournament. Argentine captain Lionel Messi was voted the tournament's best player, winning the Golden Ball. The tournament has been considered exceptionally poetic as the capstone of his career, for some commentators fulfilling a previously unmet criterion to be regarded as one of the greatest players of all time.[4] Teammates Emiliano Martínez and Enzo Fernández won the Golden Glove, awarded to the tournament's best goalkeeper; and the Young Player Award, awarded to the tournament's best young player, respectively. With 172 goals, the tournament set a record for the highest number of goals scored in the 32-team format, with every participating team scoring at least one goal. Morocco became the first African nation to top Group stages with 7 points.
 6 | 
 7 | The choice to host the World Cup in Qatar attracted significant criticism, with concerns raised over the country's treatment of migrant workers, women, and members of the LGBT community, as well as Qatar's climate, lack of a strong football culture, scheduling changes, and allegations of bribery for hosting rights and wider FIFA corruption.[D]
 8 | 
 9 | Format
10 | The FIFA World Cup is a professional football tournament held between national football teams, organised by FIFA.[13][14] The tournament, held every four years, was first played in 1930 in Uruguay,[15] and has been contested by 32 teams since the 1998 event.[15] The tournament was contested with eight round-robin groups followed by a knockout round for 16 teams.[16] The defending champions were France, who defeated Croatia 4–2 in the 2018 FIFA World Cup Final.[17][18] The event was scheduled to take place under a reduced length,[19] from 20 November to 18 December in Qatar.[20][21][22] Being held in Qatar, it was the first World Cup tournament to be held in the Arab world.[23] Spectators were not required to follow most COVID-19 pandemic restrictions such as social distancing, wearing masks, and negative tests.[24]
11 | 
12 | Schedule
13 | Unlike previous FIFA World Cups, which are typically played in June and July, because of Qatar's intense summer heat and often fairly high humidity,[2][21][25] the 2022 World Cup was played in November and December.[5][26] As a result, the World Cup was unusually staged in the middle of the seasons of many domestic association football leagues, which started in late July or August, including all of the major European leagues, which had been obliged to incorporate extended breaks into their domestic schedules to accommodate the World Cup. Major European competitions had scheduled their respective competitions group matches to be played before the World Cup, to avoid playing group matches the following year.[27]
14 | 
15 | The match schedule was confirmed by FIFA on 15 July 2020.[28] The group stage was set to begin on 21 November, with four matches every day. Later, the schedule was tweaked by moving the Qatar vs Ecuador game to 20 November, after Qatar lobbied FIFA to allow their team to open the tournament.[29][30][31] The final was played on 18 December 2022, National Day, at Lusail Stadium.[28][32] Unlike previous tournaments where the match venues and kick-off times for each fixture were set prior to the draw, the assignment of group fixtures for each matchday to a specific venue and kick-off time was only made after the final draw, with the teams of each specific fixture known. This was due to the close proximity of the venues, which allowed the organizers to optimize stadium allocation for spectators and kick-off times for television audiences.[28]


--------------------------------------------------------------------------------
/groq_mxbai+matryoshka/groq+mixbread+matryoshka.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "dd6cbc11",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "## Installations\n"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": 1,
 14 |    "id": "85076393",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "# !pip install langchain-groq\n",
 19 |     "# %pip install --upgrade --quiet  langchain langchain-community langchainhub langchain-openai langchain-chroma bs4\n",
 20 |     "# !pip install shutup\n",
 21 |     "# !pip install sentence-transformers==2.7.0"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "id": "30c21e9a",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "## Imports"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 2,
 35 |    "id": "5a901409",
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "\n",
 40 |     "import os\n",
 41 |     "import shutup\n",
 42 |     "import torch\n",
 43 |     "from typing import List, Dict, Tuple\n",
 44 |     "from langchain_community.document_loaders import TextLoader\n",
 45 |     "from langchain_community.vectorstores import FAISS\n",
 46 |     "from torch import Tensor\n",
 47 |     "from langchain_groq import ChatGroq\n",
 48 |     "from torch.utils.data import Dataset, DataLoader\n",
 49 |     "from sentence_transformers.readers import InputExample\n",
 50 |     "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 51 |     "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
 52 |     "from langchain_core.prompts import ChatPromptTemplate\n",
 53 |     "from langchain.chains import create_retrieval_chain\n",
 54 |     "from langchain_community.embeddings import HuggingFaceEmbeddings\n",
 55 |     "from sentence_transformers import SentenceTransformer\n",
 56 |     "from sentence_transformers.losses import CoSENTLoss, MatryoshkaLoss\n",
 57 |     "shutup.please()"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 3,
 63 |    "id": "c98fedc8",
 64 |    "metadata": {},
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "# Load data\n",
 68 |     "loader = TextLoader(\"essay.txt\")\n",
 69 |     "docs = loader.load()"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 4,
 75 |    "id": "ebc8b42d",
 76 |    "metadata": {},
 77 |    "outputs": [],
 78 |    "source": [
 79 |     "\n",
 80 |     "# Split text into chunks \n",
 81 |     "text_splitter = RecursiveCharacterTextSplitter()\n",
 82 |     "documents = text_splitter.split_documents(docs)"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "markdown",
 87 |    "id": "b4df1ca8",
 88 |    "metadata": {},
 89 |    "source": [
 90 |     "## GROQ"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 5,
 96 |    "id": "305969cf",
 97 |    "metadata": {},
 98 |    "outputs": [],
 99 |    "source": [
100 |     "groq_key = 'yourgroq-key'"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": 6,
106 |    "id": "6f98b5db",
107 |    "metadata": {},
108 |    "outputs": [],
109 |    "source": [
110 |     "matryoshka_dim = 64\n",
111 |     "\n",
112 |     "# Define the embedding model\n",
113 |     "# Use Mixedbread's embedding model in binary mode with truncated (Matryoshka) embeddings\n",
114 |     "\n",
115 |     "embeddings = HuggingFaceEmbeddings(model_name=\"mixedbread-ai/mxbai-embed-large-v1\", \n",
116 |     "                                   model_kwargs = {'truncate_dim':matryoshka_dim},\n",
117 |     "                                   encode_kwargs = {'precision': 'binary'})\n"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": 7,
123 |    "id": "c1582683",
124 |    "metadata": {},
125 |    "outputs": [],
126 |    "source": [
127 |     "# Build the FAISS vector store\n",
128 |     "vector = FAISS.from_documents(documents, embeddings)\n",
129 |     "\n",
130 |     "# Define a retriever interface\n",
131 |     "retriever = vector.as_retriever()\n",
132 |     "\n",
133 |     "# Define LLM\n",
134 |     "model = ChatGroq(temperature=0, groq_api_key=groq_key, model_name=\"mixtral-8x7b-32768\")\n",
135 |     "\n",
136 |     "# Define prompt template\n",
137 |     "prompt = ChatPromptTemplate.from_template(\"\"\"Answer the following question based only on the provided context:\n",
138 |     "\n",
139 |     "<context>\n",
140 |     "{context}\n",
141 |     "</context>\n",
142 |     "\n",
143 |     "Question: {input}\"\"\")\n",
144 |     "\n",
145 |     "# Create a retrieval chain to answer questions\n",
146 |     "document_chain = create_stuff_documents_chain(model, prompt)\n",
147 |     "retrieval_chain = create_retrieval_chain(retriever, document_chain)\n"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "markdown",
152 |    "id": "cd21df45",
153 |    "metadata": {},
154 |    "source": [
155 |     "## retrieved responses"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "code",
160 |    "execution_count": 8,
161 |    "id": "0e12ae09",
162 |    "metadata": {},
163 |    "outputs": [
164 |     {
165 |      "name": "stderr",
166 |      "output_type": "stream",
167 |      "text": [
168 |       "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
169 |       "To disable this warning, you can either:\n",
170 |       "\t- Avoid using `tokenizers` before the fork if possible\n",
171 |       "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
172 |      ]
173 |     },
174 |     {
175 |      "name": "stdout",
176 |      "output_type": "stream",
177 |      "text": [
178 |       "The 2022 FIFA World Cup was held in Qatar, making it the first World Cup to be held in the Arab world and Muslim world, and the second held entirely in Asia after the 2002 tournament in South Korea and Japan.\n",
179 |       "CPU times: user 150 ms, sys: 67.4 ms, total: 217 ms\n",
180 |       "Wall time: 1.02 s\n"
181 |      ]
182 |     }
183 |    ],
184 |    "source": [
185 |     "%%time\n",
186 |     "response = retrieval_chain.invoke({\"input\": \"where did fifa happened ?\"})\n",
187 |     "print(response[\"answer\"])"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "code",
192 |    "execution_count": null,
193 |    "id": "f7251007",
194 |    "metadata": {},
195 |    "outputs": [],
196 |    "source": []
197 |   }
198 |  ],
199 |  "metadata": {
200 |   "kernelspec": {
201 |    "display_name": "Python 3 (ipykernel)",
202 |    "language": "python",
203 |    "name": "python3"
204 |   },
205 |   "language_info": {
206 |    "codemirror_mode": {
207 |     "name": "ipython",
208 |     "version": 3
209 |    },
210 |    "file_extension": ".py",
211 |    "mimetype": "text/x-python",
212 |    "name": "python",
213 |    "nbconvert_exporter": "python",
214 |    "pygments_lexer": "ipython3",
215 |    "version": "3.10.13"
216 |   }
217 |  },
218 |  "nbformat": 4,
219 |  "nbformat_minor": 5
220 | }
221 | 


--------------------------------------------------------------------------------
/groq_mxbai/essay.txt:
--------------------------------------------------------------------------------
 1 | The 2022 FIFA World Cup was the 22nd FIFA World Cup, the world championship for national football teams organized by FIFA. It took place in Qatar from 20 November to 18 December 2022, after the country was awarded the hosting rights in 2010. It was the first World Cup to be held in the Arab world and Muslim world, and the second held entirely in Asia after the 2002 tournament in South Korea and Japan.[A]
 2 | 
 3 | This tournament was the last with 32 participating teams, with the number of teams being increased to 48 for the 2026 edition. To avoid the extremes of Qatar's hot climate,[B] the event was held in November and December instead of during the traditional months of May, June, or July.[C] It was held over a reduced time frame of 29 days with 64 matches played in eight venues across five cities. Qatar entered the event—their first World Cup—automatically as the host's national team, alongside 31 teams determined by the qualification process.
 4 | 
 5 | Argentina were crowned the champions after winning the final against the title holder France 4–2 on penalties following a 3–3 draw after extra time. It was Argentina's third title and their first since 1986, as well as being the first nation from outside of Europe to win the tournament since 2002. French player Kylian Mbappé became the first player to score a hat-trick in a World Cup final since Geoff Hurst in the 1966 final and won the Golden Boot as he scored the most goals (eight) during the tournament. Argentine captain Lionel Messi was voted the tournament's best player, winning the Golden Ball. The tournament has been considered exceptionally poetic as the capstone of his career, for some commentators fulfilling a previously unmet criterion to be regarded as one of the greatest players of all time.[4] Teammates Emiliano Martínez and Enzo Fernández won the Golden Glove, awarded to the tournament's best goalkeeper; and the Young Player Award, awarded to the tournament's best young player, respectively. With 172 goals, the tournament set a record for the highest number of goals scored in the 32-team format, with every participating team scoring at least one goal. Morocco became the first African nation to top Group stages with 7 points.
 6 | 
 7 | The choice to host the World Cup in Qatar attracted significant criticism, with concerns raised over the country's treatment of migrant workers, women, and members of the LGBT community, as well as Qatar's climate, lack of a strong football culture, scheduling changes, and allegations of bribery for hosting rights and wider FIFA corruption.[D]
 8 | 
 9 | Format
10 | The FIFA World Cup is a professional football tournament held between national football teams, organised by FIFA.[13][14] The tournament, held every four years, was first played in 1930 in Uruguay,[15] and has been contested by 32 teams since the 1998 event.[15] The tournament was contested with eight round-robin groups followed by a knockout round for 16 teams.[16] The defending champions were France, who defeated Croatia 4–2 in the 2018 FIFA World Cup Final.[17][18] The event was scheduled to take place under a reduced length,[19] from 20 November to 18 December in Qatar.[20][21][22] Being held in Qatar, it was the first World Cup tournament to be held in the Arab world.[23] Spectators were not required to follow most COVID-19 pandemic restrictions such as social distancing, wearing masks, and negative tests.[24]
11 | 
12 | Schedule
13 | Unlike previous FIFA World Cups, which are typically played in June and July, because of Qatar's intense summer heat and often fairly high humidity,[2][21][25] the 2022 World Cup was played in November and December.[5][26] As a result, the World Cup was unusually staged in the middle of the seasons of many domestic association football leagues, which started in late July or August, including all of the major European leagues, which had been obliged to incorporate extended breaks into their domestic schedules to accommodate the World Cup. Major European competitions had scheduled their respective competitions group matches to be played before the World Cup, to avoid playing group matches the following year.[27]
14 | 
15 | The match schedule was confirmed by FIFA on 15 July 2020.[28] The group stage was set to begin on 21 November, with four matches every day. Later, the schedule was tweaked by moving the Qatar vs Ecuador game to 20 November, after Qatar lobbied FIFA to allow their team to open the tournament.[29][30][31] The final was played on 18 December 2022, National Day, at Lusail Stadium.[28][32] Unlike previous tournaments where the match venues and kick-off times for each fixture were set prior to the draw, the assignment of group fixtures for each matchday to a specific venue and kick-off time was only made after the final draw, with the teams of each specific fixture known. This was due to the close proximity of the venues, which allowed the organizers to optimize stadium allocation for spectators and kick-off times for television audiences.[28]


--------------------------------------------------------------------------------
/groq_mxbai/groq+mixbread.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "dd6cbc11",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "## Installations\n"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": 10,
 14 |    "id": "85076393",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "# !pip install langchain-groq\n",
 19 |     "# %pip install --upgrade --quiet  langchain langchain-community langchainhub langchain-openai langchain-chroma bs4\n",
 20 |     "# !pip install shutup"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "markdown",
 25 |    "id": "30c21e9a",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "## Imports"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 12,
 34 |    "id": "5a901409",
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "\n",
 39 |     "import os\n",
 40 |     "import shutup\n",
 41 |     "from langchain_community.document_loaders import TextLoader\n",
 42 |     "from langchain_community.vectorstores import FAISS\n",
 43 |     "from langchain_groq import ChatGroq\n",
 44 |     "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 45 |     "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
 46 |     "from langchain_core.prompts import ChatPromptTemplate\n",
 47 |     "from langchain.chains import create_retrieval_chain\n",
 48 |     "from langchain_community.embeddings import HuggingFaceEmbeddings\n",
 49 |     "\n",
 50 |     "shutup.please()"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": 13,
 56 |    "id": "c98fedc8",
 57 |    "metadata": {},
 58 |    "outputs": [],
 59 |    "source": [
 60 |     "# Load data\n",
 61 |     "loader = TextLoader(\"essay.txt\")\n",
 62 |     "docs = loader.load()"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 14,
 68 |    "id": "ebc8b42d",
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "\n",
 73 |     "# Split text into chunks \n",
 74 |     "text_splitter = RecursiveCharacterTextSplitter()\n",
 75 |     "documents = text_splitter.split_documents(docs)"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "markdown",
 80 |    "id": "b4df1ca8",
 81 |    "metadata": {},
 82 |    "source": [
 83 |     "## GROQ"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 20,
 89 |    "id": "305969cf",
 90 |    "metadata": {},
 91 |    "outputs": [],
 92 |    "source": [
 93 |     "groq_key = 'your key'"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": 16,
 99 |    "id": "6f98b5db",
100 |    "metadata": {},
101 |    "outputs": [],
102 |    "source": [
103 |     "\n",
104 |     "\n",
105 |     "# Define the embedding model\n",
106 |     "# Use Mixedbread's embedding model in binary mode\n",
107 |     "\n",
108 |     "embeddings = HuggingFaceEmbeddings(model_name=\"mixedbread-ai/mxbai-embed-large-v1\",\n",
109 |     "                                   encode_kwargs = {'precision': 'binary'})\n",
110 |     "\n",
111 |     "\n",
112 |     "# Build the FAISS vector store\n",
113 |     "vector = FAISS.from_documents(documents, embeddings)\n",
114 |     "\n",
115 |     "# Define a retriever interface\n",
116 |     "retriever = vector.as_retriever()\n",
117 |     "\n",
118 |     "# Define LLM\n",
119 |     "model = ChatGroq(temperature=0, groq_api_key=groq_key, model_name=\"mixtral-8x7b-32768\")\n",
120 |     "\n",
121 |     "# Define prompt template\n",
122 |     "prompt = ChatPromptTemplate.from_template(\"\"\"Answer the following question based only on the provided context:\n",
123 |     "\n",
124 |     "<context>\n",
125 |     "{context}\n",
126 |     "</context>\n",
127 |     "\n",
128 |     "Question: {input}\"\"\")\n",
129 |     "\n",
130 |     "# Create a retrieval chain to answer questions\n",
131 |     "document_chain = create_stuff_documents_chain(model, prompt)\n",
132 |     "retrieval_chain = create_retrieval_chain(retriever, document_chain)\n"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "markdown",
137 |    "id": "cd21df45",
138 |    "metadata": {},
139 |    "source": [
140 |     "## retrieved responses"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "code",
145 |    "execution_count": 17,
146 |    "id": "0e12ae09",
147 |    "metadata": {},
148 |    "outputs": [
149 |     {
150 |      "name": "stdout",
151 |      "output_type": "stream",
152 |      "text": [
153 |       "The 2022 FIFA World Cup was held in Qatar. It was the first World Cup to be held in the Arab world and Muslim world, and the second held entirely in Asia after the 2002 tournament in South Korea and Japan.\n",
154 |       "CPU times: user 88.3 ms, sys: 60.5 ms, total: 149 ms\n",
155 |       "Wall time: 754 ms\n"
156 |      ]
157 |     }
158 |    ],
159 |    "source": [
160 |     "%%time\n",
161 |     "response = retrieval_chain.invoke({\"input\": \"where did fifa happened ?\"})\n",
162 |     "print(response[\"answer\"])"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": null,
168 |    "id": "d202acde",
169 |    "metadata": {},
170 |    "outputs": [],
171 |    "source": []
172 |   }
173 |  ],
174 |  "metadata": {
175 |   "kernelspec": {
176 |    "display_name": "Python 3 (ipykernel)",
177 |    "language": "python",
178 |    "name": "python3"
179 |   },
180 |   "language_info": {
181 |    "codemirror_mode": {
182 |     "name": "ipython",
183 |     "version": 3
184 |    },
185 |    "file_extension": ".py",
186 |    "mimetype": "text/x-python",
187 |    "name": "python",
188 |    "nbconvert_exporter": "python",
189 |    "pygments_lexer": "ipython3",
190 |    "version": "3.10.13"
191 |   }
192 |  },
193 |  "nbformat": 4,
194 |  "nbformat_minor": 5
195 | }
196 | 


--------------------------------------------------------------------------------
/llama_index_slides/Explainable AI.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sahibpreetsingh12/llm-learning/09fcf0c127ba8d474765cd0ea292eaddf7185b22/llama_index_slides/Explainable AI.pptx


--------------------------------------------------------------------------------
/llama_index_slides/llamapack_slides.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": []
  7 |     },
  8 |     "kernelspec": {
  9 |       "name": "python3",
 10 |       "display_name": "Python 3"
 11 |     },
 12 |     "language_info": {
 13 |       "name": "python"
 14 |     }
 15 |   },
 16 |   "cells": [
 17 |     {
 18 |       "cell_type": "code",
 19 |       "execution_count": 10,
 20 |       "metadata": {
 21 |         "id": "ELGBYVAj_7W9"
 22 |       },
 23 |       "outputs": [],
 24 |       "source": [
 25 |         "# !pip install llama-hub\n",
 26 |         "# !pip install langchain"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "cell_type": "code",
 31 |       "source": [],
 32 |       "metadata": {
 33 |         "id": "6rz061rqA2Kv"
 34 |       },
 35 |       "execution_count": 6,
 36 |       "outputs": []
 37 |     },
 38 |     {
 39 |       "cell_type": "markdown",
 40 |       "source": [
 41 |         "## Imports"
 42 |       ],
 43 |       "metadata": {
 44 |         "id": "M3Hp6jdBIHb7"
 45 |       }
 46 |     },
 47 |     {
 48 |       "cell_type": "code",
 49 |       "source": [
 50 |         "from llama_index import VectorStoreIndex, download_loader\n",
 51 |         "from langchain.agents import initialize_agent, Tool\n",
 52 |         "from langchain.llms import OpenAI\n",
 53 |         "from langchain.chains.conversation.memory import ConversationBufferMemory\n",
 54 |         "from pathlib import Path\n"
 55 |       ],
 56 |       "metadata": {
 57 |         "id": "2Pk8IeE_EL1M"
 58 |       },
 59 |       "execution_count": 36,
 60 |       "outputs": []
 61 |     },
 62 |     {
 63 |       "cell_type": "markdown",
 64 |       "source": [
 65 |         "### LLamaHub"
 66 |       ],
 67 |       "metadata": {
 68 |         "id": "A3Kq-YvRIRKP"
 69 |       }
 70 |     },
 71 |     {
 72 |       "cell_type": "code",
 73 |       "source": [
 74 |         "PptxReader = download_loader(\"PptxReader\")\n",
 75 |         "loader = PptxReader()\n",
 76 |         "documents = loader.load_data(file=Path('/content/Explainable AI.pptx'))"
 77 |       ],
 78 |       "metadata": {
 79 |         "id": "gHeqTz6hAbQO"
 80 |       },
 81 |       "execution_count": 4,
 82 |       "outputs": []
 83 |     },
 84 |     {
 85 |       "cell_type": "code",
 86 |       "source": [
 87 |         "from langchain.llms import OpenAI"
 88 |       ],
 89 |       "metadata": {
 90 |         "id": "tkrCSv8VBO46"
 91 |       },
 92 |       "execution_count": 11,
 93 |       "outputs": []
 94 |     },
 95 |     {
 96 |       "cell_type": "code",
 97 |       "source": [
 98 |         "import openai\n",
 99 |         "openai.api_key = \"sk-xxxxxx\""
100 |       ],
101 |       "metadata": {
102 |         "id": "i37um9arBZ3j"
103 |       },
104 |       "execution_count": 14,
105 |       "outputs": []
106 |     },
107 |     {
108 |       "cell_type": "markdown",
109 |       "source": [
110 |         "### Parsing Data in slides as documents"
111 |       ],
112 |       "metadata": {
113 |         "id": "g-ua5vfKIaVj"
114 |       }
115 |     },
116 |     {
117 |       "cell_type": "code",
118 |       "source": [
119 |         "index = VectorStoreIndex.from_documents(documents)"
120 |       ],
121 |       "metadata": {
122 |         "colab": {
123 |           "base_uri": "https://localhost:8080/"
124 |         },
125 |         "id": "HA31j6HDAbio",
126 |         "outputId": "aa5c7c13-a4bb-42b9-8fd5-aa4dcdfad118"
127 |       },
128 |       "execution_count": 15,
129 |       "outputs": [
130 |         {
131 |           "output_type": "stream",
132 |           "name": "stderr",
133 |           "text": [
134 |             "[nltk_data] Downloading package punkt to /tmp/llama_index...\n",
135 |             "[nltk_data]   Unzipping tokenizers/punkt.zip.\n"
136 |           ]
137 |         }
138 |       ]
139 |     },
140 |     {
141 |       "cell_type": "code",
142 |       "source": [
143 |         "query_engine = index.as_query_engine()"
144 |       ],
145 |       "metadata": {
146 |         "id": "gCroCUvGH4_C"
147 |       },
148 |       "execution_count": 34,
149 |       "outputs": []
150 |     },
151 |     {
152 |       "cell_type": "code",
153 |       "source": [
154 |         "response = query_engine.query(\"what is explainable ai\")\n",
155 |         "print(response)"
156 |       ],
157 |       "metadata": {
158 |         "colab": {
159 |           "base_uri": "https://localhost:8080/"
160 |         },
161 |         "id": "DuCuJrQbH7Q0",
162 |         "outputId": "a55205ed-bab1-4ee4-ae74-f739f993b074"
163 |       },
164 |       "execution_count": 35,
165 |       "outputs": [
166 |         {
167 |           "output_type": "stream",
168 |           "name": "stdout",
169 |           "text": [
170 |             "Explainable AI is an emerging field in machine learning that focuses on understanding and explaining the decision-making process of AI systems. It aims to address the \"black box\" nature of AI by inspecting and analyzing the steps and models involved in making decisions. Explainable AI seeks to answer questions such as why a specific prediction or decision was made, why alternative actions were not taken, and how the AI system can correct errors. Researchers have been developing tools and techniques, such as the What-if Tool, LIME, and TreeInterpreter, to promote explainable AI. The goal is to provide justifications and explanations for AI decisions, increasing trust and understanding for analysts and consumers.\n"
171 |           ]
172 |         }
173 |       ]
174 |     }
175 |   ]
176 | }


--------------------------------------------------------------------------------
/llamaparse_langchain/llama3+llamaparse+Groq+Mixedbread.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "36aa5bbf",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "## Installations"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": 1,
 14 |    "id": "e95428bd",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "# %%capture\n",
 19 |     "# !pip install llama-parse"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "markdown",
 24 |    "id": "bc306ce2",
 25 |    "metadata": {},
 26 |    "source": [
 27 |     "## Imports"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 2,
 33 |    "id": "8e145c1c",
 34 |    "metadata": {},
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "import nest_asyncio\n",
 38 |     "\n",
 39 |     "nest_asyncio.apply()\n"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": 5,
 45 |    "id": "201ea6f0",
 46 |    "metadata": {},
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "\n",
 50 |     "import os\n",
 51 |     "import json\n",
 52 |     "import shutup\n",
 53 |     "from llama_parse import LlamaParse\n",
 54 |     "from langchain_community.document_loaders import TextLoader\n",
 55 |     "from langchain_community.vectorstores import FAISS\n",
 56 |     "from langchain_groq import ChatGroq\n",
 57 |     "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 58 |     "from langchain.chains import create_retrieval_chain\n",
 59 |     "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
 60 |     "from langchain_core.prompts import ChatPromptTemplate\n",
 61 |     "\n",
 62 |     "from langchain_community.embeddings import HuggingFaceEmbeddings\n",
 63 |     "\n",
 64 |     "\n",
 65 |     "shutup.please()"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "markdown",
 70 |    "id": "c8993613",
 71 |    "metadata": {},
 72 |    "source": [
  73 |     "Go to LlamaParse, create an account there, and get an API key -> [llamaparse](https://cloud.llamaindex.ai/login)"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 6,
 79 |    "id": "caf4f775",
 80 |    "metadata": {},
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "\n",
 84 |     "# API access to llama-cloud\n",
 85 |     "os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-your-own-key\""
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": 7,
 91 |    "id": "81371f62",
 92 |    "metadata": {},
 93 |    "outputs": [
 94 |     {
 95 |      "name": "stdout",
 96 |      "output_type": "stream",
 97 |      "text": [
 98 |       "Started parsing the file under job_id de49e08f-2087-4168-87da-1a5f5eece1da\n"
 99 |      ]
100 |     }
101 |    ],
102 |    "source": [
103 |     "documents = LlamaParse(result_type=\"markdown\").load_data(\"sahib-cv_can-flowcv.pdf\")\n"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "markdown",
108 |    "id": "16c6a58b",
109 |    "metadata": {},
110 |    "source": [
111 |     "## Sample output"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 8,
117 |    "id": "e493f7d7",
118 |    "metadata": {},
119 |    "outputs": [
120 |     {
121 |      "name": "stdout",
122 |      "output_type": "stream",
123 |      "text": [
124 |       "## Sahibpreet Singh\n",
125 |       "\n",
126 |       "Data Scientist\n",
127 |       "\n",
128 |       "ss9334931@gmail.com\n",
129 |       "4 Larkberry Road, Ontario\n",
130 |       "\n",
131 |       "https://www.linkedin.com/in/sahibpreetsinghh/\n",
132 |       "\n",
133 |       "### PROFILE\n",
134 |       "\n",
135 |       "Accomplished Data Scientist specializing in NLP and Conversational AI. Top Kaggle mentor, ranked within the top 0.01%. Expert in cutting-edge models, delivering innovative solutions, and optimizing decision-making with data-driven insights. Strong analytical abilities, exceptional problem-solving skills, and a passion for impactful results.\n",
136 |       "\n",
137 |       "### PROFESSIONAL EXPERIENCE\n",
138 |       "\n",
139 |       "|Company|Date|Position|Location|\n",
140 |       "|---|---|---|---|\n",
141 |       "|Tatras Data|11/2022 – 12/2023|Data Scientist|Chandigarh, India|\n",
142 |       "|- Successfully delivered the product for Text-to-SQL problem using LLM with deployment using Streamlit and with Langchain as Framework.\n",
143 |       "- Led the development of a centralized system aimed at streamlining the creation of contextual chatbots for diverse counties across the United States.\n",
144 |       "- Transformed the chatbot development process by introducing a centralized serv...\n"
145 |      ]
146 |     }
147 |    ],
148 |    "source": [
149 |     "print(documents[0].text[:1000] + \"...\")"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "markdown",
154 |    "id": "08abff5d",
155 |    "metadata": {},
156 |    "source": [
157 |     "## Saving parsed pdf to Txt File"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": 9,
163 |    "id": "b4e834fd",
164 |    "metadata": {},
165 |    "outputs": [],
166 |    "source": [
167 |     "text_from_pdf = json.loads(documents[0].to_json())['text']\n",
168 |     "with open(\"resume.txt\", \"w\") as text_file:\n",
169 |     "    text_file.write(text_from_pdf)\n"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "code",
174 |    "execution_count": 10,
175 |    "id": "12607ae6",
176 |    "metadata": {},
177 |    "outputs": [],
178 |    "source": [
179 |     "\n",
180 |     "# Load data\n",
181 |     "loader = TextLoader(\"resume.txt\")\n",
182 |     "docs = loader.load()\n",
183 |     "\n",
184 |     "# Split text into chunks \n",
185 |     "text_splitter = RecursiveCharacterTextSplitter()\n",
186 |     "documents = text_splitter.split_documents(docs)"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "code",
191 |    "execution_count": 11,
192 |    "id": "98c3de78",
193 |    "metadata": {},
194 |    "outputs": [],
195 |    "source": [
196 |     "\n",
197 |     "# Define the embedding model\n",
 198 |     "# Using Mixedbread's embedding model with binary precision\n",
199 |     "\n",
200 |     "embeddings = HuggingFaceEmbeddings(model_name=\"mixedbread-ai/mxbai-embed-large-v1\",\n",
201 |     "                                   encode_kwargs = {'precision': 'binary'})\n"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": 12,
207 |    "id": "1d533dc2",
208 |    "metadata": {},
209 |    "outputs": [],
210 |    "source": [
211 |     "groq_key = 'gsk_your-own-key'"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "code",
216 |    "execution_count": 13,
217 |    "id": "72f72f93",
218 |    "metadata": {},
219 |    "outputs": [],
220 |    "source": [
221 |     "\n",
222 |     "\n",
 223 |     "# Build the FAISS vector store from the document chunks\n",
224 |     "vector = FAISS.from_documents(documents, embeddings)\n",
225 |     "\n",
226 |     "# Define a retriever interface\n",
227 |     "retriever = vector.as_retriever()\n",
228 |     "\n",
229 |     "# Define LLM\n",
230 |     "model = ChatGroq(temperature=0, groq_api_key=groq_key, model_name=\"llama3-8b-8192\")\n",
231 |     "\n",
232 |     "# Define prompt template\n",
233 |     "prompt = ChatPromptTemplate.from_template(\"\"\"Answer the following question based only on the provided context:\n",
234 |     "\n",
235 |     "<context>\n",
236 |     "{context}\n",
237 |     "</context>\n",
238 |     "\n",
239 |     "Question: {input}\"\"\")\n",
240 |     "\n",
241 |     "# Create a retrieval chain to answer questions\n",
242 |     "document_chain = create_stuff_documents_chain(model, prompt)\n",
243 |     "retrieval_chain = create_retrieval_chain(retriever, document_chain)"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": 15,
249 |    "id": "7fdea614",
250 |    "metadata": {},
251 |    "outputs": [
252 |     {
253 |      "name": "stdout",
254 |      "output_type": "stream",
255 |      "text": [
256 |       "The name of the candidate is Sahibpreet Singh.\n",
257 |       "CPU times: user 118 ms, sys: 352 ms, total: 470 ms\n",
258 |       "Wall time: 2.05 s\n"
259 |      ]
260 |     }
261 |    ],
262 |    "source": [
263 |     "%%time\n",
264 |     "response = retrieval_chain.invoke({\"input\": \"what is the name of the candidate\"})\n",
265 |     "print(response[\"answer\"])"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "code",
270 |    "execution_count": 16,
271 |    "id": "e94218f7",
272 |    "metadata": {},
273 |    "outputs": [
274 |     {
275 |      "name": "stdout",
276 |      "output_type": "stream",
277 |      "text": [
278 |       "Based on the provided context, Sahibpreet Singh has had the following jobs:\n",
279 |       "\n",
280 |       "1. Tatras Data (11/2022 - 12/2023) - Data Scientist\n",
281 |       "2. ZS Associates (11/2021 - 11/2022) - Data Science Associate\n",
282 |       "3. Tatras Data (07/2020 - 10/2021) - Junior Data Scientist\n",
283 |       "\n",
284 |       "So, in total, Sahibpreet Singh has had 3 jobs.\n",
285 |       "CPU times: user 125 ms, sys: 428 ms, total: 553 ms\n",
286 |       "Wall time: 2.06 s\n"
287 |      ]
288 |     }
289 |    ],
290 |    "source": [
291 |     "%%time\n",
292 |     "response = retrieval_chain.invoke({\"input\": \"How many number of jobs in total he did?\"})\n",
293 |     "print(response[\"answer\"])"
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "code",
298 |    "execution_count": 17,
299 |    "id": "b1e0c35a",
300 |    "metadata": {},
301 |    "outputs": [
302 |     {
303 |      "name": "stdout",
304 |      "output_type": "stream",
305 |      "text": [
306 |       "According to the provided context, the first job of the candidate was as a Junior Data Scientist at Tatras Data from July 2020 to October 2021.\n",
307 |       "CPU times: user 94.6 ms, sys: 290 ms, total: 384 ms\n",
308 |       "Wall time: 1.64 s\n"
309 |      ]
310 |     }
311 |    ],
312 |    "source": [
313 |     "%%time\n",
314 |     "response = retrieval_chain.invoke({\"input\": \"what was the first job of candidate?\"})\n",
315 |     "print(response[\"answer\"])"
316 |    ]
317 |   },
318 |   {
319 |    "cell_type": "code",
320 |    "execution_count": null,
321 |    "id": "42a701f0",
322 |    "metadata": {},
323 |    "outputs": [],
324 |    "source": []
325 |   },
326 |   {
327 |    "cell_type": "code",
328 |    "execution_count": null,
329 |    "id": "deb387e5",
330 |    "metadata": {},
331 |    "outputs": [],
332 |    "source": []
333 |   }
334 |  ],
335 |  "metadata": {
336 |   "kernelspec": {
337 |    "display_name": "Python 3 (ipykernel)",
338 |    "language": "python",
339 |    "name": "python3"
340 |   },
341 |   "language_info": {
342 |    "codemirror_mode": {
343 |     "name": "ipython",
344 |     "version": 3
345 |    },
346 |    "file_extension": ".py",
347 |    "mimetype": "text/x-python",
348 |    "name": "python",
349 |    "nbconvert_exporter": "python",
350 |    "pygments_lexer": "ipython3",
351 |    "version": "3.10.13"
352 |   }
353 |  },
354 |  "nbformat": 4,
355 |  "nbformat_minor": 5
356 | }
357 | 


--------------------------------------------------------------------------------
/llamaparse_langchain/sahib-cv_can-flowcv.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sahibpreetsingh12/llm-learning/09fcf0c127ba8d474765cd0ea292eaddf7185b22/llamaparse_langchain/sahib-cv_can-flowcv.pdf


--------------------------------------------------------------------------------
/llamindex-raptors/Llamaindex-Raptor_Semantic.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "ac4efb13",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     " \n",
  9 |     "\n",
 10 |     "# <p style=\"color:Orange;\">Semantic Chunking</p>  +  <p style=\"color:Red;\">Raptors</p>  for RAG"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 1,
 16 |    "id": "d790e63e",
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "\n",
 21 |     "# %%capture\n",
 22 |     "# !pip install llama-index-packs-raptor"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "id": "5a2fe7f4",
 28 |    "metadata": {},
 29 |    "source": [
  30 |     "To read about the RAPTOR pack, see https://github.com/run-llama/llama_index/tree/main/llama-index-packs/llama-index-packs-raptor/llama_index/packs/raptor"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "id": "55d79a55",
 36 |    "metadata": {},
 37 |    "source": [
 38 |     "## Imports"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 2,
 44 |    "id": "3cc76835",
 45 |    "metadata": {},
 46 |    "outputs": [],
 47 |    "source": [
 48 |     "import shutup\n",
 49 |     "shutup.please()"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": 3,
 55 |    "id": "f9ba5a96",
 56 |    "metadata": {},
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "#Imports\n",
 60 |     "from llama_index.llms.openai import OpenAI\n",
 61 |     "import json\n",
 62 |     "from llama_index.embeddings.openai import OpenAIEmbedding\n",
 63 |     "\n",
 64 |     "from llama_index.core import (\n",
 65 |     "    VectorStoreIndex,\n",
 66 |     "    SimpleDirectoryReader,\n",
 67 |     "    load_index_from_storage,\n",
 68 |     "    StorageContext,\n",
 69 |     "    Document,\n",
 70 |     "    ServiceContext,\n",
 71 |     ")\n",
 72 |     "from llama_index.core import Settings\n",
 73 |     "#semantic chunking\n",
 74 |     "from llama_index.core.node_parser import (\n",
 75 |     "    SentenceSplitter,\n",
 76 |     "    SemanticSplitterNodeParser,\n",
 77 |     ")"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 4,
 83 |    "id": "83344ad3",
 84 |    "metadata": {},
 85 |    "outputs": [],
 86 |    "source": [
 87 |     "#using llama-parse for parsing complex documents"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "markdown",
 92 |    "id": "bc212ad2",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "To use llama-parse refer to - https://cloud.llamaindex.ai/landing"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 5,
101 |    "id": "c303fcb7",
102 |    "metadata": {},
103 |    "outputs": [],
104 |    "source": [
105 |     "# llama-parse is async-first, running the async code in a notebook requires the use of nest_asyncio\n",
106 |     "import nest_asyncio\n",
107 |     "nest_asyncio.apply()\n",
108 |     "\n",
109 |     "import os\n",
110 |     "# API access to llama-cloud\n",
111 |     "os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-\"\n",
112 |     "\n",
113 |     "# Using OpenAI API for embeddings/llms\n",
114 |     "os.environ[\"OPENAI_API_KEY\"] = \"sk-\""
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 6,
120 |    "id": "e150a60b",
121 |    "metadata": {},
122 |    "outputs": [],
123 |    "source": [
124 |     "embed_model=OpenAIEmbedding(model=\"text-embedding-3-small\")\n",
125 |     "llm = OpenAI(model=\"gpt-3.5-turbo-0125\")\n",
126 |     "\n",
127 |     "Settings.llm = llm\n",
128 |     "Settings.embed_model = embed_model"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": 7,
134 |    "id": "ef72421e",
135 |    "metadata": {},
136 |    "outputs": [
137 |     {
138 |      "name": "stdout",
139 |      "output_type": "stream",
140 |      "text": [
141 |       "Started parsing the file under job_id 678d0d89-482b-4088-8233-e01bd2ae07c9\n"
142 |      ]
143 |     }
144 |    ],
145 |    "source": [
146 |     "from llama_parse import LlamaParse\n",
147 |     "\n",
148 |     "documents = LlamaParse(result_type=\"markdown\").load_data('/Users/sahibpreetsingh/Downloads/iPhone.pdf')"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": 8,
154 |    "id": "33bf47ab",
155 |    "metadata": {},
156 |    "outputs": [],
157 |    "source": [
158 |     "text_from_pdf = json.loads(documents[0].to_json())['text']"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "markdown",
163 |    "id": "61748d56",
164 |    "metadata": {},
165 |    "source": [
166 |     "## Defining service context\n",
 167 |     "The system prompt below sets a persona for the chat engine and aims to reduce the chances of hallucination."
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": 9,
173 |    "id": "e0455559",
174 |    "metadata": {},
175 |    "outputs": [],
176 |    "source": [
177 |     "service_context = ServiceContext.from_defaults(llm=OpenAI(model=\"gpt-3.5-turbo\", temperature=0.2, system_prompt=\"\"\"\n",
178 |     "                                                                    You are an expert Analyser and Professor and your job is to answer \n",
179 |     "                                                                    questions. Assume that all questions are related to the pdf user provided.\n",
180 |     "                                                                    Keep your answers technical and based on facts – do not hallucinate features.\n",
181 |     "                                                                    \"\"\"))"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "markdown",
186 |    "id": "0f30229d",
187 |    "metadata": {},
188 |    "source": [
189 |     "## Semantic Chunking and Index creation"
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "markdown",
194 |    "id": "2a30125c",
195 |    "metadata": {},
196 |    "source": [
 197 |     "## To understand semantic chunking:\n",
198 |     "1. https://docs.llamaindex.ai/en/latest/examples/node_parsers/semantic_chunking/"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": 10,
204 |    "id": "91ce858f",
205 |    "metadata": {},
206 |    "outputs": [],
207 |    "source": [
208 |     "embed_model = OpenAIEmbedding()\n",
209 |     "splitter = SemanticSplitterNodeParser(\n",
210 |     "    buffer_size=1, breakpoint_percentile_threshold=100, embed_model=embed_model\n",
211 |     ")\n",
212 |     "\n",
213 |     "# also baseline splitter\n",
214 |     "base_splitter = SentenceSplitter(chunk_size=512)\n",
215 |     "nodes = splitter.get_nodes_from_documents(documents)\n",
216 |     "\n",
217 |     "index = VectorStoreIndex(nodes,service_context=service_context)"
218 |    ]
219 |   },
220 |   {
221 |    "cell_type": "code",
222 |    "execution_count": 11,
223 |    "id": "811a52b0",
224 |    "metadata": {},
225 |    "outputs": [
226 |     {
227 |      "name": "stdout",
228 |      "output_type": "stream",
229 |      "text": [
230 |       "Added user message to memory: when was the first iphone realeased and who lauched it\n",
231 |       "=== Calling Function ===\n",
232 |       "Calling function: query_engine_tool with args: {\"input\": \"When was the first iPhone released?\"}\n",
233 |       "Got output: The first iPhone was released on June 29, 2007.\n",
234 |       "========================\n",
235 |       "\n",
236 |       "=== Calling Function ===\n",
237 |       "Calling function: query_engine_tool with args: {\"input\": \"Who launched the first iPhone?\"}\n",
238 |       "Got output: CEO Steve Jobs launched the first iPhone.\n",
239 |       "========================\n",
240 |       "\n"
241 |      ]
242 |     }
243 |    ],
244 |    "source": [
245 |     "query_engine = index.as_chat_engine(chat_mode=\"react\", verbose=True)\n",
246 |     "response = query_engine.query(\"when was the first iphone realeased and who lauched it\")"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": 12,
252 |    "id": "10a2e277",
253 |    "metadata": {},
254 |    "outputs": [
255 |     {
256 |      "data": {
257 |       "text/plain": [
258 |        "'The first iPhone was released on June 29, 2007, and it was launched by CEO Steve Jobs.'"
259 |       ]
260 |      },
261 |      "execution_count": 12,
262 |      "metadata": {},
263 |      "output_type": "execute_result"
264 |     }
265 |    ],
266 |    "source": [
267 |     "response.response"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "code",
272 |    "execution_count": 13,
273 |    "id": "615f493e",
274 |    "metadata": {},
275 |    "outputs": [
276 |     {
277 |      "name": "stdout",
278 |      "output_type": "stream",
279 |      "text": [
280 |       "Added user message to memory: and what was the initial price of 4GB iphone\n",
281 |       "=== Calling Function ===\n",
282 |       "Calling function: query_engine_tool with args: {\"input\":\"What was the initial price of 4GB iPhone?\"}\n",
283 |       "Got output: The initial price of the 4GB model of the iPhone was $499.\n",
284 |       "========================\n",
285 |       "\n"
286 |      ]
287 |     },
288 |     {
289 |      "data": {
290 |       "text/plain": [
291 |        "'The initial price of the 4GB iPhone was $499.'"
292 |       ]
293 |      },
294 |      "execution_count": 13,
295 |      "metadata": {},
296 |      "output_type": "execute_result"
297 |     }
298 |    ],
299 |    "source": [
300 |     "response = query_engine.query(\"and what was the initial price of 4GB iphone\")\n",
301 |     "response.response"
302 |    ]
303 |   },
304 |   {
305 |    "cell_type": "code",
306 |    "execution_count": 14,
307 |    "id": "07d11d81",
308 |    "metadata": {},
309 |    "outputs": [
310 |     {
311 |      "name": "stdout",
312 |      "output_type": "stream",
313 |      "text": [
314 |       "Added user message to memory: and who conducted a study to show that people can wait to buy their wireless phone just to buy iphone?\n",
315 |       "=== Calling Function ===\n",
316 |       "Calling function: query_engine_tool with args: {\"input\":\"Who conducted a study to show that people can wait to buy their wireless phone just to buy an iPhone?\"}\n",
317 |       "Got output: Sharma and Wingfield conducted a study to show that people can wait to buy their wireless phone just to buy an iPhone.\n",
318 |       "========================\n",
319 |       "\n"
320 |      ]
321 |     }
322 |    ],
323 |    "source": [
324 |     "response = query_engine.query(\"and who conducted a study to show that people can wait to buy their wireless phone just to buy iphone?\")"
325 |    ]
326 |   },
327 |   {
328 |    "cell_type": "code",
329 |    "execution_count": 15,
330 |    "id": "8118733e",
331 |    "metadata": {},
332 |    "outputs": [
333 |     {
334 |      "name": "stdout",
335 |      "output_type": "stream",
336 |      "text": [
337 |       "Added user message to memory: by how much apple reduced the iphone price after 2 months of sale?\n",
338 |       "=== Calling Function ===\n",
339 |       "Calling function: query_engine_tool with args: {\"input\":\"Calculate the percentage decrease in the iPhone price after 2 months of sale.\"}\n",
340 |       "Got output: The iPhone price decreased by 33% ($200) after being on the market for only two months.\n",
341 |       "========================\n",
342 |       "\n"
343 |      ]
344 |     },
345 |     {
346 |      "data": {
347 |       "text/plain": [
348 |        "'Apple reduced the iPhone price by 33% ($200) after 2 months of sale.'"
349 |       ]
350 |      },
351 |      "execution_count": 15,
352 |      "metadata": {},
353 |      "output_type": "execute_result"
354 |     }
355 |    ],
356 |    "source": [
357 |     "response = query_engine.query(\"by how much apple reduced the iphone price after 2 months of sale?\",)\n",
358 |     "response.response"
359 |    ]
360 |   },
361 |   {
362 |    "cell_type": "markdown",
363 |    "id": "c253d073",
364 |    "metadata": {},
365 |    "source": [
366 |     "## Raptor Retriever"
367 |    ]
368 |   },
369 |   {
370 |    "cell_type": "code",
371 |    "execution_count": 16,
372 |    "id": "d508ba55",
373 |    "metadata": {},
374 |    "outputs": [],
375 |    "source": [
376 |     "from llama_index.packs.raptor import RaptorPack"
377 |    ]
378 |   },
379 |   {
380 |    "cell_type": "code",
381 |    "execution_count": 17,
382 |    "id": "64354dc3",
383 |    "metadata": {},
384 |    "outputs": [],
385 |    "source": [
386 |     "from llama_index.packs.raptor import RaptorRetriever\n",
387 |     "\n",
388 |     "retriever = RaptorRetriever(\n",
389 |     "    [],\n",
390 |     "    embed_model=OpenAIEmbedding(\n",
391 |     "        model=\"text-embedding-3-small\"\n",
392 |     "    ),  # used for embedding clusters\n",
393 |     "    llm=OpenAI(model=\"gpt-3.5-turbo\", temperature=0.1),  # used for generating summaries\n",
394 |     "\n",
395 |     "    existing_index = index,\n",
396 |     "    similarity_top_k=2,  # top k for each layer, or overall top-k for collapsed\n",
397 |     "    mode=\"collapsed\",  # sets default mode\n",
398 |     ")"
399 |    ]
400 |   },
401 |   {
402 |    "cell_type": "code",
403 |    "execution_count": 18,
404 |    "id": "f1808e97",
405 |    "metadata": {},
406 |    "outputs": [],
407 |    "source": [
408 |     "from llama_index.core.query_engine import RetrieverQueryEngine\n",
409 |     "\n",
410 |     "query_engine = RetrieverQueryEngine.from_args(\n",
411 |     "    retriever, llm=OpenAI(model=\"gpt-3.5-turbo\", temperature=0.1))  # from_args ignores unknown kwargs (temperature/verbose), so set temperature on the LLM itself\n"
412 |    ]
413 |   },
414 |   {
415 |    "cell_type": "code",
416 |    "execution_count": 19,
417 |    "id": "83c506d2",
418 |    "metadata": {},
419 |    "outputs": [],
420 |    "source": [
421 |     "response = query_engine.query(\"when was the first iphone realeased and who lauched it and also what was place i am asking city name and in that city which specific place \",)\n"
422 |    ]
423 |   },
424 |   {
425 |    "cell_type": "code",
426 |    "execution_count": 20,
427 |    "id": "e35af400",
428 |    "metadata": {},
429 |    "outputs": [
430 |     {
431 |      "data": {
432 |       "text/plain": [
433 |        "'The first iPhone was released on June 29, 2007, by Apple Inc. The launch event took place in San Francisco at the Macworld convention.'"
434 |       ]
435 |      },
436 |      "execution_count": 20,
437 |      "metadata": {},
438 |      "output_type": "execute_result"
439 |     }
440 |    ],
441 |    "source": [
442 |     "# str(response)\n",
443 |     "response.response"
444 |    ]
445 |   },
446 |   {
447 |    "cell_type": "code",
448 |    "execution_count": 21,
449 |    "id": "501d5811",
450 |    "metadata": {},
451 |    "outputs": [],
452 |    "source": [
453 |     "response = query_engine.query(\"by how much apple reduced the iphone price after 2 months of sale? and what percetange is it?\",)"
454 |    ]
455 |   },
456 |   {
457 |    "cell_type": "code",
458 |    "execution_count": 22,
459 |    "id": "1c508748",
460 |    "metadata": {},
461 |    "outputs": [
462 |     {
463 |      "data": {
464 |       "text/plain": [
465 |        "'Apple reduced the iPhone price by $200 after 2 months of sale, which is a 33% reduction from the original price.'"
466 |       ]
467 |      },
468 |      "execution_count": 22,
469 |      "metadata": {},
470 |      "output_type": "execute_result"
471 |     }
472 |    ],
473 |    "source": [
474 |     "response.response"
475 |    ]
476 |   },
477 |   {
478 |    "cell_type": "code",
479 |    "execution_count": null,
480 |    "id": "bfcd1bb6",
481 |    "metadata": {},
482 |    "outputs": [],
483 |    "source": []
484 |   }
485 |  ],
486 |  "metadata": {
487 |   "kernelspec": {
488 |    "display_name": "Python 3 (ipykernel)",
489 |    "language": "python",
490 |    "name": "python3"
491 |   },
492 |   "language_info": {
493 |    "codemirror_mode": {
494 |     "name": "ipython",
495 |     "version": 3
496 |    },
497 |    "file_extension": ".py",
498 |    "mimetype": "text/x-python",
499 |    "name": "python",
500 |    "nbconvert_exporter": "python",
501 |    "pygments_lexer": "ipython3",
502 |    "version": "3.9.18"
503 |   }
504 |  },
505 |  "nbformat": 4,
506 |  "nbformat_minor": 5
507 | }
508 | 


--------------------------------------------------------------------------------
/llamindex-raptors/iPhone.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sahibpreetsingh12/llm-learning/09fcf0c127ba8d474765cd0ea292eaddf7185b22/llamindex-raptors/iPhone.pdf


--------------------------------------------------------------------------------