├── .gitignore
├── README.md
├── data
│   └── text.txt
├── langchain.ipynb
├── requirements.txt
└── llamaindex.ipynb

/.gitignore:
--------------------------------------------------------------------------------
app
.env
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# LlamaIndex vs LangChain

A simple RAG system to compare LlamaIndex with LangChain
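
## Setup

Both notebooks read an OpenAI API key from a `.env` file, which `.gitignore` keeps out of version control. A minimal setup (assuming Python 3.11, matching the notebook metadata) might look like: install the pinned dependencies with `pip install -r requirements.txt`, then create a `.env` file next to the notebooks containing:

```
OPENAI_API_KEY=sk-...
```

where `sk-...` is a placeholder for your own key. Open `langchain.ipynb` and `llamaindex.ipynb` to compare the two RAG pipelines over `data/text.txt`.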
--------------------------------------------------------------------------------
/data/text.txt:
--------------------------------------------------------------------------------
Q: What makes our pizza unique?
A: Our pizzas are made with a secret family recipe for the dough, hand-tossed, and topped with fresh, locally-sourced ingredients. We use a traditional wood-fired oven to give our pizzas a distinctive smoky flavor and crisp crust.

Q: Do we offer gluten-free pizza options?
A: Yes, we offer a delicious gluten-free crust option for our guests with dietary restrictions or preferences.

Q: Can customers create their own pizza?
A: Absolutely! Customers can choose from a variety of fresh toppings to create their own unique pizza masterpiece.

Q: What are our most popular pizzas?
A: Our most popular pizzas include the Classic Margherita, Pepperoni Supreme, and the Gourmet Vegetarian. Each offers a unique blend of flavors that cater to a variety of tastes.

Q: Do we offer vegan pizza options?
A: Yes, we have vegan pizza options which include dairy-free cheese and a variety of fresh vegetable toppings.

Q: How long does it take to prepare a pizza?
A: On average, it takes about 15-20 minutes from the time you order until your pizza is ready. This time ensures that each pizza is cooked to perfection.

Q: Can customers order pizza for delivery?
A: Yes, we offer delivery services within a certain radius of our restaurant, ensuring that you can enjoy our pizzas from the comfort of your home.
--------------------------------------------------------------------------------
/langchain.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.document_loaders import DirectoryLoader\n",
    "\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "# Read the OpenAI API key from .env\n",
    "load_dotenv()\n",
    "\n",
    "# Load every .txt file under data/\n",
    "loader = DirectoryLoader('data', glob=\"**/*.txt\")\n",
    "\n",
    "documents = loader.load()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(documents)\n",
    "print(documents[0].page_content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.text_splitter import CharacterTextSplitter\n",
    "\n",
    "# Split on blank lines so each Q/A pair stays in one chunk\n",
    "text_splitter = CharacterTextSplitter(\n",
    "    separator=\"\\n\\n\",\n",
    "    chunk_size=250,\n",
    "    chunk_overlap=10,\n",
    "    length_function=len,\n",
    "    is_separator_regex=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "chunks = text_splitter.split_documents(documents)\n",
    "chunks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_openai import OpenAIEmbeddings\n",
    "from langchain_community.vectorstores import Chroma\n",
    "\n",
    "# Embed the chunks and store them in an in-memory Chroma index\n",
    "index = Chroma.from_documents(chunks, OpenAIEmbeddings())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "retriever = index.as_retriever()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "retriever.get_relevant_documents(\"How long does it take to prepare a pizza\")"
   ]
  },
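  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional check (a sketch, not part of the original notebook): the Chroma\n",
    "# store can also return a similarity score alongside each matched document\n",
    "for doc, score in index.similarity_search_with_score(\"How long does it take to prepare a pizza\"):\n",
    "    print(score, doc.page_content)"
   ]
  },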
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from operator import itemgetter\n",
    "\n",
    "\n",
    "def format_docs(docs):\n",
    "    # Join the retrieved documents into one context string for the prompt\n",
    "    return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
    "\n",
    "\n",
    "template = \"\"\"\n",
    "Answer the question based only on the following context:\n",
    "{context}\n",
    "\n",
    "Answer the following question:\n",
    "Question: {question}\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.prompts import ChatPromptTemplate\n",
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "prompt = ChatPromptTemplate.from_template(template)\n",
    "model = ChatOpenAI(model_name=\"gpt-3.5-turbo\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# LCEL pipeline: fetch context for the question, fill the prompt,\n",
    "# call the model, and parse the reply to a plain string\n",
    "rag_chain = (\n",
    "    {\n",
    "        \"context\": itemgetter(\"question\") | retriever | format_docs,\n",
    "        \"question\": itemgetter(\"question\")\n",
    "    }\n",
    "    | prompt\n",
    "    | model\n",
    "    | StrOutputParser()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rag_chain.invoke({\"question\": \"How long does it take to prepare a pizza\"})"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "app",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
aiohttp==3.9.3
aiosignal==1.3.1
annotated-types==0.6.0
anyio==4.3.0
asgiref==3.7.2
asttokens==2.4.1
async-timeout==4.0.3
asyncpg==0.29.0
attrs==23.2.0
backoff==2.2.1
bcrypt==4.1.2
beautifulsoup4==4.12.3
bs4==0.0.2
build==1.0.3
cachetools==5.3.2
certifi==2024.2.2
chardet==5.2.0
charset-normalizer==3.3.2
chroma-hnswlib==0.7.3
chromadb==0.4.23
click==8.1.7
colorama==0.4.6
coloredlogs==15.0.1
comm==0.2.1
dataclasses-json==0.6.4
dataclasses-json-speakeasy==0.5.11
debugpy==1.8.1
decorator==5.1.1
Deprecated==1.2.14
dirtyjson==1.0.8
distro==1.9.0
emoji==2.10.1
executing==2.0.1
fastapi==0.110.0
filelock==3.13.1
filetype==1.2.0
flatbuffers==23.5.26
frozenlist==1.4.1
fsspec==2024.2.0
google-auth==2.28.1
googleapis-common-protos==1.62.0
greenlet==3.0.3
grpcio==1.62.0
h11==0.14.0
httpcore==1.0.4
httptools==0.6.1
httpx==0.27.0
huggingface-hub==0.20.3
humanfriendly==10.0
idna==3.6
importlib-metadata==6.11.0
importlib_resources==6.1.2
ipykernel==6.29.2
ipython==8.22.1
jedi==0.19.1
joblib==1.3.2
jsonpatch==1.33
jsonpath-python==1.0.6
jsonpointer==2.4
jupyter_client==8.6.0
jupyter_core==5.7.1
kubernetes==29.0.0
langchain==0.1.9
langchain-community==0.0.24
langchain-core==0.1.27
langchain-openai==0.0.8
langdetect==1.0.9
langsmith==0.1.10
llama-index==0.10.12
llama-index-agent-openai==0.1.5
llama-index-cli==0.1.5
llama-index-core==0.10.12
llama-index-embeddings-openai==0.1.6
llama-index-indices-managed-llama-cloud==0.1.3
llama-index-legacy==0.9.48
llama-index-llms-openai==0.1.6
llama-index-multi-modal-llms-openai==0.1.4
llama-index-program-openai==0.1.4
llama-index-question-gen-openai==0.1.3
llama-index-readers-file==0.1.5
llama-index-readers-llama-parse==0.1.3
llama-index-vector-stores-chroma==0.1.4
llama-index-vector-stores-postgres==0.1.2
llama-parse==0.3.4
llamaindex-py-client==0.1.13
lxml==5.1.0
marshmallow==3.20.2
matplotlib-inline==0.1.6
mmh3==4.1.0
monotonic==1.6
mpmath==1.3.0
multidict==6.0.5
mypy-extensions==1.0.0
nest-asyncio==1.6.0
networkx==3.2.1
nltk==3.8.1
numpy==1.26.4
oauthlib==3.2.2
onnxruntime==1.17.1
openai==1.12.0
opentelemetry-api==1.23.0
opentelemetry-exporter-otlp-proto-common==1.23.0
opentelemetry-exporter-otlp-proto-grpc==1.23.0
opentelemetry-instrumentation==0.44b0
opentelemetry-instrumentation-asgi==0.44b0
opentelemetry-instrumentation-fastapi==0.44b0
opentelemetry-proto==1.23.0
opentelemetry-sdk==1.23.0
opentelemetry-semantic-conventions==0.44b0
opentelemetry-util-http==0.44b0
orjson==3.9.15
overrides==7.7.0
packaging==23.2
pandas==2.2.1
parso==0.8.3
pgvector==0.2.5
pillow==10.2.0
platformdirs==4.2.0
posthog==3.4.2
prompt-toolkit==3.0.43
protobuf==4.25.3
psutil==5.9.8
psycopg2-binary==2.9.9
pulsar-client==3.4.0
pure-eval==0.2.2
pyasn1==0.5.1
pyasn1-modules==0.3.0
pydantic==2.6.2
pydantic_core==2.16.3
Pygments==2.17.2
PyMuPDF==1.23.25
PyMuPDFb==1.23.22
pypdf==4.0.2
PyPika==0.48.9
pyproject_hooks==1.0.0
pyreadline3==3.4.1
python-dateutil==2.8.2
python-dotenv==1.0.1
python-iso639==2024.2.7
python-magic==0.4.27
pytz==2024.1
pywin32==306
PyYAML==6.0.1
pyzmq==25.1.2
rapidfuzz==3.6.1
regex==2023.12.25
requests==2.31.0
requests-oauthlib==1.3.1
rsa==4.9
six==1.16.0
sniffio==1.3.1
soupsieve==2.5
SQLAlchemy==2.0.27
stack-data==0.6.3
starlette==0.36.3
sympy==1.12
tabulate==0.9.0
tenacity==8.2.3
tiktoken==0.6.0
tokenizers==0.15.2
tornado==6.4
tqdm==4.66.2
traitlets==5.14.1
typer==0.9.0
typing-inspect==0.9.0
typing_extensions==4.10.0
tzdata==2024.1
unstructured==0.12.5
unstructured-client==0.18.0
urllib3==2.2.1
uvicorn==0.27.1
watchfiles==0.21.0
wcwidth==0.2.13
websocket-client==1.7.0
websockets==12.0
wrapt==1.16.0
yarl==1.9.4
zipp==3.17.0
--------------------------------------------------------------------------------
/llamaindex.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
    "from llama_index.llms.openai import OpenAI\n",
    "from llama_index.core import Settings\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv()\n",
    "\n",
    "documents = SimpleDirectoryReader(\"data\").load_data()\n",
    "print(documents)\n",
    "print(documents[0].text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core.node_parser import SentenceSplitter\n",
    "\n",
    "text_splitter = SentenceSplitter(chunk_size=200, chunk_overlap=10)\n",
    "nodes = text_splitter.get_nodes_from_documents(documents=documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "nodes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(len(documents))\n",
    "print(len(nodes))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.vector_stores.chroma import ChromaVectorStore\n",
    "import chromadb\n",
    "from llama_index.core import StorageContext\n",
    "from llama_index.embeddings.openai import OpenAIEmbedding\n",
    "\n",
    "# In-memory Chroma collection that backs the LlamaIndex vector store\n",
    "chroma_client = chromadb.EphemeralClient()\n",
    "chroma_collection = chroma_client.create_collection(\"pizza_faq\")\n",
    "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
    "storage_context = StorageContext.from_defaults(vector_store=vector_store)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the index from the raw documents (LlamaIndex chunks them internally)\n",
    "index = VectorStoreIndex.from_documents(\n",
    "    documents, storage_context=storage_context, embed_model=OpenAIEmbedding()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Alternative: build the index from the nodes split above (note that this\n",
    "# adds a second copy of the content to the same Chroma collection)\n",
    "index = VectorStoreIndex(nodes=nodes, storage_context=storage_context, embed_model=OpenAIEmbedding())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "retriever = index.as_retriever()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "retriever.retrieve(\"How long does it take to prepare a pizza\")"
   ]
  },
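  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional check (a sketch, not part of the original notebook): retrieve()\n",
    "# returns NodeWithScore objects, so each hit's score is available directly\n",
    "for result in retriever.retrieve(\"How long does it take to prepare a pizza\"):\n",
    "    print(result.score, result.node.text)"
   ]
  },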
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set the default LLM once via Settings; as_query_engine() picks it up\n",
    "Settings.llm = OpenAI(model=\"gpt-3.5-turbo\")\n",
    "\n",
    "query_engine = index.as_query_engine()\n",
    "\n",
    "query_engine.query(\"How long does it take to prepare a pizza\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompts_dict = query_engine.get_prompts()\n",
    "print(prompts_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core import PromptTemplate\n",
    "\n",
    "new_summary_tmpl_str = (\n",
    "    \"You always say 'Hello my friend' at the beginning of your answer. \"\n",
    "    \"Below is data from a database:\\n\"\n",
    "    \"{context_str}\\n\"\n",
    "    \"Take that context and try to answer the question with it.\\n\"\n",
    "    \"Query: {query_str}\\n\"\n",
    "    \"Answer: \"\n",
    ")\n",
    "new_summary_tmpl = PromptTemplate(new_summary_tmpl_str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Swap the default text-QA prompt for the custom template\n",
    "query_engine.update_prompts(\n",
    "    {\"response_synthesizer:text_qa_template\": new_summary_tmpl}\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompts_dict = query_engine.get_prompts()\n",
    "print(prompts_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query_engine.query(\"How long does it take to prepare a pizza\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "app",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
--------------------------------------------------------------------------------