├── .gitignore
├── README.md
├── data
│   └── text.txt
├── langchain.ipynb
├── requirements.txt
└── llamaindex.ipynb

/.gitignore:
--------------------------------------------------------------------------------
app
.env
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# LlamaIndex vs LangChain

A simple RAG system to compare LlamaIndex with LangChain
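
## Setup

Both notebooks read an OpenAI API key from a `.env` file, which `.gitignore` keeps out of version control. A minimal setup (assuming Python 3.11, matching the notebook metadata) might look like: install the pinned dependencies with `pip install -r requirements.txt`, then create a `.env` file next to the notebooks containing:

```
OPENAI_API_KEY=sk-...
```

where `sk-...` is a placeholder for your own key. Open `langchain.ipynb` and `llamaindex.ipynb` to compare the two RAG pipelines over `data/text.txt`.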
--------------------------------------------------------------------------------
/data/text.txt:
--------------------------------------------------------------------------------
Q: What makes our pizza unique?
A: Our pizzas are made with a secret family recipe for the dough, hand-tossed, and topped with fresh, locally-sourced ingredients. We use a traditional wood-fired oven to give our pizzas a distinctive smoky flavor and crisp crust.

Q: Do we offer gluten-free pizza options?
A: Yes, we offer a delicious gluten-free crust option for our guests with dietary restrictions or preferences.

Q: Can customers create their own pizza?
A: Absolutely! Customers can choose from a variety of fresh toppings to create their own unique pizza masterpiece.

Q: What are our most popular pizzas?
A: Our most popular pizzas include the Classic Margherita, Pepperoni Supreme, and the Gourmet Vegetarian. Each offers a unique blend of flavors that cater to a variety of tastes.

Q: Do we offer vegan pizza options?
A: Yes, we have vegan pizza options which include dairy-free cheese and a variety of fresh vegetable toppings.

Q: How long does it take to prepare a pizza?
A: On average, it takes about 15-20 minutes from the time you order until your pizza is ready. This time ensures that each pizza is cooked to perfection.

Q: Can customers order pizza for delivery?
A: Yes, we offer delivery services within a certain radius of our restaurant, ensuring that you can enjoy our pizzas from the comfort of your home.
--------------------------------------------------------------------------------
/langchain.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.document_loaders import DirectoryLoader\n",
    "\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "# Read the OpenAI API key from .env\n",
    "load_dotenv()\n",
    "\n",
    "# Load every .txt file under data/\n",
    "loader = DirectoryLoader('data', glob=\"**/*.txt\")\n",
    "\n",
    "documents = loader.load()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(documents)\n",
    "print(documents[0].page_content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.text_splitter import CharacterTextSplitter\n",
    "\n",
    "# Split on blank lines so each Q/A pair stays in one chunk\n",
    "text_splitter = CharacterTextSplitter(\n",
    "    separator=\"\\n\\n\",\n",
    "    chunk_size=250,\n",
    "    chunk_overlap=10,\n",
    "    length_function=len,\n",
    "    is_separator_regex=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "chunks = text_splitter.split_documents(documents)\n",
    "chunks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_openai import OpenAIEmbeddings\n",
    "from langchain_community.vectorstores import Chroma\n",
    "\n",
    "# Embed the chunks and store them in an in-memory Chroma index\n",
    "index = Chroma.from_documents(chunks, OpenAIEmbeddings())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "retriever = index.as_retriever()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "retriever.get_relevant_documents(\"How long does it take to prepare a pizza\")"
   ]
  },
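  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional check (a sketch, not part of the original notebook): the Chroma\n",
    "# store can also return a similarity score alongside each matched document\n",
    "for doc, score in index.similarity_search_with_score(\"How long does it take to prepare a pizza\"):\n",
    "    print(score, doc.page_content)"
   ]
  },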
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from operator import itemgetter\n",
    "\n",
    "\n",
    "def format_docs(docs):\n",
    "    # Join the retrieved documents into one context string for the prompt\n",
    "    return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
    "\n",
    "\n",
    "template = \"\"\"\n",
    "Answer the question based only on the following context:\n",
    "{context}\n",
    "\n",
    "Answer the following question:\n",
    "Question: {question}\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.prompts import ChatPromptTemplate\n",
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "prompt = ChatPromptTemplate.from_template(template)\n",
    "model = ChatOpenAI(model_name=\"gpt-3.5-turbo\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# LCEL pipeline: fetch context for the question, fill the prompt,\n",
    "# call the model, and parse the reply to a plain string\n",
    "rag_chain = (\n",
    "    {\n",
    "        \"context\": itemgetter(\"question\") | retriever | format_docs,\n",
    "        \"question\": itemgetter(\"question\")\n",
    "    }\n",
    "    | prompt\n",
    "    | model\n",
    "    | StrOutputParser()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rag_chain.invoke({\"question\": \"How long does it take to prepare a pizza\"})"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "app",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
aiohttp==3.9.3
aiosignal==1.3.1
annotated-types==0.6.0
anyio==4.3.0
asgiref==3.7.2
asttokens==2.4.1
async-timeout==4.0.3
asyncpg==0.29.0
attrs==23.2.0
backoff==2.2.1
bcrypt==4.1.2
beautifulsoup4==4.12.3
bs4==0.0.2
build==1.0.3
cachetools==5.3.2
certifi==2024.2.2
chardet==5.2.0
charset-normalizer==3.3.2
chroma-hnswlib==0.7.3
chromadb==0.4.23
click==8.1.7
colorama==0.4.6
coloredlogs==15.0.1
comm==0.2.1
dataclasses-json==0.6.4
dataclasses-json-speakeasy==0.5.11
debugpy==1.8.1
decorator==5.1.1
Deprecated==1.2.14
dirtyjson==1.0.8
distro==1.9.0
emoji==2.10.1
executing==2.0.1
fastapi==0.110.0
filelock==3.13.1
filetype==1.2.0
flatbuffers==23.5.26
frozenlist==1.4.1
fsspec==2024.2.0
google-auth==2.28.1
googleapis-common-protos==1.62.0
greenlet==3.0.3
grpcio==1.62.0
h11==0.14.0
httpcore==1.0.4
httptools==0.6.1
httpx==0.27.0
huggingface-hub==0.20.3
humanfriendly==10.0
idna==3.6
importlib-metadata==6.11.0
importlib_resources==6.1.2
ipykernel==6.29.2
ipython==8.22.1
jedi==0.19.1
joblib==1.3.2
jsonpatch==1.33
jsonpath-python==1.0.6
jsonpointer==2.4
jupyter_client==8.6.0
jupyter_core==5.7.1
kubernetes==29.0.0
langchain==0.1.9
langchain-community==0.0.24
langchain-core==0.1.27
langchain-openai==0.0.8
langdetect==1.0.9
langsmith==0.1.10
llama-index==0.10.12
llama-index-agent-openai==0.1.5
llama-index-cli==0.1.5
llama-index-core==0.10.12
llama-index-embeddings-openai==0.1.6
llama-index-indices-managed-llama-cloud==0.1.3
llama-index-legacy==0.9.48
llama-index-llms-openai==0.1.6
llama-index-multi-modal-llms-openai==0.1.4
llama-index-program-openai==0.1.4
llama-index-question-gen-openai==0.1.3
llama-index-readers-file==0.1.5
llama-index-readers-llama-parse==0.1.3
llama-index-vector-stores-chroma==0.1.4
llama-index-vector-stores-postgres==0.1.2
llama-parse==0.3.4
llamaindex-py-client==0.1.13
lxml==5.1.0
marshmallow==3.20.2
matplotlib-inline==0.1.6
mmh3==4.1.0
monotonic==1.6
mpmath==1.3.0
multidict==6.0.5
mypy-extensions==1.0.0
nest-asyncio==1.6.0
networkx==3.2.1
nltk==3.8.1
numpy==1.26.4
oauthlib==3.2.2
onnxruntime==1.17.1
openai==1.12.0
opentelemetry-api==1.23.0
opentelemetry-exporter-otlp-proto-common==1.23.0
opentelemetry-exporter-otlp-proto-grpc==1.23.0
opentelemetry-instrumentation==0.44b0
opentelemetry-instrumentation-asgi==0.44b0
opentelemetry-instrumentation-fastapi==0.44b0
opentelemetry-proto==1.23.0
opentelemetry-sdk==1.23.0
opentelemetry-semantic-conventions==0.44b0
opentelemetry-util-http==0.44b0
orjson==3.9.15
overrides==7.7.0
packaging==23.2
pandas==2.2.1
parso==0.8.3
pgvector==0.2.5
pillow==10.2.0
platformdirs==4.2.0
posthog==3.4.2
prompt-toolkit==3.0.43
protobuf==4.25.3
psutil==5.9.8
psycopg2-binary==2.9.9
pulsar-client==3.4.0
pure-eval==0.2.2
pyasn1==0.5.1
pyasn1-modules==0.3.0
pydantic==2.6.2
pydantic_core==2.16.3
Pygments==2.17.2
PyMuPDF==1.23.25
PyMuPDFb==1.23.22
pypdf==4.0.2
PyPika==0.48.9
pyproject_hooks==1.0.0
pyreadline3==3.4.1
python-dateutil==2.8.2
python-dotenv==1.0.1
python-iso639==2024.2.7
python-magic==0.4.27
pytz==2024.1
pywin32==306
PyYAML==6.0.1
pyzmq==25.1.2
rapidfuzz==3.6.1
regex==2023.12.25
requests==2.31.0
requests-oauthlib==1.3.1
rsa==4.9
six==1.16.0
sniffio==1.3.1
soupsieve==2.5
SQLAlchemy==2.0.27
stack-data==0.6.3
starlette==0.36.3
sympy==1.12
tabulate==0.9.0
tenacity==8.2.3
tiktoken==0.6.0
tokenizers==0.15.2
tornado==6.4
tqdm==4.66.2
traitlets==5.14.1
typer==0.9.0
typing-inspect==0.9.0
typing_extensions==4.10.0
tzdata==2024.1
unstructured==0.12.5
unstructured-client==0.18.0
urllib3==2.2.1
uvicorn==0.27.1
watchfiles==0.21.0
wcwidth==0.2.13
websocket-client==1.7.0
websockets==12.0
wrapt==1.16.0
yarl==1.9.4
zipp==3.17.0
--------------------------------------------------------------------------------
/llamaindex.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
    "from llama_index.llms.openai import OpenAI\n",
    "from llama_index.core import Settings\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv()\n",
    "\n",
    "documents = SimpleDirectoryReader(\"data\").load_data()\n",
    "print(documents)\n",
    "print(documents[0].text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core.node_parser import SentenceSplitter\n",
    "\n",
    "text_splitter = SentenceSplitter(chunk_size=200, chunk_overlap=10)\n",
    "nodes = text_splitter.get_nodes_from_documents(documents=documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "nodes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(len(documents))\n",
    "print(len(nodes))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.vector_stores.chroma import ChromaVectorStore\n",
    "import chromadb\n",
    "from llama_index.core import StorageContext\n",
    "from llama_index.embeddings.openai import OpenAIEmbedding\n",
    "\n",
    "# In-memory Chroma collection that backs the LlamaIndex vector store\n",
    "chroma_client = chromadb.EphemeralClient()\n",
    "chroma_collection = chroma_client.create_collection(\"pizza_faq\")\n",
    "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
    "storage_context = StorageContext.from_defaults(vector_store=vector_store)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the index from the raw documents (LlamaIndex chunks them internally)\n",
    "index = VectorStoreIndex.from_documents(\n",
    "    documents, storage_context=storage_context, embed_model=OpenAIEmbedding()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Alternative: build the index from the nodes split above (note that this\n",
    "# adds a second copy of the content to the same Chroma collection)\n",
    "index = VectorStoreIndex(nodes=nodes, storage_context=storage_context, embed_model=OpenAIEmbedding())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "retriever = index.as_retriever()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "retriever.retrieve(\"How long does it take to prepare a pizza\")"
   ]
  },
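  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional check (a sketch, not part of the original notebook): retrieve()\n",
    "# returns NodeWithScore objects, so each hit's score is available directly\n",
    "for result in retriever.retrieve(\"How long does it take to prepare a pizza\"):\n",
    "    print(result.score, result.node.text)"
   ]
  },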
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set the default LLM once via Settings; as_query_engine() picks it up\n",
    "Settings.llm = OpenAI(model=\"gpt-3.5-turbo\")\n",
    "\n",
    "query_engine = index.as_query_engine()\n",
    "\n",
    "query_engine.query(\"How long does it take to prepare a pizza\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompts_dict = query_engine.get_prompts()\n",
    "print(prompts_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core import PromptTemplate\n",
    "\n",
    "new_summary_tmpl_str = (\n",
    "    \"You always say 'Hello my friend' at the beginning of your answer. \"\n",
    "    \"Below is data from a database:\\n\"\n",
    "    \"{context_str}\\n\"\n",
    "    \"Take that context and try to answer the question with it.\\n\"\n",
    "    \"Query: {query_str}\\n\"\n",
    "    \"Answer: \"\n",
    ")\n",
    "new_summary_tmpl = PromptTemplate(new_summary_tmpl_str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Swap the default text-QA prompt for the custom template\n",
    "query_engine.update_prompts(\n",
    "    {\"response_synthesizer:text_qa_template\": new_summary_tmpl}\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompts_dict = query_engine.get_prompts()\n",
    "print(prompts_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query_engine.query(\"How long does it take to prepare a pizza\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "app",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
--------------------------------------------------------------------------------