├── .env.example
├── .gitignore
├── .python-version
├── .vscode
│   └── launch.json
├── LICENSE
├── README.md
├── langgraph.json
├── notebooks
│   └── rag-from-scratch
│       ├── 01-overview.ipynb
│       ├── 02-indexing.ipynb
│       ├── 03-retrieval.ipynb
│       ├── 04-generation.ipynb
│       ├── 05-multi-query.ipynb
│       ├── 06-rag-fusion.ipynb
│       ├── 07-01-decomposition-recursive.ipynb
│       ├── 07-02-decomposition-parallel.ipynb
│       ├── 08-step-back.ipynb
│       ├── 09-hyde.ipynb
│       ├── 10-01-logical-routing.ipynb
│       ├── 10-02-semantic-routing.ipynb
│       ├── 11-query-construction.ipynb
│       ├── 12-01-multi-vector-summary.ipynb
│       ├── 12-02-multi-vector-chunks.ipynb
│       ├── 12-03-multi-vector-hypothetical-questions.ipynb
│       ├── 13-raptor.ipynb
│       ├── 14-colbert.ipynb
│       ├── 15-crag.ipynb
│       ├── 16-self-rag.ipynb
│       └── images
│           ├── 01-01-overview.png
│           ├── 01-02-overview.png
│           ├── 02-indexing.png
│           ├── 03-01-retrieval.png
│           ├── 03-02-retrieval.png
│           ├── 04-generation.png
│           ├── 05-multi-query.png
│           ├── 06-rag-fusion.png
│           ├── 07-01-decomposition-recursive.png
│           ├── 07-02-decomposition-parallel.png
│           ├── 08-step-back.png
│           ├── 09-hyde.png
│           ├── 10-01-logical-routing.png
│           ├── 10-01-structured-output.png
│           ├── 10-02-semantic-routing.png
│           ├── 11-query-construction.png
│           ├── 11-self-query.jpg
│           ├── 12-01-multi-vector-summary.png
│           ├── 12-02-multi-vector-chunks.png
│           ├── 12-03-multi-vector-hypothetical-questions.png
│           ├── 13-raptor.png
│           ├── 14-01-colbert.png
│           ├── 14-02-colbert.jpg
│           ├── 15-crag-implementation.png
│           ├── 15-crag.png
│           ├── 16-self-rag-implementation.png
│           ├── 16-self-rag.png
│           ├── generation.png
│           ├── indexing-01.png
│           ├── query-construction-01.png
│           ├── query-translation-01.png
│           ├── query-translation-02.png
│           ├── rag.png
│           ├── retrieval.png
│           └── routing-01.png
├── pyproject.toml
├── requirements.txt
├── src
│   └── llm_rag
│       ├── __init__.py
│       ├── graphs
│       │   ├── colbert
│       │   │   ├── __init__.py
│       │   │   └── colbert_model.py
│       │   ├── crag
│       │   │   ├── __init__.py
│       │   │   └── crag.py
│       │   ├── decomposition
│       │   │   ├── __init__.py
│       │   │   ├── parallel.py
│       │   │   └── recursive.py
│       │   ├── hyde
│       │   │   ├── __init__.py
│       │   │   └── hyde.py
│       │   ├── multi_query
│       │   │   ├── __init__.py
│       │   │   └── multi_query.py
│       │   ├── multi_vector
│       │   │   ├── __init__.py
│       │   │   ├── chunks.py
│       │   │   ├── hypothetical_questions.py
│       │   │   └── summary.py
│       │   ├── query_construction
│       │   │   ├── __init__.py
│       │   │   └── self_query.py
│       │   ├── rag_fusion
│       │   │   ├── __init__.py
│       │   │   └── rag_fusion.py
│       │   ├── raptor
│       │   │   ├── __init__.py
│       │   │   └── raptor.py
│       │   ├── routing
│       │   │   ├── __init__.py
│       │   │   ├── logical.py
│       │   │   └── semantic.py
│       │   ├── self_rag
│       │   │   ├── __init__.py
│       │   │   └── self_rag.py
│       │   ├── step_back
│       │   │   ├── __init__.py
│       │   │   └── step_back.py
│       │   └── utils.py
│       └── indexing
│           ├── __init__.py
│           ├── article.py
│           ├── colbert_model.py
│           ├── multi_vector
│           │   ├── __init__.py
│           │   ├── chunks.py
│           │   ├── hypothetical_questions.py
│           │   └── summary.py
│           ├── raptor
│           │   ├── __init__.py
│           │   ├── raptor.py
│           │   └── utils.py
│           ├── reflection.py
│           └── self_query.py
└── uv.lock
/.env.example:
--------------------------------------------------------------------------------
1 | LANGCHAIN_TRACING_V2=true
2 | LANGCHAIN_API_KEY=
3 | 
4 | OPENAI_API_KEY=
5 | 
6 | TAVILY_API_KEY=
7 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 168 | #.idea/ 169 | 170 | # Ruff stuff: 171 | .ruff_cache/ 172 | 173 | # PyPI configuration file 174 | .pypirc 175 | 176 | 177 | .env.* 178 | data/ 179 | .langgraph_api/ 180 | .ragatouille/ -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.11 2 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python Debugger: Current File", 9 | "type": "debugpy", 10 | "request": "launch", 11 | "program": "${file}", 12 | "console": "integratedTerminal", 13 | "justMyCode": false 14 | }, 15 | { 16 | "name": "Python Debugger: Remote Attach", 17 | "type": "debugpy", 18 | "request": "attach", 19 | "connect": { 20 | "host": "localhost", 21 | "port": 5678 22 | }, 23 | "pathMappings": [ 24 | { 25 | "localRoot": "${workspaceFolder}", 26 | "remoteRoot": "." 27 | } 28 | ], 29 | } 30 | ] 31 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025, Dmitry Labazkin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LLM RAG
2 | ## Configuration
3 | ### Environment
4 | - Copy the `.env.example` file to `.env`
5 | - Fill in the required values
6 | ## Installation
7 | ### Using `pip`
8 | - Create and activate a new Python virtual environment
9 | - `pip install -r requirements.txt`
10 | - `pip install -e .` (basic packages)
11 | or
12 | `pip install -e .[ragatouille]` (with `ragatouille` for ColBERT)
13 | ### Using `uv`
14 | - `uv sync --group dev` (basic packages)
15 | or
16 | `uv sync --group dev --extra ragatouille` (with `ragatouille` for ColBERT)
17 | - `uv pip install -e .`
18 | 
19 | 
20 | ## RAG From Scratch
21 | Author of the original series: [Lance Martin](https://github.com/rlancemartin)
22 | ### Original Sources
23 | #### Video
24 | [YouTube Playlist](https://www.youtube.com/playlist?list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x)
25 | 
26 | #### Code
27 | [Main Repository](https://github.com/langchain-ai/rag-from-scratch)
28 | [Corrective RAG (CRAG)](https://github.com/langchain-ai/langgraph/blob/main/examples/rag/langgraph_crag.ipynb)
29 | [Self-RAG](https://github.com/langchain-ai/langgraph/blob/main/examples/rag/langgraph_self_rag.ipynb)
30 | 
31 | 
32 | ### Table of Contents
33 | | Part | Name | Video | Slides | Jupyter Notebook | Python Script | LangGraph Studio |
34 | | ---- | ---------------------------------------- | ----- | ------ | ---------------- | ------------- | ---------------- |
35 | | 1 | Overview | [Watch](https://www.youtube.com/watch?v=wd7TZ4w1mSw&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1C9IaAwHoWcc4RSTqo-pCoN3h0nCgqV2JEYZUJunv_9Q) | [01-overview.ipynb](notebooks/rag-from-scratch/01-overview.ipynb) | - | - |
36 | | 2 | Indexing | [Watch](https://www.youtube.com/watch?v=bjb_EMsTDKI&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1MhsCqZs7wTX6P19TFnA9qRSlxH3u-1-0gWkhBiDG9lQ) | [02-indexing.ipynb](notebooks/rag-from-scratch/02-indexing.ipynb) | - | - |
37 | | 3 | Retrieval | [Watch](https://www.youtube.com/watch?v=LxNVgdIz9sU&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/124I8jlBRCbb0LAUhdmDwbn4nREqxSxZU1RF_eTGXUGc) | [03-retrieval.ipynb](notebooks/rag-from-scratch/03-retrieval.ipynb) | - | - |
38 | | 4 | Generation | [Watch](https://www.youtube.com/watch?v=Vw52xyyFsB8&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1eRJwzbdSv71e9Ou9yeqziZrz1UagwX8B1kL4TbL5_Gc) | [04-generation.ipynb](notebooks/rag-from-scratch/04-generation.ipynb) | - | - |
39 | | 5 | Query Translation - Multi-Query | [Watch](https://www.youtube.com/watch?v=JChPi0CRnDY&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/15pWydIszbQG3Ipur9COfTduutTZm6ULdkkyX-MNry8I) | [05-multi-query.ipynb](notebooks/rag-from-scratch/05-multi-query.ipynb) | [multi_query.py](src/llm_rag/graphs/multi_query/multi_query.py) | Query Translation - Multi-Query |
40 | | 6 | Query Translation - RAG-Fusion | [Watch](https://www.youtube.com/watch?v=77qELPbNgxA&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1EwykmdVSQqlh6XpGt8APOMYp4q1CZqqeclAx61pUcjI) | [06-rag-fusion.ipynb](notebooks/rag-from-scratch/06-rag-fusion.ipynb) | [rag_fusion.py](src/llm_rag/graphs/rag_fusion/rag_fusion.py) | Query Translation - RAG-Fusion |
41 | | 7 | Query Translation - Decomposition | [Watch](https://www.youtube.com/watch?v=h0OPWlEOank&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1O97KYrsmYEmhpQ6nkvOVAqQYMJvIaZulGFGmz4cuuVE) | [07-01-decomposition-recursive.ipynb](notebooks/rag-from-scratch/07-01-decomposition-recursive.ipynb)<br>[07-02-decomposition-parallel.ipynb](notebooks/rag-from-scratch/07-02-decomposition-parallel.ipynb) | [recursive.py](src/llm_rag/graphs/decomposition/recursive.py)<br>[parallel.py](src/llm_rag/graphs/decomposition/parallel.py) | Query Translation - Decomposition (Recursive)<br>Query Translation - Decomposition (Parallel) |
42 | | 8 | Query Translation - Step-Back Prompting | [Watch](https://www.youtube.com/watch?v=xn1jEjRyJ2U&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1L0MRGVDxYA1eLOR0L_6Ze1l2YV8AhN1QKUtmNA-fJlU) | [08-step-back.ipynb](notebooks/rag-from-scratch/08-step-back.ipynb) | [step_back.py](src/llm_rag/graphs/step_back/step_back.py) | Query Translation - Step-Back Prompting |
43 | | 9 | Query Translation - HyDE | [Watch](https://www.youtube.com/watch?v=SaDzIVkYqyY&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/10MmB_QEiS4m00xdyu-92muY-8jC3CdaMpMXbXjzQXsM) | [09-hyde.ipynb](notebooks/rag-from-scratch/09-hyde.ipynb) | [hyde.py](src/llm_rag/graphs/hyde/hyde.py) | Query Translation - HyDE |
44 | | 10 | Routing | [Watch](https://www.youtube.com/watch?v=pfpIndq7Fi8&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1kC6jFj8C_1ZXDYcFaJ8vhJvCYEwxwsVqk2VVeKKuyx4) | [10-01-logical-routing.ipynb](notebooks/rag-from-scratch/10-01-logical-routing.ipynb) | [logical.py](src/llm_rag/graphs/routing/logical.py)<br>[semantic.py](src/llm_rag/graphs/routing/semantic.py) | Routing - Logical Routing<br>Routing - Semantic Routing |
45 | | 11 | Query Construction | [Watch](https://www.youtube.com/watch?v=kl6NwWYxvbM&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1bUwz4PgzMIwbBi7DFzpHUkLL4Z6jcKmNGJ-BlK0Hpps) | [11-query-construction.ipynb](notebooks/rag-from-scratch/11-query-construction.ipynb) | [self_query.py](src/llm_rag/graphs/query_construction/self_query.py) | Query Construction - Self-Query |
46 | | 12 | Indexing - Multi-Representation Indexing | [Watch](https://www.youtube.com/watch?v=gTCU9I6QqCE&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1Pu3q1MApA-V_PMvL2YDmWzaDX3HkTh0uUl2BFTcsalk) | [12-01-multi-vector-summary.ipynb](notebooks/rag-from-scratch/12-01-multi-vector-summary.ipynb)<br>[12-02-multi-vector-chunks.ipynb](notebooks/rag-from-scratch/12-02-multi-vector-chunks.ipynb)<br>[12-03-multi-vector-hypothetical-questions.ipynb](notebooks/rag-from-scratch/12-03-multi-vector-hypothetical-questions.ipynb) | [summary.py](src/llm_rag/graphs/multi_vector/summary.py)<br>[chunks.py](src/llm_rag/graphs/multi_vector/chunks.py)<br>[hypothetical_questions.py](src/llm_rag/graphs/multi_vector/hypothetical_questions.py) | Indexing - Multi-Vector - Summary<br>Indexing - Multi-Vector - Chunks<br>Indexing - Multi-Vector - Hypothetical Questions |
47 | | 13 | Indexing - RAPTOR | [Watch](https://www.youtube.com/watch?v=z_6EeA2LDSw&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1U8NpSS1sq3-deiNvSGGNg_UY2Zh_5fS2HabuQFJPftc) | [13-raptor.ipynb](notebooks/rag-from-scratch/13-raptor.ipynb) | [raptor.py](src/llm_rag/graphs/raptor/raptor.py) | - |
48 | | 14 | Indexing - ColBERT | [Watch](https://www.youtube.com/watch?v=cN6S0Ehm7_8&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1IRhAdGjIevrrotdplHNcc4aXgIYyKamUKTWtB3m3aMU) | [14-colbert.ipynb](notebooks/rag-from-scratch/14-colbert.ipynb) | [colbert_model.py](src/llm_rag/graphs/colbert/colbert_model.py) | - |
49 | | 15 | Retrieval - CRAG | [Watch](https://www.youtube.com/watch?v=pbAd8O1Lvm4) | - | [15-crag.ipynb](notebooks/rag-from-scratch/15-crag.ipynb) | [crag.py](src/llm_rag/graphs/crag/crag.py) | Retrieval - CRAG |
50 | | 16 | Generation - Self-RAG | [Watch](https://www.youtube.com/watch?v=pbAd8O1Lvm4) | - | [16-self-rag.ipynb](notebooks/rag-from-scratch/16-self-rag.ipynb) | [self_rag.py](src/llm_rag/graphs/self_rag/self_rag.py) | Generation - Self-RAG |
51 | 
52 | 
53 | ### Query Translation
54 | #### RAG Fusion
55 | [Forget RAG, the Future is RAG-Fusion](https://medium.com/towards-data-science/forget-rag-the-future-is-rag-fusion-1147298d8ad1)
56 | [RAG-Fusion: The Next Frontier of Search Technology](https://github.com/Raudaschl/rag-fusion)
57 | [Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf)
58 | [Implementing Reciprocal Rank Fusion (RRF) in Python](https://safjan.com/implementing-rank-fusion-in-python)
59 | 
60 | #### Decomposition (Recursive)
61 | [Least-to-Most Prompting Enables Complex Reasoning in Large Language Models](https://arxiv.org/abs/2205.10625)
62 | [Interleaving Retrieval with Chain-of-Thought Reasoning for Knowledge-Intensive Multi-Step Questions](https://arxiv.org/abs/2212.10509)
63 | 
64 | #### Step-Back Prompting
65 | [Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models](https://arxiv.org/abs/2310.06117)
66 | 
67 | #### HyDE
68 | [Precise Zero-Shot Dense Retrieval without Relevance Labels](https://arxiv.org/abs/2212.10496)
69 | 
70 | 
71 | ### Routing
72 | [Semantic Router](https://github.com/aurelio-labs/semantic-router)
73 | 
74 | 
75 | ### Query Construction
76 | [Langchain Self Query With Dates](https://notes.alexkehayias.com/langchain-self-query-with-dates/)
77 | 
78 | 
79 | ### Indexing
80 | #### Multi-Representation Indexing
81 | [Dense X Retrieval: What Retrieval Granularity Should We Use?](https://arxiv.org/abs/2312.06648)
82 | 
83 | #### RAPTOR
84 | [RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval](https://arxiv.org/pdf/2401.18059)
85 | [Building long context RAG with RAPTOR from scratch](https://www.youtube.com/watch?v=jbGchdTL7d0)
86 | 
87 | #### ColBERT
88 | [ColBERT: Efficient and Effective Passage Search via Contextualized Late Interaction over BERT](https://arxiv.org/abs/2004.12832)
89 | [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](https://arxiv.org/abs/2112.01488)
90 | [RAGatouille](https://github.com/AnswerDotAI/RAGatouille)
91 | [[Paper review] ColBERT, ColBERTv2](https://pangyoalto.com/en/colbertv1-2-review-en)
92 | [Overcoming the Limits of RAG with ColBERT](https://thenewstack.io/overcoming-the-limits-of-rag-with-colbert)
93 | [ColBERT Inference in
the Browser](https://colbert.aiserv.cloud) 94 | 95 | 96 | ### Agentic RAG 97 | [Self-Reflective RAG with LangGraph](https://blog.langchain.dev/agentic-rag-with-langgraph) 98 | 99 | #### CRAG 100 | [Corrective Retrieval Augmented Generation](https://arxiv.org/abs/2401.15884) 101 | 102 | #### Self-RAG 103 | [Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection](https://arxiv.org/abs/2310.11511) -------------------------------------------------------------------------------- /langgraph.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": [ 3 | "." 4 | ], 5 | "graphs": { 6 | "Query Translation - Multi-Query": "src/llm_rag/graphs/multi_query/multi_query.py:graph", 7 | "Query Translation - RAG-Fusion": "src/llm_rag/graphs/rag_fusion/rag_fusion.py:graph", 8 | "Query Translation - Decomposition (Recursive)": "src/llm_rag/graphs/decomposition/recursive.py:graph", 9 | "Query Translation - Decomposition (Parallel)": "src/llm_rag/graphs/decomposition/parallel.py:graph", 10 | "Query Translation - Step-Back Prompting": "src/llm_rag/graphs/step_back/step_back.py:graph", 11 | "Query Translation - HyDE": "src/llm_rag/graphs/hyde/hyde.py:graph", 12 | "Routing - Logical Routing": "src/llm_rag/graphs/routing/logical.py:graph", 13 | "Routing - Semantic Routing": "src/llm_rag/graphs/routing/semantic.py:graph", 14 | "Query Construction - Self-Query": "src/llm_rag/graphs/query_construction/self_query.py:graph", 15 | "Indexing - Multi-Vector - Summary": "src/llm_rag/graphs/multi_vector/summary.py:graph", 16 | "Indexing - Multi-Vector - Chunks": "src/llm_rag/graphs/multi_vector/chunks.py:graph", 17 | "Indexing - Multi-Vector - Hypothetical Questions": "src/llm_rag/graphs/multi_vector/hypothetical_questions.py:graph", 18 | "Retrieval - CRAG": "src/llm_rag/graphs/crag/crag.py:graph", 19 | "Generation - Self-RAG": "src/llm_rag/graphs/self_rag/self_rag.py:graph" 20 | }, 21 | "env": "./.env", 22 | "dockerfile_lines": [] 23 | } -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/01-overview.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 10, 6 | "id": "07416ba6-134d-4b1e-905b-aacd355cb91e", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import os\n", 11 | "\n", 12 | "from dotenv import find_dotenv, load_dotenv" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 11, 18 | "id": "5f57d2d4-b264-4de9-bc06-c14307418060", 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/plain": [ 24 | "True" 25 | ] 26 | }, 27 | "execution_count": 11, 28 | "metadata": {}, 29 | "output_type": "execute_result" 30 | } 31 | ], 32 | "source": [ 33 | "load_dotenv(find_dotenv('.env'))" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 12, 39 | "id": "66b88d20-563a-4ccb-bb28-6b6144d048b7", 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "os.environ[\"LANGCHAIN_PROJECT\"] = \"RAG From Scratch: Part 1 (Overview)\"" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "id": "9c35780a-28cd-4e73-a39c-b533dca92276", 49 | "metadata": {}, 50 | "source": [ 51 | "![](images/rag.png)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "id": "66049591-4763-41c0-9f86-c1da026294a0", 57 | "metadata": {}, 58 | "source": [ 59 | "# Part 1: Overview" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "id": 
"43f0b3d9-cb4c-4962-9a9a-d8775a72468c", 65 | "metadata": {}, 66 | "source": [ 67 | "![](images/01-01-overview.png)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "id": "d592a47f-42df-4e4a-86da-65f245129e1c", 73 | "metadata": {}, 74 | "source": [ 75 | "![](images/01-02-overview.png)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "id": "b0e3e69b-4bbc-41b6-8ffe-dbc4c5221ca3", 81 | "metadata": {}, 82 | "source": [ 83 | "## Configure components" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 13, 89 | "id": "590c8ee8-4228-4054-b312-f89e5ff6d635", 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "from langchain_openai import ChatOpenAI, OpenAIEmbeddings" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 14, 99 | "id": "bfb1e9b8-70e6-4490-bb8a-0d0a066c4683", 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 8, 'total_tokens': 18, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_b376dfbbd5', 'id': 'chatcmpl-BKMvWXqejr2sHYneYvd4Sr6SbRKv0', 'finish_reason': 'stop', 'logprobs': None}, id='run-f1bd73da-f189-46b5-8fab-aa082cf98064-0', usage_metadata={'input_tokens': 8, 'output_tokens': 10, 'total_tokens': 18, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})" 106 | ] 107 | }, 108 | "execution_count": 14, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": [ 114 | "llm = ChatOpenAI(\n", 115 | " model=\"gpt-4o-mini\",\n", 116 | " temperature=0\n", 117 | ")\n", 118 | "llm.invoke(\"Hello\")" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 15, 124 | "id": "c2bdeebb-3875-4fdd-97be-346c92eeb240", 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "text/plain": [ 130 | "1536" 131 | ] 132 | }, 133 | "execution_count": 15, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n", 140 | "len(embeddings.embed_query(\"Hello\"))" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "id": "9ca63ab0-004c-4f93-b0e6-a06c7e84c3ed", 146 | "metadata": {}, 147 | "source": [ 148 | "## Load documents" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 16, 154 | "id": "5b127b80-4839-4edd-9b6e-1a55a90a3fba", 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "import bs4\n", 159 | "from langchain_community.document_loaders import WebBaseLoader" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 17, 165 | "id": "e29df1f8-327a-437a-85f9-d87867cbfd28", 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "data": { 170 | "text/plain": [ 171 | "1" 172 | ] 173 | }, 174 | "execution_count": 17, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "loader = WebBaseLoader(\n", 181 | " web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n", 182 | " bs_kwargs=dict(\n", 183 | " parse_only=bs4.SoupStrainer(\n", 184 | " 
class_=(\"post-content\", \"post-title\", \"post-header\")\n", 185 | " )\n", 186 | " ),\n", 187 | ")\n", 188 | "docs = loader.load()\n", 189 | "len(docs)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 18, 195 | "id": "2d06a9e8-aeab-4b37-8ac4-20279c0802af", 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "name": "stdout", 200 | "output_type": "stream", 201 | "text": [ 202 | "\n", 203 | "\n", 204 | " LLM Powered Autonomous Agents\n", 205 | " \n", 206 | "Date: June 23, 2023 | Estimated Reading Time: 31 min | Author: Lilian Weng\n", 207 | "\n", 208 | "\n", 209 | "Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\n", 210 | "Agent System Overview#\n", 211 | "In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n", 212 | "\n", 213 | "Planning\n", 214 | "\n", 215 | "Subgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\n", 216 | "Reflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\n", 217 | "\n", 218 | "\n", 219 | "Memory\n", 220 | "\n", 221 | "Short-term memory: I \n" 222 | ] 223 | } 224 | ], 225 | "source": [ 226 | "print(docs[0].page_content[:1000])" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "id": "1aff715e-f153-486a-9e4a-85ae300b03e7", 232 | "metadata": {}, 233 | "source": [ 234 | "## Split documents" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 19, 240 | "id": "8621acc3-5ab8-4f70-a2cb-171795bcf9cb", 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "from langchain.text_splitter import RecursiveCharacterTextSplitter" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 20, 250 | "id": "3143439a-0271-4ad1-8f9b-78cd2a631098", 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "text/plain": [ 256 | "66" 257 | ] 258 | }, 259 | "execution_count": 20, 260 | "metadata": {}, 261 | "output_type": "execute_result" 262 | } 263 | ], 264 | "source": [ 265 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", 266 | "splits = text_splitter.split_documents(docs)\n", 267 | "len(splits)" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "id": "05600430-cf93-4429-92ea-ca183ec2310c", 273 | "metadata": {}, 274 | "source": [ 275 | "## Store documents" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 21, 281 | "id": "e13dd7d7-06d6-47ef-8251-ea0ec7dec665", 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "from langchain_core.vectorstores import InMemoryVectorStore" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 22, 291 | "id": "c49eece0-d92c-47ac-8db5-bd36e6eb185e", 292 | "metadata": {}, 293 | "outputs": [ 294 | { 295 | "data": { 296 | "text/plain": [ 297 | "(66, 66)" 298 | ] 299 | }, 300 | "execution_count": 22, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "source": [ 306 | "vectorstore = 
InMemoryVectorStore(embeddings)\n", 307 | "doc_ids = vectorstore.add_documents(documents=splits)\n", 308 | "len(doc_ids), len(vectorstore.store)" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "id": "06c4043d-c52d-4148-b583-d67d628dbff5", 314 | "metadata": {}, 315 | "source": [ 316 | "## RAG" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 23, 322 | "id": "561288ac-3b88-4ef0-a4df-9e3ce2d5f029", 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "from typing import TypedDict\n", 327 | "\n", 328 | "from langchain_core.documents import Document\n", 329 | "from langchain_core.messages import HumanMessage\n", 330 | "from langgraph.graph import END, START, StateGraph" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 24, 336 | "id": "f7f2b84b-6454-4419-81dd-22c1df67e4ac", 337 | "metadata": {}, 338 | "outputs": [ 339 | { 340 | "name": "stdout", 341 | "output_type": "stream", 342 | "text": [ 343 | "You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\n", 344 | "Question: {question} \n", 345 | "Context: {context} \n", 346 | "Answer:\n" 347 | ] 348 | } 349 | ], 350 | "source": [ 351 | "rag_prompt_template = \"\"\"You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\n", 352 | "Question: {question} \n", 353 | "Context: {context} \n", 354 | "Answer:\"\"\"\n", 355 | "print(rag_prompt_template)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 25, 361 | "id": "c7efcd9e-e6ef-4de0-9c48-0fab0bfb860a", 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "def format_docs(docs):\n", 366 | " return \"\\n\\n\".join(doc.page_content for doc in docs)" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": 26, 372 | "id": "52e36a6b-474e-4d75-9a72-31f6c05e2f7b", 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [ 376 | "class State(TypedDict):\n", 377 | " question: str\n", 378 | " context: list[Document]\n", 379 | " answer: str" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": 27, 385 | "id": "708e72c8-6cc2-4d87-ac99-a89d6dba7e2e", 386 | "metadata": {}, 387 | "outputs": [ 388 | { 389 | "data": { 390 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAG0AAAFNCAIAAACFQXaDAAAAAXNSR0IArs4c6QAAHw1JREFUeJztnXdYFGf+wN/tfYFdegcp0lUsRL0oUbHExAbRQz29JJfEckZjjZrTFONdLmp+XkxiNKfYY0NjOfUssSQxRkUR0AUEKStL2WV7L78/xuM42d2ZhXfZHZjPc8897s47M18+eafs274km80GCLoM2dMB9BAIj3AgPMKB8AgHwiMcCI9woEI5iqRGr1WatWqLxWQz6KxQjulWaEwylUJi8ylsHiUokkmmkLp4QFJX3h9Fd1RVD9TVJZroZI7NBthcil8Q3ajHgUc6iyxvNmqVFoPW8rRaH5HAjk3j9B3Mo1I7eYF20mPJL4qff5BGJ7Nj07gxqRwKtav/PT1LzUNN1QNNfYWu7yDeoBxBJ47gskfpU8P5PZKQGNbQV4UMFqUTp/Rmbp6V3r8qz5kdFJPKdWlH1zyK7qjuXGqd+GYIX0BzPUh8YDRYfzzS5BdId6liuuDxSZmm/I4qZ3ZwZyPEEzfPSmkMcuYoP4zlsXosutLaWGsYN6dXSET4+XSLTm0ZNSMIS2FMj6faR9pakbZXSQQADJ3oT6OT71+TYymM7lGtMN+/Lp/0ThiM2HDGi1MDpA1GcaUWtSS6x59OtiRm8iAFhj/ShvtcL2xBLYbisVlsaG00JgzovR4Dwhh+QfTyuyrnxVA8lvykGD7FH2pg+GPYq8KKoi54NBmtotuq8Dg27MBwBteXpm61NNXrnZRx5rG6RBOTynFDYM44fPjw+vXrO7HjypUrT5065YaIAAAgJo1T/UDjpIAzjw1Vuvj+rv086joPHz7s5h2xEJfBbRYbnBRw9h7+/ed12TMCAsOZ7oisqKho27ZtlZWVFoslISFhwYIFAwYMeOutt+7evYsU2L9/f2Ji4rlz5/bu3VtbW0un09PT05cuXRoeHo7UPhKJFB0dvW/fvo0bNy5ZsgTZi8vl/vjjj9CjNRutO9ZWz/usj6MCzuqjRmXm8OA0UD6HTqdbvHhxbGzsrl27CgoK4uPjFy1apFQqN2/e3Ldv35ycnIsXL8bFxZWWlq5du3bYsGF79+7dunWrTqdbvnw5cgQajVZZWfno0aOtW7empaWdPXsWALB8+fKTJ0+6I2AqnUyhkAw6i8MCTnbWqixsnltadCQSiUajmTBhQkxMDABg2bJlY8aModPpTCaTSqXS6XRfX18AQFRU1N69e+Pj46lUKgAgPz//vffek8lkAoEAAFBfX//dd9/5+PgAAAwGAwCAzWYjH90Bh0/RKC2OmrgcerRarSwOmUR2S8NiZGRkVFTU2rVrc3Nzs7KyEhMTMzMzOxbjcrlisfjLL7+sq6vT6/UmkwkAoFQqEY9RUVHus9YRJoditTi8Bzq8rslkss0GdGqHNbkrUCiUnTt3jh49urCwcNasWa+88sqZM2c6Frtw4cKqVatSU1O3bt164MCBNWvWtN/K5XbrM7C1ycjhO6x2zu6PbD5VqzS7Jyrg5+e3ePHikydPHj58ePDgwevWrev4wC0sLBw4cOC8efOio6P9/f31emdvcG7FarEZdFYW1+FdzpnH0Bim1j31USwWtz1VY2NjV69eTSaTHz9+jHzT9gphNBqRGyXCuXPn2m/tiPvGKqkV5uhkZ6/Szjz6hzEq76ndEBWQSCQrVqzYt2/fkydPampqdu7cSSaT09LSAAA8Hk8kEolEIrlcnpqaevPmzZKSkoaGho0bN/r7+wMAysrKOlZMBoPBYDDu3r0rEonMZvjXUNUDDV/g7JlMcfLjgeNDvXGipX821jZh7ISGhoaGhh47dmz37t0nT57UarWrVq1KT08HAPj4+Jw5c+b48eP9+/fPycmpqKj49ttvz549m5mZuWTJkuLi4u+//z46Orq2tlatVk+aNKntmFartbCw8Pz587m5uQwGA27Av5yWpg7zcdabYnPK+T0NTXU652V6PEa9ufDLOudlUNp7Egfyfjkjg/vfFnfcPCuLRus+RPm5EpXEuXtJLq7UhcWx7BZYuHBhSUmJ3U0Wi4VCsf+A+/DDD0eMGOH81J1m5MiRjuJBXrnsbr148SLytv8cGqW5okj9+kcxzk+K3s/VWKsvvqEYk2+/u0er1SLxdcRsNtuNDADAYrEcbeo6KpX9tkLk+ePovDye/bbqn0+3BIQy4tFasjH1Fz64oZBKDCNzA1FL9jCKr8tbm0wjpgWglsTUX5g23MdmBbfOSWHEhhsq76kr76uxSHRtHMCdS60Ws23w2M4Mf8Ed5XdVVSWacX/A2tXswvCqzFF+ZpP1/B5JZ2PDDb9dkFU9cEFiZ8ZJld9VXT3WNGScMP13vhiK44yKItXPp6Rpw/gDRrl22XVm3J7JYPn5tKzqgTp9uG9MGkcQRHf1CN6GqtVUXaJ5UqqhsyhDXxF2YhRY58eRquXm4hvy6gcaqxXEpHGoVBKHT+ULqBYcDCMFFApJJTdplRad2tJQpdNrrTGpnOQhvIDOdqJ0aTwugrzZKHmiV7WaNUozmUJSySA3E9y/fz8lJQXu+ybXl2o129h8CseXGhTJDAjr6u9xCB7dzejRo48ePdq+Ac0LIeYrwIHwCAcceExMTPR0COjgwKNIJPJ0COjgwGN3dq52Ghx4VCgUng4BHRx4DAkJ8XQI6ODAY0NDg6dDQAcHHlNSUjwdAjo48FhaWurpENDBgUdcgAOPyDAKLwcHHlta0KeveBwceCTqIxyI+tiLwIHHPn0czhLwHnDgsW18qTeDA4+4AAcek5KSPB0COjjw6NYJb7DAgUdcgAOPRHsPHIj2nl4EDjwS/a5wIPpdexE48Ej0X8OB6L+GA9HeAweivacXgQOPQUGYVmD0LDjw2NjY6OkQ0MGBx+TkZE+HgA4OPJaVlXk6BHRw4JGoj3Ag6iMckIXhvBzvnYc0YcIEZA5XS0uLQCAgk8k2m83f33/Xrl2eDs0O7lrcoOuQSKSnT58i/5ZIJMgycIsXL/Z0XPbx3uu6f//+z10rMTExo0aN8lxEzvBej7Nnzw4O/u9MchaLNXPmTI9G5Azv9ZiYmNivX7+2j3369MnJyfFoRM7wXo8AgFmzZiE/rtlsdn5+vqfDcYZXe0xKSsrIyLDZbDExMd5cGTvzvDYarC1ig17bTbP+x704p77cNDlnSlWJs2WnIUJnkIQhDCdLPdrFtffHf++XPC7WBEezyO5Z79UboLPIdSJNeBxrdH4QjYH1esXq0Wq1FX4l7pPO75PB71qc+KCxVvfr2eZpC8OYHEwVE6vHk1+L4zN9IxK7e3l7D6KWm87vFs9dF42lMKZ6W1OmYfKovUoiklYhfgC/+AakPD4AgJanRgazp6WGwwLHh9r4xFk6gDYwedRpLD4BuF/sqBP4+NONBkxvJpg8mo02i8lLm4XcitUC9NhWrPbq93AcQXiEA+ERDoRHOBAe4UB4hAPhEQ6ERzgQHuFAeIQD4REO3u5x0pRRe/bu9HQU6HjeY3X14xn5Ex1tnf/Okqys4d0bUWfw/LiU8nJn06vHjn
Wo2KtwV32cPHX00WMHVr6/KGfcC2q1GgBw6fL5d+bNHv/y8Km5OV9u24Tk2NpdsP2vn61vbJRkjxp49NiBwhOHp0wb89NPV6dMG/P1N188d12XVzxasXLhpCmjXn7lxQ/+skwiaQAA7Pxu28RXRyCpDREOHipwflJ34C6PVCr11OnjsTFxWzZtZzKZN278+MmGNZmZQ3Z8e3DF8nXXrl/atGUDAGDG9DlTp84IDAw6cfziKxOn0Wg0vV53vPDQyhXrJ03Ka3/AxkbJe0vfJpHJWzZt3/T5N0qVYunyeUaj8aXssRqN5s7dW20lr127lDVkOJfLdXRSd+AujyQSiclgvv3WopSUdCqVeuDQ7oyMAX96c2F4WETWkGF/evPPFy/+q6mpkclkMugMEonk4+PLYDBIJJJer8+dlp81ZFhoSFj7A/5w6iiJRFq7ZkNsbFzfxOTVqz5uaBBfvXYpNjYuMjL6xo0rSLHGRskjUdmoUeMAAHZPKpW6ZVUlNz5nUlLSkX9Yrdby8ocDM7PaNvXLyAQAVFVV2N0xOTmt45cPH5b0TUzhcZ/lJQoKCg4JCausFAEAskfm/PTzVavVCgC4dv0Sh8PJGjLc0Umrn7hlVpMbnzMczrPcYHq93mKx7C7YvmfvjvYFpDL7VaNtx/ZoNOqKSlHOuBfavjGZTMgRXsrOKdjzbUnJ/fT0/levXRo+LJvBYCAJrzqetLXVLWnbuuN5jWQRnjplxssTJrf/3tfPhdwkHA43La3f0iX/k+KVxWIDACIjo2Nj467fuBIaGl5aWjznD285OalA4JZV57rDI5lMjo/v29jYEBn5rE/dZDI1NTfyeS4MzUhKSj1/4XRoaHhbwoq6uhqh8JmU7JE55y+cDg+P9PMTDOg/yMlJ3ZRdt5vew2dM/8O165cPHNxdV1dTUSn6dOMHi959Q6PRAAC4XJ5U2lJcXIS8xzjilYnTdDrt3z5bX1Epqq+v3bN35x/feO3Ro2dLgGRn59TX1546fWzkyDFtmfXsnlSr1brjD+wmjy/+7qXV73986fK519+cvnzFApPZtGXTdg6HAwAY9dK40NDwpcvn/eucs1TqwcEhmzdtl8mki9594535s2/99vMnH29ueyKFhYYnxPd9/Lhi9EvjnJ+UzWa74w/ENL7nxyPNXD964iAczMuHS1Ot/t7llmnvok888fzv654B4REOhEc4EB7hQHiEA+ERDoRHOBAe4UB4hAPhEQ6ERzgQHuFAeIQDJo8sHoVM7bETCp1iwzjhBZNHvh+1qUbX5ZjwR1O9nsnBpAhTofBEllYJOas1LlA0GaOTMbX7YvLI86X1Hcy78j0O8ghC5NezzXwhNTwek0cX5l9X3lPfOi9LHOQjDGUy2T12uqHFZG0W6xuqtMIQ+uCxWHs0XZvHLm0w3L+mkDeblFIThuJwMBgMdDqdROqmB50ghMFkkxMGcKKTXehZ9N71pNog8tr3IgiPcMCBRyJvChyIvClwINZhhwOxDjsc+vbt6+kQ0MGBx0ePHnk6BHRw4JG4P8KBuD/2InDgMT4+3tMhoIMDjxUV9qeHeBU48IgLcOCRyWR6OgR0cODRfZMrIYIDj3w+DlZAxYFHpVLp6RDQwYFHXIADj2FhYRhKeRgceBSLxZ4OAR0ceMQFOPBItPfAgWjv6UXgwCPR7woHot+1F4EDj8TzGg7E8xoOXj5iDwEHHuVyTJlLPAsOPOICHHhMTEz0dAjo4MCjSCTydAjo4MBjUlKSp0NABwceHz50tvCrl4ADj8S4PTgQ4/bggIv7o/fOQ8rLy2MymWQyuby8PDw8HPk3k8ncvn27p0Ozg+fXD3fE48ePyeRnl0t1dTUAgEKhEHntXWbw4MHPfRMRETFjxgwPhYOC93qcO3du+xEpZDJ56tSp3TZb01W812NWVlZCQkLb7Ts8PHz69OmeDsoh3usRqZI+Pj7InTEvL69t4VsvxKs9ZmVlJSYm2my20NBQb66MWJ/XZpNVp+6mRPbPMSP3jzWPm/KmzNIorAB4IAYanYxlqQ+U98eHt5TF1xUyiZHF9d5ryq0w2BSjzpLyAn/gGGdrLDjzeOuCrOWpqd8IAU9Ac0+Q+EAtN1XdV6lajePmBDsq49Djr+dkSqk5a2KgOyPEE2U35bIG/fi59lXav/Jbm4wtYgMhsT3JWb50FuVJmcbuVvseW8QGm81L33g9CJ1JaayxP+jfvke1whIQgYPZFt2MMJSh19p/Z7D/3mMyWE04mGzR3VjNNkfrk3n1eziOIDzCgfAIB8IjHAiPcCA8woHwCAfCIxwIj3AgPMKB8AgHwiMcerjH9R+uPHf+VDecqId7LC/vprGT9vsVbp2XGfUgY6QL+YBbWpo3bdlQVPQbl8vLnZav0aivXb9csOsoAMBsNu/b/93lKxcaGxsCAoLycmdOejUXAFBTUz339bzNm745dvzggwf3yGRy9sgxC+YvRfqp5fLWr77Zcv/+HYVCHhsb/6c3F/bvNxAAUHji8J69O5a9t/bzzZ/kjHl53juLW1tlX2//4u7dWyqVMiAgaOrk6VOnzgAAZI8aiMTG5XJPnfwRSXN/5Mi+mtpqFov9UvbYN99Y4NKiNjVl6rpHqvF/DOm4Cdo4qc83f1JZKfr4o00CP+HOf26rrX1Cpz/L8PDN9v87c7Zw8aJVKakZd+78+uW2z6lU6ssTJlOoVADAtq82LXn3/U8+2nTn7q1ly+enpfXPHjnGarWuXPVntUa9csV6ocD/5A9HVr2/6Otte2Jj42g0ml6vO154aOWK9Ugu4c8+/6iu9skHaz4VCIQPSu5t2rwhMCh4+LCRhw+dfW3GhD8vXI6kZ0fS3Of/fu7atZ/W19du3rJBoZSvef9jKH8+nOtaJpPeuvXzrJlvDBqY1adP/NrVG5SKZ5Ne1Gr1yR+OTH9t9tixE8PDIia9mjs2Z+KBg7vb9h3x4mgkc3vmgMGhIWEiURkA4PadX8srHi1bunZA/0FRUTELFywLCgo5XngIAEAikfR6fe60/Kwhw0JDwgAAC+Yv/eyzbRkZAyIioiaMnxTXJ+H27ZsAAD7fBwDAZrN9+D6O0tw3NTVCMQCnPorFdTabLTUlA/nI4XAyM4fU1FYDAB4/Ljebze3zy2dkZJ45e6Itf3Kf2P8uA8fl8tRqFZLFnkajIZnokUFS6Wn9kSz2CG2ZhgEALCbrwKHd9+7dVijkVqtVpVKGhUU8FyGS5n7unLfbvkEOXlVVERgY1HUDcDwqFHIAAKtdSmSkLgAAtFoNAGDJ0rfbhoohd2RZqxT5SGcw2h8K2arVakwm09jxQ9u+t1gsAoGw7SOH82zRfrPZvGLVQovFsnDBssiIaAqFsvYvSztGqNfr7aa5l8paYAiA5BFxYWi3gJZK9WzxIuQPXrP6k9iYuPa7BAYENTU7vKY4HC6dTt+x/UD7L9uGlbbn4cOSqqrK/9uyIz29P/KNQt4aEhz6XDFHae59/Vx4ljoBjkfkOnokKo2NjQMAaDSaO3d+FfoHAABiY+NpNFprqyxyxLP88nJ5K4lEansK2
aVv3xSj0WixWGJink0alkgafH39OpY0GA3tq39paXGD5GliYnJbAaSCO0pzz+fBWfQLznMGSYe+f/8/S0uLa2ufbPzbX/z+cw1yudyJE6fuLth++cqFpw3ionu3l62Y/9fP1js/YOaAwfFxiZ9u/ODevTsNkqcXL5176+38kz8c6Vgyrk8CnU4/XnhIKm357fbNrf/4bNDArLr6mtZWGYPBYDAY94vvVlSKzGaz3TT3Go39fn1Xgfbes3bNhr9v+njJ0rf9hQEzZ74uFPg/evRsPYT57yzhcXnf7tgqlbYIBMKhL7z4xusLnB+NQqH87a//+Hr7F+s+XKHX64KDQ2fPfjMvd2bHkr6+fiuWr9u588sL/z6TkJC0csX65pamjz95/71l7+z67vDvZ8w99H3BL79c37f3BJLm/uCh3bt2f8PhcFNTM7Zs2s7hcKD8+dDew/V6vcls4nF5yMf3lr7D5/usX/c3KFF6Cd3xHr56zWJZq3TpkjV+foJfbl4vund744YvYB3c+4F5XX/19eYP1i0zGPShoeGrVqzPyhoO6+DeDzSPAoFw7ZoNsI6GO3p4e0+3QXiEA+ERDoRHOBAe4UB4hAPhEQ6ERzgQHuFAeISD/d+FdCbJCoj5M89DppA4PvaN2a+PPD9ac69MZO+cFrHe0XxV+x4DIxjeuoCBJzHqLcEx9scNOKyPYXHMa8ckbg4MTxRdlpJIIMJBmntn84ZLf1FU3FNnjBD6BdEp1N77RJI26B/fV9JopBenBjgqgzKPvbpUc++qXFKtp1A9dp1brBYymeKp07M4FBqTnDqUlzrU2fKyWNeTMug8s64CAGDy5MkFBQXIgh/dD51JxvKowNoezmB57Lo2WbR0JsmDAWDBq4PDETjwSKzDDgdiHXY4EPk+4EDk+4ADUR/hQNRHOBB5SeFA5CXtReDAI/GcgQPxnOlF4MBjVFSUp0NABwcea2pqPB0COjjwiAtw4NFTLeEugQOPCoXC0yGggwOPdqcVehs4CNFq9VgXG3Zw4BEX4MAjkZcUDkRe0l4EDjwS/a5wIPpdexE48Ei048KBaMftReDAI4/H83QI6ODAo0ql8nQI6ODAI/GcgQPxnIFDWFiYp0NABwcexWKxp0NABwceQ0OfXzzPC8GBx6dPn3o6BHRw4DE5ORlDKQ+DA49lZWWeDgEdrPO5up/MzEybzUYmk61WK/L/FAplzpw5Cxcu9HRodvDe+hgXF4csqYv0u5LJ5PDw8Pz8fE/HZR/v9Th79uznFkkfN26cQABnOVvoeK/HiRMnxsTEtH2MiIjIy8vzaETO8F6PAICZM2ey/7OW9tixY722Mnq7x/HjxyNVMjo6+rXXXvN0OM7wao8AgOnTpzOZzPHjx3tzZYT23mM2WqtLNXUVBmmDQae2UOlkpdQIIzwAADCbTFQqFUBaeMQvkKHXmFlcqm8QLSSaEZfOdbQEikt01WNdubboirK+QsMLZPMDOGQqicagUhkUEtlL11shAZtRbzEbLBazVd2iVbdoffzp/Ub69B3YpVb3znuU1OivFUp1Gpt/tC9HwOpKEJ5FI9fL65UWo+l3U/xjku0vh4JKZzzabOD6D611Ip1PKJ8rxLHB9uhUBmm13C+QOn5OYCcGXHbG49ldEqWSHJwgxFAWZ8jqlEalZsaycFd3dNnjvw82K5UUYSQOxmx3DrVUp5Mp8xa51ujpWg0+v6dRperJEgEAXCGLJeAd/LzOpb1c8HjnUqtcThJE9GSJCFwhm+nDvbC/CfsuWD3KGg1lv6mD4nvgPdEufuF8WZO16gHWrnOsHq8XSn1Cen5NbI9fhM/1QhnGwpg8Sp7oW5st/EA4qVrwAoNDp3MZZTcxzd7B5LHoR7k33xaPn/r73//xe3cc2S/Cp/gnTJc2Jo/VJRqufw9533YJJpeuajUrZSbUkugea0VanpBBpnh7y5Cb4Pmzqx6g5/BCX2+vqUbPEbrxzlhUfOHqTwcam6sZDHb/tJzxo+fR6UwAwJ5Dq0kkkBj/wpVrexSq5kD/qCkTl0VFpAEAFMrmIyc2VFbfYTK5Lwya6r7YAAAcIatZjL5UMHota5GYSG5bxbKk7Or+Ix8kxA1eumDf9CkfFJdePvrDRmQThUKtrrlfW1e6eP6e9SvPsdk+3x//BNl08Nh6SVPVG7O3zPvjVxqN/EHZFTeFBwCg0CgtYgNqMXSPGoWZxoCWNuk5Ll/fExs9YMKY+f7CiKSEoS/nLLh7/5xc8Sw/pNGoe3X8YgadRaczB6SPa2p5YjTq5Yqmyqrb2b/7Q3zswKDAmCkTlzEZbrxcaAyKVmVGLYbukUIlURkQWjo7YrVa658+TIgb3PZNbPQAAECDpBL56C+MQK5xAACbxQcAaHXKpuYnAIDI8GeDLEgkUkS4GwdcUBkUJpeK2gqBXtEMOivN5JYZpyaT3mq1XLi8499Xvmv/vVL1LOcqlcrosJPNYNQ+t4lB72SjIRYsJqtGbiKhtcaje+T4UM0G9IrdCWg0JoVCHZ41fUjmq+2/53KcdcXQ6SwAgF6vbvtGp3fjwGezwcLioltCv655fhSTwQIpqv89N5kcFtK3Vd4QGBCN/E/gF0YmU9lsZwuaBQgjAQBPJRXIR4vF/Lj6rjvCQzAbLWw++m0N3WNwJNOkRX9gdY6Rw2c9KLty+VpBU3ON+KnowNF123a+pdc7e18T+IVERaRdvlYgqvxV/FR05MSnVCrNTeEBAHQKQ3BUx9vL86B7jEnlyCVaSFE9T3pK9u+nfVhUfGHTl/nfFiyyWEzzXv+KyUR5/s7M+yjAP/Kf+5bu2POur2/wgIzxNretGaCVaePSuajFMLWHf7+5nhvsx/Gzn1qgB2M2WKpvif/0aQxqSUy/9tKH81XNcPIb4wtFozplKKbVJzG9YCcN5v96rtWgMTI49pN/37x94vT5f9jdZDYZqDT795cZU9elJr2IJQAsVNfc+26fnYz2AACz2Uil0OyOJMh9dVW/tDGOjtnwSDbt7ThHW9uDtZ/r8QP1L/9ShqcF2d2q12u0OvvtdFqdis2y38XO5QjaXrO7jslkUKmlDsJT0+lsu+vXcDh+DLr9pqzGCllsEmXQGEzjYVzoL/xXgcRMZvP8e0Vrrl5jVNRKp7+HtQPWhdaw8XOCpVUygwa9Ma4HUPmT+LXFLsx/cq1VcfaaqMbyJrPRLa/l3kPtvYZZqyNdGqLkmkcKlZS/LLzqZr1a1jOzdxl1poeXn0x+O9A3wP4T1RGdHCd15It6MosljMTBCkXYkdUr5fWKWe9H0pkuN/53frzZbxdkt87LguMFwijv7QLDiLxB3fxYFtePm53nMHOUc7o0/tFitl073lIj0lIZNK6QwwtgUWhuaal0B1aLVS3VqZq1WrkuNJY1Yqo/17fzzdUQxuOajNaaMm15kVrVamkR6xgsKlfINOnd0tTWdZg8mrJJZ9RZeP50Lo+SmMmNTmFjaRlzDuT5XBazTaM0a1UWi8lLp4mRySQWj8zhU2kMmD2g3jsvDl/00l5p6BAe4UB4hAPhEQ6ERzgQHuHw
/6dv5ct5mp8JAAAAAElFTkSuQmCC", 391 | "text/plain": [ 392 | "" 393 | ] 394 | }, 395 | "execution_count": 27, 396 | "metadata": {}, 397 | "output_type": "execute_result" 398 | } 399 | ], 400 | "source": [ 401 | "def retrieve(state: State):\n", 402 | " retrieved_docs = vectorstore.similarity_search(state[\"question\"])\n", 403 | " return {\"context\": retrieved_docs}\n", 404 | "\n", 405 | "def generate(state: State):\n", 406 | " docs_content = format_docs(state[\"context\"])\n", 407 | " rag_prompt = rag_prompt_template.format(\n", 408 | " question=state[\"question\"],\n", 409 | " context=docs_content\n", 410 | " )\n", 411 | " response = llm.invoke([\n", 412 | " HumanMessage(content=rag_prompt)\n", 413 | " ])\n", 414 | " return {\"answer\": response.content}\n", 415 | "\n", 416 | "\n", 417 | "graph_builder = StateGraph(State).add_sequence([retrieve, generate])\n", 418 | "graph_builder.add_edge(START, \"retrieve\")\n", 419 | "graph_builder.add_edge(\"generate\", END)\n", 420 | "graph = graph_builder.compile()\n", 421 | "graph" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 28, 427 | "id": "5faffc5a-bf35-4891-a23c-4696c12ce831", 428 | "metadata": {}, 429 | "outputs": [ 430 | { 431 | "name": "stdout", 432 | "output_type": "stream", 433 | "text": [ 434 | "Task decomposition is the process of breaking down a complex task into smaller, manageable steps or subgoals. This can be achieved through various methods, including prompting a language model, using task-specific instructions, or incorporating human inputs. It enhances the model's performance by allowing it to tackle each component systematically.\n" 435 | ] 436 | } 437 | ], 438 | "source": [ 439 | "response = graph.invoke({\"question\": \"What is Task Decomposition?\"})\n", 440 | "print(response[\"answer\"])" 441 | ] 442 | } 443 | ], 444 | "metadata": { 445 | "kernelspec": { 446 | "display_name": "Python 3 (ipykernel)", 447 | "language": "python", 448 | "name": "python3" 449 | }, 450 | "language_info": { 451 | "codemirror_mode": { 452 | "name": "ipython", 453 | "version": 3 454 | }, 455 | "file_extension": ".py", 456 | "mimetype": "text/x-python", 457 | "name": "python", 458 | "nbconvert_exporter": "python", 459 | "pygments_lexer": "ipython3", 460 | "version": "3.11.11" 461 | } 462 | }, 463 | "nbformat": 4, 464 | "nbformat_minor": 5 465 | } 466 | -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/02-indexing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "66828cb1-f3c2-4d37-a20e-ed22501f557b", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import os\n", 11 | "from pathlib import Path\n", 12 | "import re\n", 13 | "\n", 14 | "from dotenv import find_dotenv, load_dotenv" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "id": "bdf02132-e1af-4492-8fbd-93f3b49d9b4f", 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "data": { 25 | "text/plain": [ 26 | "True" 27 | ] 28 | }, 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "output_type": "execute_result" 32 | } 33 | ], 34 | "source": [ 35 | "load_dotenv(find_dotenv('.env'))" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "id": "bc4ea5ac-266b-4cd3-a01b-ccdef04cc2cc", 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "os.environ[\"LANGCHAIN_PROJECT\"] = \"RAG From Scratch: Part 2 (Indexing)\"" 46 
| ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "id": "90c5b359-5120-4613-a0ab-0c2402462dfe", 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "DATA_PATH = Path('./data')\n", 56 | "VECTORSTORE_PATH = DATA_PATH / 'vectorstore'" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "id": "17ee3446-d8cc-4fb6-851e-c27b9806aded", 62 | "metadata": {}, 63 | "source": [ 64 | "# Part 2: Indexing" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "id": "e21c8db7-7b68-4c57-aac3-df18f4cf5e41", 70 | "metadata": {}, 71 | "source": [ 72 | "![](images/02-indexing.png)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "id": "c321cec8-1e53-4e81-9751-72aecd8b6c57", 78 | "metadata": {}, 79 | "source": [ 80 | "## Configure components" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 5, 86 | "id": "4eb815c5-6de0-40e6-a698-6a5e31b3102c", 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "from langchain_openai import OpenAIEmbeddings" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 6, 96 | "id": "a9c9f25d-589f-4aab-852d-370a6a90f658", 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "1536" 103 | ] 104 | }, 105 | "execution_count": 6, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "embeddings_model_name = \"text-embedding-3-small\"\n", 112 | "embeddings = OpenAIEmbeddings(model=embeddings_model_name)\n", 113 | "len(embeddings.embed_query(\"Hello\"))" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "id": "67ae5556-a40c-4059-8144-1cf9164fb857", 119 | "metadata": {}, 120 | "source": [ 121 | "## Load documents" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 7, 127 | "id": "c3b5f058-7604-4101-9fba-0c10e225ae3c", 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "name": "stderr", 132 | "output_type": "stream", 133 | "text": [ 134 | "USER_AGENT environment variable not set, consider setting it to identify your requests.\n" 135 | ] 136 | } 137 | ], 138 | "source": [ 139 | "import bs4\n", 140 | "from langchain_community.document_loaders import WebBaseLoader" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 8, 146 | "id": "1c432b82-b767-4fd8-b75b-ef108726a41d", 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "data": { 151 | "text/plain": [ 152 | "1" 153 | ] 154 | }, 155 | "execution_count": 8, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "loader = WebBaseLoader(\n", 162 | " web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n", 163 | " bs_kwargs=dict(\n", 164 | " parse_only=bs4.SoupStrainer(\n", 165 | " class_=(\"post-content\", \"post-title\", \"post-header\")\n", 166 | " )\n", 167 | " ),\n", 168 | ")\n", 169 | "docs = loader.load()\n", 170 | "len(docs)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 9, 176 | "id": "f51ac995-833e-4670-aa5d-84d75b6c479e", 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "name": "stdout", 181 | "output_type": "stream", 182 | "text": [ 183 | "\n", 184 | "\n", 185 | " LLM Powered Autonomous Agents\n", 186 | " \n", 187 | "Date: June 23, 2023 | Estimated Reading Time: 31 min | Author: Lilian Weng\n", 188 | "\n", 189 | "\n", 190 | "Building agents with LLM (large language model) as its core controller is a cool concept. 
Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\n", 191 | "Agent System Overview#\n", 192 | "In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n", 193 | "\n", 194 | "Planning\n", 195 | "\n", 196 | "Subgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\n", 197 | "Reflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\n", 198 | "\n", 199 | "\n", 200 | "Memory\n", 201 | "\n", 202 | "Short-term memory: I \n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "print(docs[0].page_content[:1000])" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "id": "b02e6b41-544d-4c0e-be1d-0737617b8316", 213 | "metadata": {}, 214 | "source": [ 215 | "## Split documents" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 10, 221 | "id": "027b5bbd-04ac-4922-888a-f2cbbcc33ccc", 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "from langchain.text_splitter import RecursiveCharacterTextSplitter" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 11, 231 | "id": "81aa6281-08a7-401a-9277-2fab14cd4946", 232 | "metadata": {}, 233 | "outputs": [ 234 | { 235 | "data": { 236 | "text/plain": [ 237 | "66" 238 | ] 239 | }, 240 | "execution_count": 11, 241 | "metadata": {}, 242 | "output_type": "execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", 247 | "splits = text_splitter.split_documents(docs)\n", 248 | "len(splits)" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "id": "7857e737-7d88-4804-a8c4-96cd162f0821", 254 | "metadata": {}, 255 | "source": [ 256 | "## Store documents" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 12, 262 | "id": "d510ea37-14e6-4f18-99ca-d092197cd939", 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "import chromadb\n", 267 | "from chromadb.config import Settings\n", 268 | "from langchain_chroma import Chroma\n", 269 | "from langchain_core.documents import Document" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 13, 275 | "id": "8a0218b5-2b6a-40fc-986e-023808e8ceab", 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "def get_collection_size(vectorstore):\n", 280 | " try:\n", 281 | " collection_size = len(vectorstore.get()[\"ids\"])\n", 282 | " except Exception as _:\n", 283 | " collection_size = 0\n", 284 | "\n", 285 | " return collection_size" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 14, 291 | "id": "81904bc1-cf89-4ebf-a6da-eb8112121896", 292 | "metadata": {}, 293 | "outputs": [ 294 | { 295 | "data": { 296 | "text/plain": [ 297 | "0" 298 | ] 299 | }, 300 | "execution_count": 14, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "source": [ 306 | "collection_name=\"embeddings\"\n", 307 | "\n", 308 | "vectorstore_settings = Settings(anonymized_telemetry=False)\n", 309 | "client = chromadb.PersistentClient(\n", 310 | " path=str(VECTORSTORE_PATH), 
settings=vectorstore_settings\n", 311 | ")\n", 312 | "\n", 313 | "Chroma(collection_name=collection_name, client=client).delete_collection()\n", 314 | "\n", 315 | "vectorstore = Chroma(\n", 316 | " collection_name=collection_name, embedding_function=embeddings, client=client\n", 317 | ")\n", 318 | "\n", 319 | "get_collection_size(vectorstore)" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 15, 325 | "id": "93ef079a-b68c-45e5-b727-5b855e0ad3a1", 326 | "metadata": {}, 327 | "outputs": [ 328 | { 329 | "data": { 330 | "text/plain": [ 331 | "66" 332 | ] 333 | }, 334 | "execution_count": 15, 335 | "metadata": {}, 336 | "output_type": "execute_result" 337 | } 338 | ], 339 | "source": [ 340 | "vectorstore.add_documents(splits)\n", 341 | "get_collection_size(vectorstore)" 342 | ] 343 | }, 344 | { 345 | "attachments": {}, 346 | "cell_type": "markdown", 347 | "id": "9bcb5d79-c382-4166-96dd-e28cc434053e", 348 | "metadata": {}, 349 | "source": [ 350 | "**Tokenization**\n", 351 | "- [OpenAI tokenizer](https://platform.openai.com/tokenizer)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 16, 357 | "id": "252fe61e-f27d-45fb-b6dd-bf599036a926", 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "import tiktoken" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 17, 367 | "id": "357d2986-13d3-40ba-b6c9-921d85880582", 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "def num_tokens_from_string(string: str, encoding_name: str) -> int:\n", 372 | " \"\"\"Returns the number of tokens in a text string.\"\"\"\n", 373 | " encoding = tiktoken.get_encoding(encoding_name)\n", 374 | " num_tokens = len(encoding.encode(string))\n", 375 | " return num_tokens" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": 18, 381 | "id": "e136e704-ecc5-4373-a86a-1af5adc16091", 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [ 385 | "query = \"What kinds of pets do I like?\"\n", 386 | "document = \"My favorite pet is a cat.\"" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 19, 392 | "id": "ae17be41-bbe9-4be9-818f-21a3d504f231", 393 | "metadata": {}, 394 | "outputs": [ 395 | { 396 | "data": { 397 | "text/plain": [ 398 | "'cl100k_base'" 399 | ] 400 | }, 401 | "execution_count": 19, 402 | "metadata": {}, 403 | "output_type": "execute_result" 404 | } 405 | ], 406 | "source": [ 407 | "openai_encoding_name = tiktoken.encoding_for_model(embeddings_model_name).name\n", 408 | "openai_encoding_name" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 20, 414 | "id": "bd11a8bc-fea6-43a1-80e5-c3d3304df479", 415 | "metadata": {}, 416 | "outputs": [ 417 | { 418 | "data": { 419 | "text/plain": [ 420 | "8" 421 | ] 422 | }, 423 | "execution_count": 20, 424 | "metadata": {}, 425 | "output_type": "execute_result" 426 | } 427 | ], 428 | "source": [ 429 | "num_tokens_from_string(query, openai_encoding_name)" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 21, 435 | "id": "88df3809-be58-444e-ac4f-88325dec10bd", 436 | "metadata": {}, 437 | "outputs": [ 438 | { 439 | "data": { 440 | "text/plain": [ 441 | "7" 442 | ] 443 | }, 444 | "execution_count": 21, 445 | "metadata": {}, 446 | "output_type": "execute_result" 447 | } 448 | ], 449 | "source": [ 450 | "num_tokens_from_string(document, openai_encoding_name)" 451 | ] 452 | }, 453 | { 454 | "cell_type": "markdown", 455 | "id": "7390af5d-98b7-444e-b22c-ee0179c622af", 456 | 
"metadata": {}, 457 | "source": [ 458 | "**Embeddings**" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 22, 464 | "id": "e39b7f41-bb30-4120-b11c-14549b283a9b", 465 | "metadata": {}, 466 | "outputs": [ 467 | { 468 | "data": { 469 | "text/plain": [ 470 | "(1536, 1536)" 471 | ] 472 | }, 473 | "execution_count": 22, 474 | "metadata": {}, 475 | "output_type": "execute_result" 476 | } 477 | ], 478 | "source": [ 479 | "query_embeddings = embeddings.embed_query(query)\n", 480 | "document_embeddings = embeddings.embed_documents([document])[0]\n", 481 | "\n", 482 | "len(query_embeddings), len(document_embeddings)" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "id": "5ac15fad-1c5b-4039-a70a-e66b1dbc7e4d", 488 | "metadata": {}, 489 | "source": [ 490 | "**Cosine similarity**" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": 23, 496 | "id": "86ddb065-77d3-43dd-a0a5-973dd940fced", 497 | "metadata": {}, 498 | "outputs": [], 499 | "source": [ 500 | "import numpy as np" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": 24, 506 | "id": "15569e34-6421-47b6-8b01-797e9c106a5c", 507 | "metadata": {}, 508 | "outputs": [], 509 | "source": [ 510 | "def cosine_similarity(vec1, vec2):\n", 511 | " dot_product = np.dot(vec1, vec2)\n", 512 | " norm_vec1 = np.linalg.norm(vec1)\n", 513 | " norm_vec2 = np.linalg.norm(vec2)\n", 514 | " return dot_product / (norm_vec1 * norm_vec2)" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": 25, 520 | "id": "90a29dcc-6f34-479f-b6a3-cd8b8f6eb340", 521 | "metadata": {}, 522 | "outputs": [ 523 | { 524 | "name": "stdout", 525 | "output_type": "stream", 526 | "text": [ 527 | "Cosine Similarity: 0.546556128332727\n" 528 | ] 529 | } 530 | ], 531 | "source": [ 532 | "similarity = cosine_similarity(query_embeddings, document_embeddings)\n", 533 | "print(\"Cosine Similarity:\", similarity)" 534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": 26, 539 | "id": "2b48c94b-b16c-484b-9e61-19d6ae8aafc3", 540 | "metadata": {}, 541 | "outputs": [ 542 | { 543 | "name": "stdout", 544 | "output_type": "stream", 545 | "text": [ 546 | "Cosine Similarity: 0.09272330847288396\n" 547 | ] 548 | } 549 | ], 550 | "source": [ 551 | "non_relevant_document = \"The weather is fine.\"\n", 552 | "non_relevant_document_embeddings = embeddings.embed_documents([non_relevant_document])[0]\n", 553 | "\n", 554 | "similarity = cosine_similarity(query_embeddings, non_relevant_document_embeddings)\n", 555 | "print(\"Cosine Similarity:\", similarity)" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": 27, 561 | "id": "7e5b821b-69d2-46e9-a0f0-69206421fe8a", 562 | "metadata": {}, 563 | "outputs": [ 564 | { 565 | "name": "stdout", 566 | "output_type": "stream", 567 | "text": [ 568 | "Cosine Similarity: 1.0\n" 569 | ] 570 | } 571 | ], 572 | "source": [ 573 | "similarity = cosine_similarity(query_embeddings, query_embeddings)\n", 574 | "print(\"Cosine Similarity:\", similarity)" 575 | ] 576 | } 577 | ], 578 | "metadata": { 579 | "kernelspec": { 580 | "display_name": "Python 3 (ipykernel)", 581 | "language": "python", 582 | "name": "python3" 583 | }, 584 | "language_info": { 585 | "codemirror_mode": { 586 | "name": "ipython", 587 | "version": 3 588 | }, 589 | "file_extension": ".py", 590 | "mimetype": "text/x-python", 591 | "name": "python", 592 | "nbconvert_exporter": "python", 593 | "pygments_lexer": "ipython3", 594 | "version": "3.11.11" 595 | } 596 | }, 597 
| "nbformat": 4, 598 | "nbformat_minor": 5 599 | } 600 | -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/01-01-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/01-01-overview.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/01-02-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/01-02-overview.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/02-indexing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/02-indexing.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/03-01-retrieval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/03-01-retrieval.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/03-02-retrieval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/03-02-retrieval.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/04-generation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/04-generation.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/05-multi-query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/05-multi-query.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/06-rag-fusion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/06-rag-fusion.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/07-01-decomposition-recursive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/07-01-decomposition-recursive.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/07-02-decomposition-parallel.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/07-02-decomposition-parallel.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/08-step-back.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/08-step-back.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/09-hyde.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/09-hyde.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/10-01-logical-routing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/10-01-logical-routing.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/10-01-structured-output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/10-01-structured-output.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/10-02-semantic-routing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/10-02-semantic-routing.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/11-query-construction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/11-query-construction.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/11-self-query.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/11-self-query.jpg -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/12-01-multi-vector-summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/12-01-multi-vector-summary.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/12-02-multi-vector-chunks.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/12-02-multi-vector-chunks.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/12-03-multi-vector-hypothetical-questions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/12-03-multi-vector-hypothetical-questions.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/13-raptor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/13-raptor.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/14-01-colbert.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/14-01-colbert.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/14-02-colbert.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/14-02-colbert.jpg -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/15-crag-implementation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/15-crag-implementation.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/15-crag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/15-crag.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/16-self-rag-implementation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/16-self-rag-implementation.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/16-self-rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/16-self-rag.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/generation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/generation.png 
-------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/indexing-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/indexing-01.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/query-construction-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/query-construction-01.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/query-translation-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/query-translation-01.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/query-translation-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/query-translation-02.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/rag.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/retrieval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/retrieval.png -------------------------------------------------------------------------------- /notebooks/rag-from-scratch/images/routing-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/routing-01.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "llm-rag" 3 | version = "0.1.0" 4 | description = "LLM RAG" 5 | readme = "README.md" 6 | requires-python = ">=3.11" 7 | dependencies = [ 8 | "langchain-chroma>=0.2.2", 9 | "langchain-community>=0.3.18", 10 | "langchain-openai>=0.3.7", 11 | "langchain-weaviate>=0.0.4", 12 | "langgraph==0.3.31", 13 | "langgraph-cli[inmem]>=0.1.74", 14 | "lark>=1.2.2", 15 | "pandas>=2.2.3", 16 | "python-dotenv>=1.0.1", 17 | "pytube==11.0.2", 18 | "rich>=13.9.4", 19 | "scikit-learn>=1.6.1", 20 | "transformers==4.49.0", 21 | "umap-learn>=0.5.7", 22 | "youtube-transcript-api==1.0.3", 23 | ] 24 | 25 | [project.optional-dependencies] 26 | ragatouille = ["ragatouille>=0.0.9"] 27 | 28 | [dependency-groups] 29 | dev = [ 30 | "anywidget>=0.9.18", 31 | "ipywidgets>=8.1.5", 32 | "jupyterlab>=4.3.5", 33 | "jupyterlab-git>=0.51.0", 
34 | "plotly[express]>=6.0.1", 35 | "ruff>=0.9.7", 36 | ] 37 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by uv via the following command: 2 | # uv export --no-hashes --no-annotate 3 | aiohappyeyeballs==2.6.1 4 | aiohttp==3.11.16 5 | aiosignal==1.3.2 6 | annotated-types==0.7.0 7 | anyio==4.9.0 8 | anywidget==0.9.18 9 | appnope==0.1.4 ; sys_platform == 'darwin' 10 | argon2-cffi==23.1.0 11 | argon2-cffi-bindings==21.2.0 12 | arrow==1.3.0 13 | asgiref==3.8.1 14 | asttokens==3.0.0 15 | async-lru==2.0.5 16 | attrs==25.3.0 17 | authlib==1.3.1 18 | babel==2.17.0 19 | backoff==2.2.1 20 | bcrypt==4.3.0 21 | beautifulsoup4==4.13.4 22 | bleach==6.2.0 23 | blockbuster==1.5.24 ; python_full_version < '4.0' 24 | build==1.2.2.post1 25 | cachetools==5.5.2 26 | certifi==2025.1.31 27 | cffi==1.17.1 28 | charset-normalizer==3.4.1 29 | chroma-hnswlib==0.7.6 30 | chromadb==0.6.3 31 | click==8.1.8 32 | cloudpickle==3.1.1 ; python_full_version < '4.0' 33 | colorama==0.4.6 34 | coloredlogs==15.0.1 35 | comm==0.2.2 36 | cryptography==44.0.2 37 | dataclasses-json==0.6.7 38 | debugpy==1.8.14 39 | decorator==5.2.1 40 | defusedxml==0.7.1 41 | deprecated==1.2.18 42 | distro==1.9.0 43 | durationpy==0.9 44 | executing==2.2.0 45 | fastapi==0.115.12 46 | fastjsonschema==2.21.1 47 | filelock==3.18.0 48 | flatbuffers==25.2.10 49 | forbiddenfruit==0.1.4 ; python_full_version < '4.0' and implementation_name == 'cpython' 50 | fqdn==1.5.1 51 | frozenlist==1.6.0 52 | fsspec==2024.12.0 53 | gitdb==4.0.12 54 | gitpython==3.1.44 55 | google-auth==2.39.0 56 | googleapis-common-protos==1.70.0 57 | greenlet==3.2.0 ; (python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64') 58 | grpcio==1.71.0 59 | grpcio-health-checking==1.71.0 60 | grpcio-tools==1.71.0 61 | h11==0.14.0 62 | httpcore==1.0.8 63 | httptools==0.6.4 64 | httpx==0.28.1 65 | httpx-sse==0.4.0 66 | huggingface-hub==0.30.2 67 | humanfriendly==10.0 68 | idna==3.10 69 | importlib-metadata==8.6.1 70 | importlib-resources==6.5.2 71 | ipykernel==6.29.5 72 | ipython==9.1.0 73 | ipython-pygments-lexers==1.1.1 74 | ipywidgets==8.1.6 75 | isoduration==20.11.0 76 | jedi==0.19.2 77 | jinja2==3.1.6 78 | jiter==0.9.0 79 | joblib==1.4.2 80 | json5==0.12.0 81 | jsonpatch==1.33 82 | jsonpointer==3.0.0 83 | jsonschema==4.23.0 84 | jsonschema-rs==0.29.1 ; python_full_version < '4.0' 85 | jsonschema-specifications==2024.10.1 86 | jupyter-client==8.6.3 87 | jupyter-core==5.7.2 88 | jupyter-events==0.12.0 89 | jupyter-lsp==2.2.5 90 | jupyter-server==2.15.0 91 | jupyter-server-mathjax==0.2.6 92 | jupyter-server-terminals==0.5.3 93 | jupyterlab==4.4.0 94 | jupyterlab-git==0.51.1 95 | jupyterlab-pygments==0.3.0 96 | jupyterlab-server==2.27.3 97 | jupyterlab-widgets==3.0.14 98 | kubernetes==32.0.1 99 | langchain==0.3.23 100 | langchain-chroma==0.2.2 ; python_full_version >= '3.13' 101 | langchain-chroma==0.2.3 ; python_full_version < '3.13' 102 | langchain-community==0.3.21 103 | langchain-core==0.3.54 104 | langchain-openai==0.3.14 
105 | langchain-text-splitters==0.3.8 106 | langchain-weaviate==0.0.4 107 | langgraph==0.3.31 108 | langgraph-api==0.1.9 ; python_full_version < '4.0' 109 | langgraph-checkpoint==2.0.24 110 | langgraph-cli==0.2.5 111 | langgraph-prebuilt==0.1.8 112 | langgraph-runtime-inmem==0.0.4 ; python_full_version < '4.0' 113 | langgraph-sdk==0.1.61 114 | langsmith==0.3.32 115 | lark==1.2.2 116 | llvmlite==0.44.0 117 | markdown-it-py==3.0.0 118 | markupsafe==3.0.2 119 | marshmallow==3.26.1 120 | matplotlib-inline==0.1.7 121 | mdurl==0.1.2 122 | mistune==3.1.3 123 | mmh3==5.1.0 124 | monotonic==1.6 125 | mpmath==1.3.0 126 | multidict==6.4.3 127 | mypy-extensions==1.0.0 128 | narwhals==1.35.0 129 | nbclient==0.10.2 130 | nbconvert==7.16.6 131 | nbdime==4.0.2 132 | nbformat==5.10.4 133 | nest-asyncio==1.6.0 134 | notebook-shim==0.2.4 135 | numba==0.61.2 136 | numpy==1.26.4 137 | oauthlib==3.2.2 138 | onnxruntime==1.21.1 139 | openai==1.75.0 140 | opentelemetry-api==1.32.1 141 | opentelemetry-exporter-otlp-proto-common==1.32.1 142 | opentelemetry-exporter-otlp-proto-grpc==1.32.1 143 | opentelemetry-instrumentation==0.53b1 144 | opentelemetry-instrumentation-asgi==0.53b1 145 | opentelemetry-instrumentation-fastapi==0.53b1 146 | opentelemetry-proto==1.32.1 147 | opentelemetry-sdk==1.32.1 148 | opentelemetry-semantic-conventions==0.53b1 149 | opentelemetry-util-http==0.53b1 150 | orjson==3.10.16 151 | ormsgpack==1.9.1 152 | overrides==7.7.0 153 | packaging==24.2 154 | pandas==2.2.3 155 | pandocfilters==1.5.1 156 | parso==0.8.4 157 | pexpect==4.9.0 158 | platformdirs==4.3.7 159 | plotly==6.0.1 160 | posthog==3.25.0 161 | prometheus-client==0.21.1 162 | prompt-toolkit==3.0.51 163 | propcache==0.3.1 164 | protobuf==5.29.4 165 | psutil==7.0.0 166 | psygnal==0.12.0 167 | ptyprocess==0.7.0 168 | pure-eval==0.2.3 169 | pyasn1==0.6.1 170 | pyasn1-modules==0.4.2 171 | pycparser==2.22 172 | pydantic==2.11.3 173 | pydantic-core==2.33.1 174 | pydantic-settings==2.9.1 175 | pygments==2.19.1 176 | pyjwt==2.10.1 ; python_full_version < '4.0' 177 | pynndescent==0.5.13 178 | pypika==0.48.9 179 | pyproject-hooks==1.2.0 180 | pyreadline3==3.5.4 ; sys_platform == 'win32' 181 | python-dateutil==2.9.0.post0 182 | python-dotenv==1.1.0 183 | python-json-logger==3.3.0 184 | pytube==11.0.2 185 | pytz==2025.2 186 | pywin32==310 ; platform_python_implementation != 'PyPy' and sys_platform == 'win32' 187 | pywinpty==2.0.15 ; os_name == 'nt' 188 | pyyaml==6.0.2 189 | pyzmq==26.4.0 190 | referencing==0.36.2 191 | regex==2024.11.6 192 | requests==2.32.3 193 | requests-oauthlib==2.0.0 194 | requests-toolbelt==1.0.0 195 | rfc3339-validator==0.1.4 196 | rfc3986-validator==0.1.1 197 | rich==14.0.0 198 | rpds-py==0.24.0 199 | rsa==4.9.1 200 | ruff==0.11.6 201 | safetensors==0.5.3 202 | scikit-learn==1.6.1 203 | scipy==1.15.2 204 | send2trash==1.8.3 205 | setuptools==79.0.0 206 | shellingham==1.5.4 207 | simsimd==6.2.1 208 | six==1.17.0 209 | smmap==5.0.2 210 | sniffio==1.3.1 211 | soupsieve==2.7 212 | sqlalchemy==2.0.40 213 | sse-starlette==2.1.3 ; python_full_version < '4.0' 214 | stack-data==0.6.3 215 | starlette==0.46.2 216 | structlog==25.2.0 ; python_full_version < '4.0' 217 | sympy==1.13.1 218 | tenacity==9.1.2 219 | terminado==0.18.1 220 | threadpoolctl==3.6.0 221 | tiktoken==0.9.0 222 | tinycss2==1.4.0 223 | tokenizers==0.21.1 224 | tornado==6.4.2 225 | tqdm==4.67.1 226 | traitlets==5.14.3 227 | transformers==4.49.0 228 | typer==0.15.2 229 | types-python-dateutil==2.9.0.20241206 230 | typing-extensions==4.13.2 231 | typing-inspect==0.9.0 
232 | typing-inspection==0.4.0 233 | tzdata==2025.2 234 | umap-learn==0.5.7 235 | uri-template==1.3.0 236 | urllib3==2.4.0 237 | uvicorn==0.34.2 238 | uvloop==0.21.0 ; platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32' 239 | validators==0.34.0 240 | watchfiles==1.0.5 241 | wcwidth==0.2.13 242 | weaviate-client==4.13.2 243 | webcolors==24.11.1 244 | webencodings==0.5.1 245 | websocket-client==1.8.0 246 | websockets==15.0.1 247 | widgetsnbextension==4.0.14 248 | wrapt==1.17.2 249 | xxhash==3.5.0 250 | yarl==1.20.0 251 | youtube-transcript-api==1.0.3 252 | zipp==3.21.0 253 | zstandard==0.23.0 254 | -------------------------------------------------------------------------------- /src/llm_rag/__init__.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from dotenv import find_dotenv, load_dotenv 4 | from langchain_openai import ChatOpenAI, OpenAIEmbeddings 5 | 6 | load_dotenv(find_dotenv()) 7 | 8 | project_path = Path(__file__).resolve().parents[2] 9 | 10 | llm = ChatOpenAI(model="gpt-4o-mini", temperature=1) 11 | embeddings = OpenAIEmbeddings(model="text-embedding-3-small") 12 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/colbert/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/colbert/__init__.py -------------------------------------------------------------------------------- /src/llm_rag/graphs/colbert/colbert_model.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict 2 | 3 | from langchain_core.documents import Document 4 | from langchain_core.messages import HumanMessage 5 | from langgraph.graph import END, START, StateGraph 6 | from llm_rag import llm 7 | from llm_rag.indexing.colbert_model import retriever 8 | from rich import print as rprint 9 | from rich.markdown import Markdown 10 | from rich.pretty import Pretty 11 | 12 | rag_prompt_template = """Answer the following question based on this context: 13 | 14 | {context} 15 | 16 | Question: {question}""" 17 | 18 | 19 | def format_docs(docs): 20 | return "\n\n".join(doc.page_content for doc in docs) 21 | 22 | 23 | class State(TypedDict): 24 | question: str 25 | context: list[Document] 26 | answer: str 27 | 28 | 29 | def retrieve(state: State): 30 | retrieved_docs = retriever.invoke(state["question"]) 31 | return {"context": retrieved_docs} 32 | 33 | 34 | def generate(state: State): 35 | docs_content = format_docs(state["context"]) 36 | rag_prompt = rag_prompt_template.format( 37 | question=state["question"], context=docs_content 38 | ) 39 | response = llm.invoke([HumanMessage(content=rag_prompt)]) 40 | return {"answer": response.content} 41 | 42 | 43 | graph_builder = StateGraph(State) 44 | 45 | graph_builder.add_node("retrieve", retrieve) 46 | graph_builder.add_node("generate", generate) 47 | 48 | graph_builder.add_edge(START, "retrieve") 49 | graph_builder.add_edge("retrieve", "generate") 50 | graph_builder.add_edge("generate", END) 51 | 52 | graph = graph_builder.compile() 53 | 54 | 55 | if __name__ == "__main__": 56 | queries = [ 57 | "What is task decomposition for LLM agents?", 58 | "What are main steps for collecting human data?", 59 | ] 60 | 61 | for query in queries: 62 | response = graph.invoke({"question": query}) 63 | 
rprint(Pretty(response, no_wrap=False)) 64 | rprint(Markdown(response["answer"])) 65 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/crag/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/crag/__init__.py -------------------------------------------------------------------------------- /src/llm_rag/graphs/crag/crag.py: -------------------------------------------------------------------------------- 1 | import operator 2 | from typing import Annotated, Literal, TypedDict 3 | 4 | from langchain_community.tools.tavily_search import TavilySearchResults 5 | from langchain_core.documents import Document 6 | from langchain_core.messages import HumanMessage 7 | from langchain_core.runnables import chain 8 | from langgraph.graph import END, START, StateGraph 9 | from pydantic import BaseModel, Field 10 | from rich import print as rprint 11 | from rich.markdown import Markdown 12 | from rich.pretty import Pretty 13 | 14 | from llm_rag import llm 15 | from llm_rag.indexing.reflection import retriever 16 | 17 | rag_prompt_template = """Answer the following question based on this context: 18 | 19 | {context} 20 | 21 | Question: {question} 22 | """ 23 | 24 | grading_prompt_template = """You are a grader assessing relevance of a retrieved document to a user question. 25 | If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant. 26 | Give a binary score to indicate whether the document is relevant to the question. 27 | 28 | Retrieved document: 29 | {document} 30 | 31 | User question: 32 | {question}""" 33 | 34 | query_rewriting_prompt_template = """You are a question re-writer that converts an input question to a better version that is optimized 35 | for web search. 36 | Look at the input and try to reason about the underlying semantic intent / meaning.
37 | 38 | Here is the initial question: 39 | {question} 40 | 41 | Formulate an improved question.""" 42 | 43 | 44 | def format_docs(docs: list[Document]) -> list[str]: 45 | return "\n\n".join(doc.page_content for doc in docs) 46 | 47 | 48 | class DocumentGrade(BaseModel): 49 | """Relevance check on retrieved document.""" 50 | 51 | chain_of_thought: str = Field( 52 | ..., 53 | description="Step by step reasoning to check if the document is relevant to the question", 54 | ) 55 | is_relevant: bool = Field(description="Document is relevant to the question") 56 | 57 | 58 | grader_llm = llm.with_structured_output(DocumentGrade, method="function_calling") 59 | 60 | 61 | @chain 62 | def grade_document(document, question): 63 | grading_prompt = grading_prompt_template.format( 64 | document=document, question=question 65 | ) 66 | response = grader_llm.invoke([HumanMessage(content=grading_prompt)]) 67 | return response 68 | 69 | 70 | class WebSearchQuery(BaseModel): 71 | """Question optimization for web search.""" 72 | 73 | chain_of_thought: str = Field( 74 | ..., description="Step by step reasoning to optimize query for web search" 75 | ) 76 | web_search_query: str = Field(description="Optimized web search query") 77 | 78 | 79 | web_search_llm = llm.with_structured_output(WebSearchQuery, method="function_calling") 80 | web_search_tool = TavilySearchResults(k=4) 81 | 82 | 83 | class State(TypedDict): 84 | question: str 85 | documents: list[Document] 86 | grades: list[DocumentGrade] 87 | is_web_search_required: bool 88 | web_search_query: str 89 | context: Annotated[list[Document], operator.add] 90 | answer: str 91 | 92 | 93 | def retrieve(state: State): 94 | question = state["question"] 95 | documents = retriever.invoke(question) 96 | return {"documents": documents} 97 | 98 | 99 | def grade_documents(state: State): 100 | question = state["question"] 101 | documents = state["documents"] 102 | 103 | grades = grade_document.batch(documents, question=question) 104 | filtered_documents = [ 105 | document for (document, grade) in zip(documents, grades) if grade.is_relevant 106 | ] 107 | is_web_search_required = len(filtered_documents) < len(documents) 108 | 109 | return { 110 | "context": filtered_documents, 111 | "grades": grades, 112 | "is_web_search_required": is_web_search_required, 113 | } 114 | 115 | 116 | def check_documents_relevance( 117 | state: State, 118 | ) -> Literal["rewrite_query", "generate_answer"]: 119 | is_web_search_required = state["is_web_search_required"] 120 | 121 | if is_web_search_required: 122 | return "rewrite_query" 123 | else: 124 | return "generate_answer" 125 | 126 | 127 | def rewrite_query(state: State): 128 | question = state["question"] 129 | query_rewriting_prompt = query_rewriting_prompt_template.format(question=question) 130 | response = web_search_llm.invoke(query_rewriting_prompt) 131 | return {"web_search_query": response.web_search_query} 132 | 133 | 134 | def web_search(state: State): 135 | query = state["web_search_query"] 136 | results = web_search_tool.invoke({"query": query}) 137 | documents = [Document(page_content=result["content"]) for result in results] 138 | return {"context": documents} 139 | 140 | 141 | def generate_answer(state: State): 142 | docs_content = format_docs(state["context"]) 143 | rag_prompt = rag_prompt_template.format( 144 | question=state["question"], context=docs_content 145 | ) 146 | response = llm.invoke([HumanMessage(content=rag_prompt)]) 147 | return {"answer": response.content} 148 | 149 | 150 | graph_builder = StateGraph(State) 
151 | 152 | graph_builder.add_node("retrieve", retrieve) 153 | graph_builder.add_node("grade_documents", grade_documents) 154 | graph_builder.add_node("rewrite_query", rewrite_query) 155 | graph_builder.add_node("web_search", web_search) 156 | graph_builder.add_node("generate_answer", generate_answer) 157 | 158 | graph_builder.add_edge(START, "retrieve") 159 | graph_builder.add_edge("retrieve", "grade_documents") 160 | graph_builder.add_conditional_edges("grade_documents", check_documents_relevance) 161 | graph_builder.add_edge("rewrite_query", "web_search") 162 | graph_builder.add_edge("web_search", "generate_answer") 163 | graph_builder.add_edge("generate_answer", END) 164 | 165 | graph = graph_builder.compile() 166 | 167 | 168 | if __name__ == "__main__": 169 | queries = [ 170 | "What are common types of agent memory?", 171 | "What are main steps for collecting human data?", 172 | "How does the AlphaCodium paper work?", 173 | ] 174 | 175 | for query in queries: 176 | response = graph.invoke({"question": query}) 177 | rprint(Pretty(response)) 178 | rprint(Markdown(response["answer"])) 179 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/decomposition/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/decomposition/__init__.py -------------------------------------------------------------------------------- /src/llm_rag/graphs/decomposition/parallel.py: -------------------------------------------------------------------------------- 1 | import operator 2 | from typing import Annotated, TypedDict 3 | 4 | from langchain_core.documents import Document 5 | from langchain_core.messages import HumanMessage 6 | from langchain_core.runnables import RunnableConfig 7 | from langgraph.constants import Send 8 | from langgraph.graph import END, START, StateGraph 9 | from llm_rag import llm 10 | from llm_rag.indexing.article import retriever 11 | from pydantic import BaseModel, Field 12 | from rich import print as rprint 13 | from rich.markdown import Markdown 14 | from rich.pretty import Pretty 15 | 16 | decomposition_prompt_template = """You are a helpful assistant that generates multiple sub-questions related to an input question. 17 | The goal is to break down the input into a set of sub-problems / sub-questions that can be answered sequentially. 
18 | Generate multiple search queries related to: {question}""" 19 | 20 | 21 | sub_question_prompt_template = """Answer the following question based on this context: 22 | 23 | {context} 24 | 25 | Question: {question} 26 | """ 27 | 28 | 29 | rag_prompt_template = """Here is a set of Q+A pairs: 30 | 31 | {context} 32 | 33 | Use these to synthesize an answer to the question: {question} 34 | """ 35 | 36 | 37 | def format_qa_pair(question: str, answer: str) -> str: 38 | return f"Question: {question} \nAnswer: {answer}\n\n\n" 39 | 40 | 41 | class State(TypedDict): 42 | question: str 43 | generated_sub_questions: list[str] 44 | qa_pairs: Annotated[list[dict[str, str]], operator.add] 45 | context: list[Document] 46 | answer: str 47 | 48 | 49 | class RetrieverState(TypedDict): 50 | generated_sub_question: str 51 | 52 | 53 | def generate_sub_questions(query: str, config: RunnableConfig) -> list[str]: 54 | max_generated_sub_questions_count = config["configurable"].get( 55 | "max_generated_sub_questions_count", 3 56 | ) 57 | 58 | class SubQuestionsGenerator(BaseModel): 59 | sub_questions: list[str] = Field( 60 | ..., 61 | description="List of generated sub-problems / sub-questions", 62 | max_items=max_generated_sub_questions_count, 63 | ) 64 | 65 | structured_llm = llm.with_structured_output( 66 | SubQuestionsGenerator, method="function_calling" 67 | ) 68 | decomposition_prompt = decomposition_prompt_template.format(question=query) 69 | response = structured_llm.invoke([HumanMessage(content=decomposition_prompt)]) 70 | questions = response.sub_questions 71 | 72 | return {"generated_sub_questions": questions} 73 | 74 | 75 | def assign_sub_questions(state: State): 76 | return [ 77 | Send("answer_sub_question", {"generated_sub_question": sub_question}) 78 | for sub_question in state["generated_sub_questions"] 79 | ] 80 | 81 | 82 | def answer_sub_question(state: RetrieverState): 83 | question = state["generated_sub_question"] 84 | context = retriever.invoke(question) 85 | sub_question_prompt = sub_question_prompt_template.format( 86 | context=context, question=question 87 | ) 88 | answer = llm.invoke([HumanMessage(content=sub_question_prompt)]) 89 | return {"qa_pairs": [{question: answer.content}]} 90 | 91 | 92 | def aggregate_qa_pairs(state: State): 93 | context = "" 94 | 95 | for qa_pair in state["qa_pairs"]: 96 | [(question, answer)] = qa_pair.items() 97 | context += format_qa_pair(question, answer) 98 | 99 | return {"context": context} 100 | 101 | 102 | def generate_answer(state: State): 103 | rag_prompt = rag_prompt_template.format( 104 | context=state["context"], question=state["question"] 105 | ) 106 | response = llm.invoke([HumanMessage(content=rag_prompt)]) 107 | return {"answer": response.content} 108 | 109 | 110 | class ConfigSchema(BaseModel): 111 | max_generated_sub_questions_count: int = Field(default=3, gt=1) 112 | 113 | 114 | graph_builder = StateGraph(State, ConfigSchema) 115 | 116 | graph_builder.add_node("generate_sub_questions", generate_sub_questions) 117 | graph_builder.add_node("answer_sub_question", answer_sub_question) 118 | graph_builder.add_node("aggregate_qa_pairs", aggregate_qa_pairs) 119 | graph_builder.add_node("generate_answer", generate_answer) 120 | 121 | graph_builder.add_edge(START, "generate_sub_questions") 122 | graph_builder.add_conditional_edges( 123 | "generate_sub_questions", assign_sub_questions, ["answer_sub_question"] 124 | ) 125 | graph_builder.add_edge("answer_sub_question", "aggregate_qa_pairs") 126 | graph_builder.add_edge("aggregate_qa_pairs", 
"generate_answer") 127 | graph_builder.add_edge("generate_answer", END) 128 | graph = graph_builder.compile() 129 | 130 | 131 | if __name__ == "__main__": 132 | query = "What are the main components of an LLM-powered autonomous agent system?" 133 | config = { 134 | "configurable": { 135 | "max_generated_sub_questions_count": 5, 136 | } 137 | } 138 | response = graph.invoke( 139 | {"question": query}, 140 | config=config, 141 | ) 142 | 143 | rprint(Pretty(response, max_depth=2)) 144 | rprint(Markdown(response["answer"])) 145 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/decomposition/recursive.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, TypedDict 2 | 3 | from langchain_core.documents import Document 4 | from langchain_core.messages import HumanMessage 5 | from langchain_core.runnables import RunnableConfig 6 | from langgraph.graph import END, START, StateGraph 7 | from pydantic import BaseModel, Field 8 | from rich import print as rprint 9 | from rich.markdown import Markdown 10 | 11 | from llm_rag import llm 12 | from llm_rag.indexing.article import vectorstore 13 | 14 | decomposition_prompt_template = """You are a helpful assistant that generates multiple sub-questions related to an input question. 15 | The goal is to break down the input into a set of sub-problems / sub-questions that can be answered sequentially. 16 | Generate multiple search queries related to: {question}""" 17 | 18 | 19 | recursive_prompt_template = """Here is the question you need to answer: 20 | 21 | {question} 22 | 23 | 24 | Here are any available background question + answer pairs: 25 | 26 | {qa_pairs} 27 | 28 | 29 | Here is additional context relevant to the question: 30 | 31 | {context} 32 | 33 | 34 | Use the above context and any background question + answer pairs to answer the question: 35 | 36 | {question} 37 | 38 | """ 39 | 40 | 41 | def format_qa_pair(question: str, answer: str) -> str: 42 | return f"Question: {question} \nAnswer:\n{answer}\n\n" 43 | 44 | 45 | class State(TypedDict): 46 | question: str 47 | all_questions: list[str] 48 | current_question_idx: int 49 | qa_pairs: list[str] 50 | context: list[Document] 51 | answer: str 52 | 53 | 54 | def generate_sub_questions(state: State, config: RunnableConfig) -> list[str]: 55 | max_generated_sub_questions_count = config["configurable"].get( 56 | "max_generated_sub_questions_count", 3 57 | ) 58 | query = state["question"] 59 | 60 | class SubQuestionsGenerator(BaseModel): 61 | sub_questions: list[str] = Field( 62 | ..., 63 | description="List of generated sub-problems / sub-questions", 64 | max_items=max_generated_sub_questions_count, 65 | ) 66 | 67 | structured_llm = llm.with_structured_output( 68 | SubQuestionsGenerator, method="function_calling" 69 | ) 70 | decomposition_prompt = decomposition_prompt_template.format(question=query) 71 | response = structured_llm.invoke([HumanMessage(content=decomposition_prompt)]) 72 | questions = response.sub_questions + [query] 73 | 74 | return {"all_questions": questions, "current_question_idx": 0} 75 | 76 | 77 | def retrieve_docs(state: State): 78 | question = state["all_questions"][state["current_question_idx"]] 79 | retrieved_docs = vectorstore.similarity_search(question) 80 | return {"context": retrieved_docs} 81 | 82 | 83 | def generate_answer(state: State): 84 | question = state["all_questions"][state["current_question_idx"]] 85 | recursive_prompt = recursive_prompt_template.format( 86 
| question=question, qa_pairs=state.get("qa_pairs", ""), context=state["context"] 87 | ) 88 | answer = llm.invoke([HumanMessage(content=recursive_prompt)]) 89 | qa_pair = format_qa_pair(question, answer.content) 90 | qa_pairs = state.get("qa_pairs", "") + qa_pair 91 | 92 | if state["current_question_idx"] == len(state["all_questions"]) - 1: 93 | return {"answer": answer.content} 94 | else: 95 | return { 96 | "qa_pairs": qa_pairs, 97 | "current_question_idx": state["current_question_idx"] + 1, 98 | } 99 | 100 | 101 | def check_answer_status(state: State) -> Literal["Next sub-question", "Final answer"]: 102 | if state.get("answer"): 103 | return "Final answer" 104 | else: 105 | return "Next sub-question" 106 | 107 | 108 | class ConfigSchema(BaseModel): 109 | max_generated_sub_questions_count: int = Field(default=3, gt=1) 110 | 111 | 112 | graph_builder = StateGraph(State, ConfigSchema) 113 | 114 | graph_builder.add_node("generate_sub_questions", generate_sub_questions) 115 | graph_builder.add_node("retrieve_docs", retrieve_docs) 116 | graph_builder.add_node("generate_answer", generate_answer) 117 | 118 | graph_builder.add_edge(START, "generate_sub_questions") 119 | graph_builder.add_edge("generate_sub_questions", "retrieve_docs") 120 | graph_builder.add_edge("retrieve_docs", "generate_answer") 121 | graph_builder.add_conditional_edges( 122 | "generate_answer", 123 | check_answer_status, 124 | {"Next sub-question": "retrieve_docs", "Final answer": END}, 125 | ) 126 | 127 | graph = graph_builder.compile() 128 | 129 | 130 | if __name__ == "__main__": 131 | query = "What are the main components of an LLM-powered autonomous agent system?" 132 | config = { 133 | "configurable": { 134 | "max_generated_sub_questions_count": 3, 135 | } 136 | } 137 | 138 | for stream_mode, event in graph.stream( 139 | {"question": query}, 140 | stream_mode=["messages", "updates"], 141 | config=config, 142 | ): 143 | match stream_mode: 144 | case "messages": 145 | message, metadata = event 146 | print(message.content, end="", flush=True) 147 | case "updates": 148 | rprint(event) 149 | 150 | rprint(Markdown(event["generate_answer"]["answer"])) 151 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/hyde/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/hyde/__init__.py -------------------------------------------------------------------------------- /src/llm_rag/graphs/hyde/hyde.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict 2 | 3 | import numpy as np 4 | from langchain_core.documents import Document 5 | from langchain_core.messages import HumanMessage 6 | from langchain_core.runnables import RunnableConfig 7 | from langgraph.graph import END, START, StateGraph 8 | from pydantic import BaseModel, Field 9 | from rich import print as rprint 10 | from rich.markdown import Markdown 11 | from rich.pretty import Pretty 12 | 13 | from llm_rag import embeddings, llm 14 | from llm_rag.indexing.article import vectorstore 15 | 16 | hyde_prompt_template = """Please write a passage to answer the question 17 | Question: {question} 18 | Passage:""" 19 | 20 | 21 | rag_prompt_template = """Answer the following question based on this context: 22 | 23 | {context} 24 | 25 | Question: {question} 26 | """ 27 | 28 | 29 | def format_docs(docs): 30 | return 
"\n\n".join(doc.page_content for doc in docs) 31 | 32 | 33 | class State(TypedDict): 34 | question: str 35 | generated_documents: list[str] 36 | hyde_embeddings: np.ndarray 37 | context: list[Document] 38 | answer: str 39 | 40 | 41 | def generate_documents(state: State, config: RunnableConfig) -> list[Document]: 42 | generated_documents_count = config["configurable"].get( 43 | "generated_documents_count", 3 44 | ) 45 | 46 | hyde_prompt = hyde_prompt_template.format(question=state["question"]) 47 | generated_documents = llm.batch([hyde_prompt] * generated_documents_count) 48 | 49 | return { 50 | "generated_documents": [document.content for document in generated_documents] 51 | } 52 | 53 | 54 | def calculate_hyde_embeddings(state: State): 55 | question_embeddings = np.array(embeddings.embed_query(state["question"])) 56 | generated_documents_embeddings = np.array( 57 | embeddings.embed_documents(state["generated_documents"]) 58 | ) 59 | hyde_embeddings = np.vstack( 60 | [question_embeddings, generated_documents_embeddings] 61 | ).mean(axis=0) 62 | return {"hyde_embeddings": list(hyde_embeddings)} 63 | 64 | 65 | def get_relevant_documents(state: State): 66 | documents = vectorstore.similarity_search_by_vector(state["hyde_embeddings"]) 67 | return {"context": documents} 68 | 69 | 70 | def generate_answer(state: State): 71 | docs_content = format_docs(state["context"]) 72 | rag_prompt = rag_prompt_template.format( 73 | context=docs_content, question=state["question"] 74 | ) 75 | response = llm.invoke([HumanMessage(content=rag_prompt)]) 76 | return {"answer": response.content} 77 | 78 | 79 | class ConfigSchema(BaseModel): 80 | generated_documents_count: int = Field(default=3, gt=0) 81 | 82 | 83 | graph_builder = StateGraph(State, ConfigSchema) 84 | 85 | graph_builder.add_node("generate_documents", generate_documents) 86 | graph_builder.add_node("calculate_hyde_embeddings", calculate_hyde_embeddings) 87 | graph_builder.add_node("get_relevant_documents", get_relevant_documents) 88 | graph_builder.add_node("generate_answer", generate_answer) 89 | 90 | graph_builder.add_edge(START, "generate_documents") 91 | graph_builder.add_edge("generate_documents", "calculate_hyde_embeddings") 92 | graph_builder.add_edge("calculate_hyde_embeddings", "get_relevant_documents") 93 | graph_builder.add_edge("get_relevant_documents", "generate_answer") 94 | graph_builder.add_edge("generate_answer", END) 95 | graph = graph_builder.compile() 96 | 97 | 98 | if __name__ == "__main__": 99 | query = "What is task decomposition for LLM agents?" 
100 | config = { 101 | "configurable": { 102 | "generated_documents_count": 5, 103 | } 104 | } 105 | response = graph.invoke( 106 | {"question": query}, 107 | config=config, 108 | ) 109 | 110 | rprint(Pretty(response, max_depth=2, max_length=20)) 111 | rprint(Markdown(response["answer"])) 112 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/multi_query/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/multi_query/__init__.py -------------------------------------------------------------------------------- /src/llm_rag/graphs/multi_query/multi_query.py: -------------------------------------------------------------------------------- 1 | import operator 2 | from typing import Annotated, TypedDict 3 | 4 | from langchain_core.documents import Document 5 | from langchain_core.load import dumps, loads 6 | from langchain_core.messages import HumanMessage 7 | from langchain_core.runnables import RunnableConfig 8 | from langgraph.constants import Send 9 | from langgraph.graph import END, START, StateGraph 10 | from pydantic import BaseModel, Field 11 | from rich import print as rprint 12 | from rich.markdown import Markdown 13 | from rich.pretty import Pretty 14 | 15 | from llm_rag import llm 16 | from llm_rag.indexing.article import vectorstore 17 | 18 | rag_prompt_template = """Answer the following question based on this context: 19 | 20 | {context} 21 | 22 | Question: {question} 23 | """ 24 | 25 | 26 | def get_unique_docs(documents: list[list[Document]]) -> list[Document]: 27 | flattened_docs = [dumps(doc) for sublist in documents for doc in sublist] 28 | unique_docs = list(set(flattened_docs)) 29 | return [loads(doc) for doc in unique_docs] 30 | 31 | 32 | def format_docs(docs: list[Document]) -> list[str]: 33 | return "\n\n".join(doc.page_content for doc in docs) 34 | 35 | 36 | class State(TypedDict): 37 | question: str 38 | generated_questions: list[str] 39 | retrieved_docs: Annotated[list[list[Document]], operator.add] 40 | context: list[Document] 41 | answer: str 42 | 43 | 44 | class RetrieverState(TypedDict): 45 | generated_question: str 46 | 47 | 48 | def generate_queries(state: State, config: RunnableConfig): 49 | generated_questions_count = config["configurable"].get( 50 | "generated_questions_count", 5 51 | ) 52 | include_original_question = config["configurable"].get( 53 | "include_original_question", True 54 | ) 55 | 56 | questions = [] 57 | query = state["question"] 58 | 59 | if include_original_question: 60 | questions.append(query) 61 | 62 | class MultiQueryGenerator(BaseModel): 63 | questions: list[str] = Field( 64 | ..., 65 | description="List of questions generated multiple perspectives based on user query", 66 | min_items=generated_questions_count, 67 | max_items=generated_questions_count, 68 | ) 69 | 70 | structured_llm = llm.with_structured_output( 71 | MultiQueryGenerator, method="function_calling" 72 | ) 73 | response = structured_llm.invoke(query) 74 | questions.extend(response.questions) 75 | 76 | return {"generated_questions": questions} 77 | 78 | 79 | def assign_queries(state: State): 80 | return [ 81 | Send("retrieve_docs", {"generated_question": question}) 82 | for question in state["generated_questions"] 83 | ] 84 | 85 | 86 | def retrieve_docs(state: RetrieverState): 87 | retrieved_docs = vectorstore.similarity_search(state["generated_question"]) 88 | return 
{"retrieved_docs": [retrieved_docs]} 89 | 90 | 91 | def aggregate_docs(state: State): 92 | retrieved_docs = state["retrieved_docs"] 93 | docs = get_unique_docs(retrieved_docs) 94 | return {"context": docs} 95 | 96 | 97 | def generate_answer(state: State): 98 | docs_content = format_docs(state["context"]) 99 | rag_prompt = rag_prompt_template.format( 100 | question=state["question"], context=docs_content 101 | ) 102 | response = llm.invoke([HumanMessage(content=rag_prompt)]) 103 | return {"answer": response.content} 104 | 105 | 106 | class ConfigSchema(BaseModel): 107 | generated_questions_count: int = Field(default=5, gt=1) 108 | include_original_question: bool = Field(default=True) 109 | 110 | 111 | graph_builder = StateGraph(State, ConfigSchema) 112 | 113 | graph_builder.add_node("generate_queries", generate_queries) 114 | graph_builder.add_node("retrieve_docs", retrieve_docs) 115 | graph_builder.add_node("aggregate_docs", aggregate_docs) 116 | graph_builder.add_node("generate_answer", generate_answer) 117 | 118 | graph_builder.add_edge(START, "generate_queries") 119 | graph_builder.add_conditional_edges( 120 | "generate_queries", assign_queries, ["retrieve_docs"] 121 | ) 122 | graph_builder.add_edge("retrieve_docs", "aggregate_docs") 123 | graph_builder.add_edge("aggregate_docs", "generate_answer") 124 | graph_builder.add_edge("generate_answer", END) 125 | 126 | graph = graph_builder.compile() 127 | 128 | 129 | if __name__ == "__main__": 130 | query = "What is task decomposition for LLM agents?" 131 | config = { 132 | "configurable": { 133 | "generated_questions_count": 3, 134 | "include_original_question": False, 135 | } 136 | } 137 | response = graph.invoke({"question": query}, config=config) 138 | 139 | rprint(Pretty(response, max_depth=2)) 140 | rprint(Markdown(response["answer"])) 141 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/multi_vector/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/multi_vector/__init__.py -------------------------------------------------------------------------------- /src/llm_rag/graphs/multi_vector/chunks.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict 2 | 3 | from langchain_core.documents import Document 4 | from langchain_core.messages import HumanMessage 5 | from langgraph.graph import END, START, StateGraph 6 | from llm_rag import llm 7 | from llm_rag.indexing.multi_vector.chunks import retriever 8 | from rich import print as rprint 9 | from rich.markdown import Markdown 10 | from rich.pretty import Pretty 11 | 12 | rag_prompt_template = """Answer the following question based on this context: 13 | 14 | {context} 15 | 16 | Question: {question} 17 | """ 18 | 19 | 20 | def format_docs(docs): 21 | return "\n\n".join(doc.page_content for doc in docs) 22 | 23 | 24 | class State(TypedDict): 25 | question: str 26 | search_results: list[Document] 27 | context: list[Document] 28 | answer: str 29 | 30 | 31 | def retrieve(state: State): 32 | search_results = retriever.vectorstore.similarity_search(state["question"]) 33 | retrieved_docs = retriever.invoke(state["question"]) 34 | return { 35 | "search_results": search_results, 36 | "context": retrieved_docs, 37 | } 38 | 39 | 40 | def generate(state: State): 41 | docs_content = format_docs(state["context"]) 42 | rag_prompt = 
rag_prompt_template.format( 43 | question=state["question"], context=docs_content 44 | ) 45 | response = llm.invoke([HumanMessage(content=rag_prompt)]) 46 | return {"answer": response.content} 47 | 48 | 49 | graph_builder = StateGraph(State) 50 | 51 | graph_builder.add_node("retrieve", retrieve) 52 | graph_builder.add_node("generate", generate) 53 | 54 | graph_builder.add_edge(START, "retrieve") 55 | graph_builder.add_edge("retrieve", "generate") 56 | graph_builder.add_edge("generate", END) 57 | 58 | graph = graph_builder.compile() 59 | 60 | 61 | if __name__ == "__main__": 62 | agent_query = "What is task decomposition for LLM agents?" 63 | response = graph.invoke({"question": agent_query}) 64 | rprint(Pretty(response, max_string=100, no_wrap=False)) 65 | rprint(Markdown(response["answer"])) 66 | 67 | human_data_query = "What are main steps for collecting human data?" 68 | response = graph.invoke({"question": human_data_query}) 69 | rprint(Pretty(response, max_string=100, no_wrap=False)) 70 | rprint(Markdown(response["answer"])) 71 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/multi_vector/hypothetical_questions.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict 2 | 3 | from langchain_core.documents import Document 4 | from langchain_core.messages import HumanMessage 5 | from langgraph.graph import END, START, StateGraph 6 | from llm_rag import llm 7 | from llm_rag.indexing.multi_vector.hypothetical_questions import retriever 8 | from rich import print as rprint 9 | from rich.markdown import Markdown 10 | from rich.pretty import Pretty 11 | 12 | rag_prompt_template = """Answer the following question based on this context: 13 | 14 | {context} 15 | 16 | Question: {question} 17 | """ 18 | 19 | 20 | def format_docs(docs): 21 | return "\n\n".join(doc.page_content for doc in docs) 22 | 23 | 24 | class State(TypedDict): 25 | question: str 26 | search_results: list[Document] 27 | context: list[Document] 28 | answer: str 29 | 30 | 31 | def retrieve(state: State): 32 | search_results = retriever.vectorstore.similarity_search(state["question"]) 33 | retrieved_docs = retriever.invoke(state["question"]) 34 | return { 35 | "search_results": search_results, 36 | "context": retrieved_docs, 37 | } 38 | 39 | 40 | def generate(state: State): 41 | docs_content = format_docs(state["context"]) 42 | rag_prompt = rag_prompt_template.format( 43 | question=state["question"], context=docs_content 44 | ) 45 | response = llm.invoke([HumanMessage(content=rag_prompt)]) 46 | return {"answer": response.content} 47 | 48 | 49 | graph_builder = StateGraph(State) 50 | 51 | graph_builder.add_node("retrieve", retrieve) 52 | graph_builder.add_node("generate", generate) 53 | 54 | graph_builder.add_edge(START, "retrieve") 55 | graph_builder.add_edge("retrieve", "generate") 56 | graph_builder.add_edge("generate", END) 57 | 58 | graph = graph_builder.compile() 59 | 60 | 61 | if __name__ == "__main__": 62 | agent_query = "What is task decomposition for LLM agents?" 63 | response = graph.invoke({"question": agent_query}) 64 | rprint(Pretty(response, max_string=100, no_wrap=False)) 65 | rprint(Markdown(response["answer"])) 66 | 67 | human_data_query = "What are main steps for collecting human data?" 
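    # --- Editor's illustration; not part of the original file ------------------
    # The retriever imported above is built in llm_rag.indexing.multi_vector.
    # hypothetical_questions, which is not shown at this point in the dump. In
    # the multi-vector pattern, small surrogate documents (here: LLM-generated
    # questions; in the sibling modules: summaries or child chunks) are what get
    # embedded and searched, while invoke() maps a shared id back to the full
    # parent document held in a docstore. Below is a self-contained toy wiring
    # of that pattern; it uses only public LangChain APIs, and every name is a
    # placeholder rather than the project's actual indexing code:
    from langchain.retrievers.multi_vector import MultiVectorRetriever
    from langchain_chroma import Chroma
    from langchain_core.documents import Document
    from langchain_core.embeddings import DeterministicFakeEmbedding
    from langchain_core.stores import InMemoryStore

    toy_parent = Document(page_content="Full source article text ...")
    toy_question = Document(
        page_content="What is task decomposition?", metadata={"doc_id": "parent-0"}
    )
    toy_retriever = MultiVectorRetriever(
        vectorstore=Chroma(
            collection_name="toy_questions",
            embedding_function=DeterministicFakeEmbedding(size=32),
        ),
        docstore=InMemoryStore(),
        id_key="doc_id",
    )
    toy_retriever.vectorstore.add_documents([toy_question])  # surrogate is embedded
    toy_retriever.docstore.mset([("parent-0", toy_parent)])  # parent stored whole
    assert toy_retriever.invoke("task decomposition")[0].page_content.startswith("Full source")
    # ----------------------------------------------------------------------------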
68 | response = graph.invoke({"question": human_data_query}) 69 | rprint(Pretty(response, max_string=100, no_wrap=False)) 70 | rprint(Markdown(response["answer"])) 71 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/multi_vector/summary.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict 2 | 3 | from langchain_core.documents import Document 4 | from langchain_core.messages import HumanMessage 5 | from langgraph.graph import END, START, StateGraph 6 | from llm_rag import llm 7 | from llm_rag.indexing.multi_vector.summary import retriever 8 | from rich import print as rprint 9 | from rich.markdown import Markdown 10 | from rich.pretty import Pretty 11 | 12 | rag_prompt_template = """Answer the following question based on this context: 13 | 14 | {context} 15 | 16 | Question: {question} 17 | """ 18 | 19 | 20 | def format_docs(docs): 21 | return "\n\n".join(doc.page_content for doc in docs) 22 | 23 | 24 | class State(TypedDict): 25 | question: str 26 | search_results: list[Document] 27 | context: list[Document] 28 | answer: str 29 | 30 | 31 | def retrieve(state: State): 32 | search_results = retriever.vectorstore.similarity_search(state["question"]) 33 | retrieved_docs = retriever.invoke(state["question"]) 34 | return { 35 | "search_results": search_results, 36 | "context": retrieved_docs, 37 | } 38 | 39 | 40 | def generate(state: State): 41 | docs_content = format_docs(state["context"]) 42 | rag_prompt = rag_prompt_template.format( 43 | question=state["question"], context=docs_content 44 | ) 45 | response = llm.invoke([HumanMessage(content=rag_prompt)]) 46 | return {"answer": response.content} 47 | 48 | 49 | graph_builder = StateGraph(State) 50 | 51 | graph_builder.add_node("retrieve", retrieve) 52 | graph_builder.add_node("generate", generate) 53 | 54 | graph_builder.add_edge(START, "retrieve") 55 | graph_builder.add_edge("retrieve", "generate") 56 | graph_builder.add_edge("generate", END) 57 | 58 | graph = graph_builder.compile() 59 | 60 | 61 | if __name__ == "__main__": 62 | agent_query = "What is task decomposition for LLM agents?" 63 | response = graph.invoke({"question": agent_query}) 64 | rprint(Pretty(response, max_string=100, no_wrap=False)) 65 | rprint(Markdown(response["answer"])) 66 | 67 | human_data_query = "What are main steps for collecting human data?" 
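# Here `search_results` contains the matched summary documents, whereas `context` contains the
# full source articles looked up by `doc_id`, so the answer is generated from the original text
# rather than from the condensed summaries used for similarity search.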
68 | response = graph.invoke({"question": human_data_query}) 69 | rprint(Pretty(response, max_string=100, no_wrap=False)) 70 | rprint(Markdown(response["answer"])) 71 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/query_construction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/query_construction/__init__.py -------------------------------------------------------------------------------- /src/llm_rag/graphs/query_construction/self_query.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict 2 | 3 | from langchain_core.documents import Document 4 | from langgraph.graph import END, START, StateGraph 5 | from llm_rag.indexing.self_query import retriever 6 | from rich import print as rprint 7 | from rich.pretty import Pretty 8 | 9 | 10 | class State(TypedDict): 11 | question: str 12 | context: list[Document] 13 | 14 | 15 | def retrieve(state: State): 16 | retrieved_docs = retriever.invoke(state["question"]) 17 | return {"context": retrieved_docs} 18 | 19 | 20 | graph_builder = StateGraph(State) 21 | 22 | graph_builder.add_node("retrieve", retrieve) 23 | 24 | graph_builder.add_edge(START, "retrieve") 25 | graph_builder.add_edge("retrieve", END) 26 | 27 | graph = graph_builder.compile() 28 | 29 | 30 | if __name__ == "__main__": 31 | questions = [ 32 | "Which videos are 7 to 10 minutes long", 33 | "Videos published in March 2024", 34 | "Find tutorials with views not less than 100k", 35 | "Which videos should I watch on the topic of routing", 36 | "Which 1 video should I watch on the topic of routing", 37 | ] 38 | 39 | for question in questions: 40 | print(question) 41 | response = graph.invoke({"question": question}) 42 | rprint(Pretty(response, max_string=100, no_wrap=False)) 43 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/rag_fusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/rag_fusion/__init__.py -------------------------------------------------------------------------------- /src/llm_rag/graphs/rag_fusion/rag_fusion.py: -------------------------------------------------------------------------------- 1 | import operator 2 | from collections import defaultdict 3 | from typing import Annotated, TypedDict 4 | 5 | from langchain_core.documents import Document 6 | from langchain_core.load import dumps, loads 7 | from langchain_core.messages import HumanMessage 8 | from langchain_core.runnables import RunnableConfig 9 | from langgraph.constants import Send 10 | from langgraph.graph import END, START, StateGraph 11 | from pydantic import BaseModel, Field 12 | from rich import print as rprint 13 | from rich.markdown import Markdown 14 | from rich.pretty import Pretty 15 | 16 | from llm_rag import llm 17 | from llm_rag.indexing.article import vectorstore 18 | 19 | rag_prompt_template = """Answer the following question based on this context: 20 | 21 | {context} 22 | 23 | Question: {question} 24 | """ 25 | 26 | 27 | def reciprocal_rank_fusion( 28 | results: list[list[Document]], k: int = 60 29 | ) -> list[tuple[Document, float]]: 30 | fused_scores = defaultdict(int) 31 | 32 | for docs in results: 33 | for rank, 
doc in enumerate(docs, start=1): 34 | fused_scores[dumps(doc)] += 1 / (k + rank) 35 | 36 | reranked_results = [ 37 | (loads(doc), score) 38 | for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True) 39 | ] 40 | 41 | return reranked_results 42 | 43 | 44 | def format_docs(docs: list[Document]) -> list[str]: 45 | return "\n\n".join(doc.page_content for doc in docs) 46 | 47 | 48 | class State(TypedDict): 49 | question: str 50 | generated_questions: list[str] 51 | retrieved_docs: Annotated[list[list[Document]], operator.add] 52 | context: list[Document] 53 | context_scores: list[float] 54 | answer: str 55 | 56 | 57 | class RetrieverState(TypedDict): 58 | generated_question: str 59 | 60 | 61 | def generate_queries(state: State, config: RunnableConfig): 62 | generated_questions_count = config["configurable"].get( 63 | "generated_questions_count", 5 64 | ) 65 | include_original_question = config["configurable"].get( 66 | "include_original_question", True 67 | ) 68 | 69 | questions = [] 70 | query = state["question"] 71 | 72 | if include_original_question: 73 | questions.append(query) 74 | 75 | class MultiQueryGenerator(BaseModel): 76 | questions: list[str] = Field( 77 | ..., 78 | description="List of questions generated multiple perspectives based on user query", 79 | min_items=generated_questions_count, 80 | max_items=generated_questions_count, 81 | ) 82 | 83 | structured_llm = llm.with_structured_output( 84 | MultiQueryGenerator, method="function_calling" 85 | ) 86 | response = structured_llm.invoke(query) 87 | questions.extend(response.questions) 88 | 89 | return {"generated_questions": questions} 90 | 91 | 92 | def assign_queries(state: State): 93 | return [ 94 | Send("retrieve_docs", {"generated_question": question}) 95 | for question in state["generated_questions"] 96 | ] 97 | 98 | 99 | def retrieve_docs(state: RetrieverState): 100 | retrieved_docs = vectorstore.similarity_search(state["generated_question"]) 101 | return {"retrieved_docs": [retrieved_docs]} 102 | 103 | 104 | def aggregate_docs(state: State): 105 | retrieved_docs = state["retrieved_docs"] 106 | reranked_results = reciprocal_rank_fusion(retrieved_docs) 107 | docs, scores = list(zip(*((doc, score) for doc, score in reranked_results))) 108 | return {"context": docs, "context_scores": scores} 109 | 110 | 111 | def generate_answer(state: State): 112 | docs_content = format_docs(state["context"]) 113 | rag_prompt = rag_prompt_template.format( 114 | question=state["question"], context=docs_content 115 | ) 116 | response = llm.invoke([HumanMessage(content=rag_prompt)]) 117 | return {"answer": response.content} 118 | 119 | 120 | class ConfigSchema(BaseModel): 121 | generated_questions_count: int = Field(default=5, gt=1) 122 | include_original_question: bool = Field(default=True) 123 | 124 | 125 | graph_builder = StateGraph(State, ConfigSchema) 126 | 127 | graph_builder.add_node("generate_queries", generate_queries) 128 | graph_builder.add_node("retrieve_docs", retrieve_docs) 129 | graph_builder.add_node("aggregate_docs", aggregate_docs) 130 | graph_builder.add_node("generate_answer", generate_answer) 131 | 132 | graph_builder.add_edge(START, "generate_queries") 133 | graph_builder.add_conditional_edges( 134 | "generate_queries", assign_queries, ["retrieve_docs"] 135 | ) 136 | graph_builder.add_edge("retrieve_docs", "aggregate_docs") 137 | graph_builder.add_edge("aggregate_docs", "generate_answer") 138 | graph_builder.add_edge("generate_answer", END) 139 | 140 | graph = graph_builder.compile() 141 | 142 | 143 | 
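# Worked example of reciprocal_rank_fusion with the default k=60: a document ranked 1st in two
# of the per-question result lists accumulates 1/61 + 1/61 ≈ 0.0328, while a document ranked 4th
# in a single list gets only 1/64 ≈ 0.0156, so documents that appear near the top of several
# lists are promoted in the fused ordering that aggregate_docs passes on as context.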
if __name__ == "__main__": 144 | query = "What is task decomposition for LLM agents?" 145 | config = { 146 | "configurable": { 147 | "generated_questions_count": 3, 148 | "include_original_question": False, 149 | } 150 | } 151 | response = graph.invoke({"question": query}, config=config) 152 | 153 | rprint(Pretty(response, max_depth=2)) 154 | rprint(Markdown(response["answer"])) 155 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/raptor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/raptor/__init__.py -------------------------------------------------------------------------------- /src/llm_rag/graphs/raptor/raptor.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict 2 | 3 | from langchain_core.documents import Document 4 | from langchain_core.messages import HumanMessage 5 | from langgraph.graph import END, START, StateGraph 6 | from llm_rag import llm 7 | from llm_rag.indexing.raptor.raptor import retriever 8 | from rich import print as rprint 9 | from rich.markdown import Markdown 10 | from rich.pretty import Pretty 11 | 12 | rag_prompt_template = """Answer the following question based on this context: 13 | 14 | {context} 15 | 16 | Question: {question} 17 | """ 18 | 19 | 20 | def format_docs(docs): 21 | return "\n\n".join(doc.page_content for doc in docs) 22 | 23 | 24 | class State(TypedDict): 25 | question: str 26 | context: list[Document] 27 | answer: str 28 | 29 | 30 | def retrieve(state: State): 31 | retrieved_docs = retriever.invoke(state["question"]) 32 | return {"context": retrieved_docs} 33 | 34 | 35 | def generate(state: State): 36 | docs_content = format_docs(state["context"]) 37 | rag_prompt = rag_prompt_template.format( 38 | question=state["question"], context=docs_content 39 | ) 40 | response = llm.invoke([HumanMessage(content=rag_prompt)]) 41 | return {"answer": response.content} 42 | 43 | 44 | graph_builder = StateGraph(State) 45 | 46 | graph_builder.add_node("retrieve", retrieve) 47 | graph_builder.add_node("generate", generate) 48 | 49 | graph_builder.add_edge(START, "retrieve") 50 | graph_builder.add_edge("retrieve", "generate") 51 | graph_builder.add_edge("generate", END) 52 | 53 | graph = graph_builder.compile() 54 | 55 | 56 | if __name__ == "__main__": 57 | high_level_query = "What is this documentation about?" 58 | response = graph.invoke({"question": high_level_query}) 59 | rprint(Pretty(response, max_string=100, no_wrap=False)) 60 | rprint(Markdown(response["answer"])) 61 | 62 | mid_level_query = "What are the main components of LangGraph" 63 | response = graph.invoke({"question": mid_level_query}) 64 | rprint(Pretty(response, max_string=100, no_wrap=False)) 65 | rprint(Markdown(response["answer"])) 66 | 67 | low_level_query = "What is time travel?" 
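# The three queries probe different levels of the RAPTOR tree: the high-level question is
# expected to match the recursive cluster summaries, while the time-travel question is more
# likely to be answered from the level-0 leaf chunks; all levels share one vectorstore, so the
# k=10 retriever can mix granularities in a single result set.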
68 | response = graph.invoke({"question": low_level_query}) 69 | rprint(Pretty(response, max_string=100, no_wrap=False)) 70 | rprint(Markdown(response["answer"])) 71 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/routing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/routing/__init__.py -------------------------------------------------------------------------------- /src/llm_rag/graphs/routing/logical.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, TypedDict 2 | 3 | from langchain_core.messages import HumanMessage, SystemMessage 4 | from langgraph.graph import END, START, StateGraph 5 | from llm_rag import llm 6 | from pydantic import BaseModel, Field 7 | from rich import print as rprint 8 | 9 | system_prompt = """You are an expert at routing a user question to the appropriate data source. 10 | 11 | Based on the programming language the question is referring to, route it to the relevant data source.""" 12 | 13 | 14 | class RouteInfo(BaseModel): 15 | """Route a user query to the most relevant data source.""" 16 | 17 | data_source: Literal["python_docs", "js_docs", "golang_docs"] = Field( 18 | ..., 19 | description="Given a user question choose which data source would be most relevant for answering their question", 20 | ) 21 | 22 | 23 | structured_llm = llm.with_structured_output(RouteInfo, method="function_calling") 24 | 25 | 26 | class State(TypedDict): 27 | question: str 28 | data_source: str 29 | context: str 30 | answer: str 31 | 32 | 33 | def select_data_source(state: State): 34 | response = structured_llm.invoke( 35 | [ 36 | SystemMessage(content=system_prompt), 37 | HumanMessage(content=state["question"]), 38 | ] 39 | ) 40 | return {"data_source": response.data_source} 41 | 42 | 43 | def route_query(state: State) -> Literal["python_docs", "js_docs", "golang_docs"]: 44 | return state["data_source"] 45 | 46 | 47 | def retrieve_python_docs(state: State): 48 | return {"context": "Python documentation"} 49 | 50 | 51 | def retrieve_js_docs(state: State): 52 | return {"context": "Javascript documentation"} 53 | 54 | 55 | def retrieve_golang_docs(state: State): 56 | return {"context": "Go documentation"} 57 | 58 | 59 | def generate_answer(state: State): 60 | return {"answer": f"Answer based on {state['context']}"} 61 | 62 | 63 | graph_builder = StateGraph(State) 64 | 65 | graph_builder.add_node("select_data_source", select_data_source) 66 | graph_builder.add_node("python_docs", retrieve_python_docs) 67 | graph_builder.add_node("js_docs", retrieve_js_docs) 68 | graph_builder.add_node("golang_docs", retrieve_golang_docs) 69 | graph_builder.add_node("generate_answer", generate_answer) 70 | 71 | graph_builder.add_edge(START, "select_data_source") 72 | graph_builder.add_conditional_edges( 73 | "select_data_source", route_query, ["python_docs", "js_docs", "golang_docs"] 74 | ) 75 | graph_builder.add_edge("python_docs", "generate_answer") 76 | graph_builder.add_edge("js_docs", "generate_answer") 77 | graph_builder.add_edge("golang_docs", "generate_answer") 78 | graph_builder.add_edge("generate_answer", END) 79 | 80 | graph = graph_builder.compile() 81 | 82 | 83 | if __name__ == "__main__": 84 | python_query = """Why doesn't the following code work: 85 | 86 | from langchain_core.prompts import ChatPromptTemplate 87 | 88 
| prompt = ChatPromptTemplate.from_messages(["human", "speak in {language}"]) 89 | prompt.invoke("french") 90 | """ 91 | response = graph.invoke({"question": python_query}) 92 | rprint(response) 93 | 94 | javascript_query = """Which arguments has getElementById function?""" 95 | response = graph.invoke({"question": javascript_query}) 96 | rprint(response) 97 | 98 | golang_query = """What is struct?""" 99 | response = graph.invoke({"question": golang_query}) 100 | rprint(response) 101 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/routing/semantic.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict 2 | 3 | from langchain_community.utils.math import cosine_similarity 4 | from langchain_core.messages import HumanMessage 5 | from langgraph.graph import END, START, StateGraph 6 | from llm_rag import embeddings, llm 7 | from rich import print as rprint 8 | from rich.markdown import Markdown 9 | 10 | prompt_names = ["PHYSICS", "MATH", "OTHER"] 11 | 12 | 13 | physics_prompt_template = """You are a very smart physics professor. 14 | You are great at answering questions about physics in a concise and easy to understand manner. 15 | When you don't know the answer to a question you admit that you don't know. 16 | 17 | Here is a question: 18 | {question}""" 19 | 20 | 21 | math_prompt_template = """You are a very good mathematician. You are great at answering math questions. 22 | You are so good because you are able to break down hard problems into their component parts, 23 | answer the component parts, and then put them together to answer the broader question. 24 | 25 | Here is a question: 26 | {question}""" 27 | 28 | 29 | other_prompt_template = f"""You are a helpful assistant. You are great at answering all questions not from the following themes: {prompt_names[:-1]} 30 | 31 | Here is a question: 32 | {{question}}""" 33 | 34 | 35 | prompt_templates = [ 36 | physics_prompt_template, 37 | math_prompt_template, 38 | other_prompt_template, 39 | ] 40 | prompt_embeddings = embeddings.embed_documents(prompt_templates) 41 | 42 | 43 | class State(TypedDict): 44 | question: str 45 | most_similar_prompt_idx: int 46 | most_similar_prompt_name: str 47 | answer: str 48 | 49 | 50 | def select_route_prompt(state: State): 51 | query_embedding = embeddings.embed_query(state["question"]) 52 | query_similarity = cosine_similarity([query_embedding], prompt_embeddings)[0] 53 | most_similar_prompt_idx = query_similarity.argmax() 54 | return { 55 | "most_similar_prompt_idx": most_similar_prompt_idx, 56 | "most_similar_prompt_name": prompt_names[most_similar_prompt_idx], 57 | } 58 | 59 | 60 | def generate_answer(state: State): 61 | route_prompt = prompt_templates[state["most_similar_prompt_idx"]].format( 62 | question=state["question"] 63 | ) 64 | response = llm.invoke([HumanMessage(content=route_prompt)]) 65 | return {"answer": response.content} 66 | 67 | 68 | graph_builder = StateGraph(State) 69 | 70 | graph_builder.add_node("select_route_prompt", select_route_prompt) 71 | graph_builder.add_node("generate_answer", generate_answer) 72 | 73 | graph_builder.add_edge(START, "select_route_prompt") 74 | graph_builder.add_edge("select_route_prompt", "generate_answer") 75 | graph_builder.add_edge("generate_answer", END) 76 | 77 | graph = graph_builder.compile() 78 | 79 | 80 | if __name__ == "__main__": 81 | queries = [ 82 | "What's a black hole", 83 | "What is the square root of 81", 84 | "Hello! 
How are you?", 85 | ] 86 | 87 | for query in queries: 88 | print(query) 89 | response = graph.invoke({"question": query}) 90 | rprint(response) 91 | rprint(Markdown(response["answer"])) 92 | rprint("=" * 50) 93 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/self_rag/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/self_rag/__init__.py -------------------------------------------------------------------------------- /src/llm_rag/graphs/self_rag/self_rag.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, TypedDict 2 | 3 | from langchain_core.documents import Document 4 | from langchain_core.messages import HumanMessage 5 | from langchain_core.runnables import chain 6 | from langgraph.graph import END, START, StateGraph 7 | from pydantic import BaseModel, Field 8 | from rich import print as rprint 9 | from rich.markdown import Markdown 10 | from rich.pretty import Pretty 11 | 12 | from llm_rag import llm 13 | from llm_rag.indexing.reflection import retriever 14 | 15 | retrieval_prompt_template = """You are grader assistant assessing the need to retrieve additional documents to answer the user's question. 16 | If you are sure that all the necessary data is available, then you do not need to retrieve additional documents. 17 | Give a binary score to indicate whether retrieval is required. 18 | 19 | User question: 20 | {question} 21 | """ 22 | 23 | rag_prompt_template = """Answer the following question based on this context: 24 | 25 | {context} 26 | 27 | Question: {question} 28 | """ 29 | 30 | answer_prompt_template = """Answer the following question: 31 | 32 | Question: {question} 33 | """ 34 | 35 | no_answer_prompt = "I don't have an answer to the question." 36 | 37 | relevance_grading_prompt_template = """You are a grader assessing relevance of a retrieved document to a user question. 38 | It does not need to be a stringent test. The goal is to filter out erroneous retrievals. 39 | If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. 40 | Give a binary score to indicate whether the document is relevant to the question. 41 | 42 | Retrieved document: 43 | {document} 44 | 45 | User question: 46 | {question} 47 | """ 48 | 49 | hallucinations_grading_prompt_template = """You are a grader assessing whether an LLM answer is grounded in / supported by a set of retrieved facts. 50 | Give a binary score whether the answer is grounded in / supported by the set of facts. 51 | 52 | Set of facts: 53 | {context} 54 | 55 | LLM answer: 56 | {answer} 57 | """ 58 | 59 | answer_grading_prompt_template = """You are a grader assessing whether an answer addresses / resolves a question. 60 | Give a binary score whether the answer resolves the question. 61 | 62 | User question: 63 | {question} 64 | 65 | LLM answer: 66 | {answer} 67 | """ 68 | 69 | query_rewriting_prompt_template = """You a question re-writer that converts an input question to a better version that is optimized for web search. 70 | Look at the input and try to reason about the underlying semantic intent / meaning. 
71 | 72 | Here is the initial question: 73 | {question} 74 | 75 | Formulate an improved question.""" 76 | 77 | 78 | def format_docs(docs: list[Document]) -> list[str]: 79 | return "\n\n".join(doc.page_content for doc in docs) 80 | 81 | 82 | class RetrievalGrade(BaseModel): 83 | """Check if retrieval of additional documents is required.""" 84 | 85 | chain_of_thought: str = Field( 86 | ..., 87 | description="Step by step reasoning to check if retrieval of additional documents is required", 88 | ) 89 | is_required: bool = Field( 90 | description="Retrieval of additional documents is required" 91 | ) 92 | 93 | 94 | retrieval_grader_llm = llm.with_structured_output( 95 | RetrievalGrade, method="function_calling" 96 | ) 97 | 98 | 99 | class RelevanceGrade(BaseModel): 100 | """Relevance check on retrieved document.""" 101 | 102 | chain_of_thought: str = Field( 103 | ..., 104 | description="Step by step reasoning to check if the document is relevant to the question", 105 | ) 106 | is_relevant: bool = Field(description="Document is relevant to the question") 107 | 108 | 109 | relevance_grader_llm = llm.with_structured_output( 110 | RelevanceGrade, method="function_calling" 111 | ) 112 | 113 | 114 | @chain 115 | def grade_document_relevance(document, question): 116 | relevance_grading_prompt = relevance_grading_prompt_template.format( 117 | document=document, question=question 118 | ) 119 | response = relevance_grader_llm.invoke( 120 | [HumanMessage(content=relevance_grading_prompt)] 121 | ) 122 | return response 123 | 124 | 125 | class HallucationsGrade(BaseModel): 126 | """Hallucination check in generated answer.""" 127 | 128 | chain_of_thought: str = Field( 129 | ..., 130 | description="Step by step reasoning to check if the answer is grounded in the facts", 131 | ) 132 | is_grounded: bool = Field(description="Answer is grounded in the facts") 133 | 134 | 135 | hallucations_grader_llm = llm.with_structured_output( 136 | HallucationsGrade, method="function_calling" 137 | ) 138 | 139 | 140 | class AnswerGrade(BaseModel): 141 | """Check if answer addresses the question.""" 142 | 143 | chain_of_thought: str = Field( 144 | ..., 145 | description="Step by step reasoning to check if the answer addresses the questions", 146 | ) 147 | is_useful: bool = Field(description="Answer addresses the question") 148 | 149 | 150 | answer_grader_llm = llm.with_structured_output(AnswerGrade, method="function_calling") 151 | 152 | 153 | class SearchQuery(BaseModel): 154 | """Question optimization for search.""" 155 | 156 | chain_of_thought: str = Field( 157 | ..., description="Step by step reasoning to optimize query for search" 158 | ) 159 | search_query: str = Field(description="Optimized search query") 160 | 161 | 162 | search_llm = llm.with_structured_output(SearchQuery, method="function_calling") 163 | 164 | 165 | class State(TypedDict): 166 | question: str 167 | retrieval_grade: RetrievalGrade 168 | documents: list[Document] 169 | relevance_grades: list[RelevanceGrade] 170 | generation: str 171 | hallucinations_grade: HallucationsGrade 172 | context: list[Document] 173 | answer_grade: AnswerGrade 174 | answer: str 175 | 176 | 177 | def grade_retrieval(state: State): 178 | question = state["question"] 179 | retrieval_prompt = retrieval_prompt_template.format(question=question) 180 | retrieval_grade = retrieval_grader_llm.invoke(retrieval_prompt) 181 | return {"retrieval_grade": retrieval_grade} 182 | 183 | 184 | def decide_to_retrieve(state: State) -> Literal["retrieve", "generate_answer"]: 185 | retrieval_grade = 
state["retrieval_grade"] 186 | 187 | if retrieval_grade.is_required: 188 | return "retrieve" 189 | else: 190 | return "generate_answer" 191 | 192 | 193 | def retrieve(state: State): 194 | question = state["question"] 195 | documents = retriever.invoke(question) 196 | return {"documents": documents} 197 | 198 | 199 | def grade_documents(state: State): 200 | question = state["question"] 201 | documents = state["documents"] 202 | 203 | relevance_grades = grade_document_relevance.batch(documents, question=question) 204 | filtered_documents = [ 205 | document 206 | for (document, relevance_grade) in zip(documents, relevance_grades) 207 | if relevance_grade.is_relevant 208 | ] 209 | 210 | return {"context": filtered_documents, "relevance_grades": relevance_grades} 211 | 212 | 213 | def check_documents_relevance( 214 | state: State, 215 | ) -> Literal["generate_rag_answer", "generate_no_answer"]: 216 | filtered_documents = state["context"] 217 | 218 | if len(filtered_documents) > 0: 219 | return "generate_rag_answer" 220 | else: 221 | return "generate_no_answer" 222 | 223 | 224 | def generate_rag_answer(state: State): 225 | docs_content = format_docs(state["context"]) 226 | rag_prompt = rag_prompt_template.format( 227 | question=state["question"], context=docs_content 228 | ) 229 | response = llm.invoke([HumanMessage(content=rag_prompt)]) 230 | return {"answer": response.content} 231 | 232 | 233 | def generate_answer(state: State): 234 | answer_prompt = answer_prompt_template.format(question=state["question"]) 235 | response = llm.invoke([HumanMessage(content=answer_prompt)]) 236 | return {"answer": response.content} 237 | 238 | 239 | def generate_no_answer(state: State): 240 | return {"answer": no_answer_prompt} 241 | 242 | 243 | def grade_hallucinations(state: State): 244 | filtered_documents = state["context"] 245 | answer = state["answer"] 246 | hallucinations_grading_prompt = hallucinations_grading_prompt_template.format( 247 | context=filtered_documents, answer=answer 248 | ) 249 | hallucinations_grade = hallucations_grader_llm.invoke(hallucinations_grading_prompt) 250 | return {"hallucinations_grade": hallucinations_grade} 251 | 252 | 253 | def check_hallucinations( 254 | state: State, 255 | ) -> Literal["grade_answer", "generate_rag_answer"]: 256 | hallucinations_grade = state["hallucinations_grade"] 257 | 258 | if hallucinations_grade.is_grounded: 259 | return "grade_answer" 260 | else: 261 | return "generate_rag_answer" 262 | 263 | 264 | def grade_answer(state: State): 265 | question = state["question"] 266 | answer = state["answer"] 267 | answer_grading_prompt = answer_grading_prompt_template.format( 268 | question=question, answer=answer 269 | ) 270 | answer_grade = answer_grader_llm.invoke(answer_grading_prompt) 271 | return {"answer_grade": answer_grade} 272 | 273 | 274 | def check_answer(state: State) -> Literal["__end__", "rewrite_query"]: 275 | answer_grade = state["answer_grade"] 276 | 277 | if answer_grade.is_useful: 278 | return "__end__" 279 | else: 280 | return "rewrite_query" 281 | 282 | 283 | def rewrite_query(state: State): 284 | question = state["question"] 285 | query_rewriting_prompt = query_rewriting_prompt_template.format(question=question) 286 | response = search_llm.invoke(query_rewriting_prompt) 287 | return {"question": response.search_query} 288 | 289 | 290 | graph_builder = StateGraph(State) 291 | 292 | graph_builder.add_edge(START, "grade_retrieval") 293 | graph_builder.add_node("grade_retrieval", grade_retrieval) 294 | 
graph_builder.add_conditional_edges("grade_retrieval", decide_to_retrieve) 295 | 296 | graph_builder.add_node("generate_answer", generate_answer) 297 | graph_builder.add_edge("generate_answer", END) 298 | 299 | graph_builder.add_node("retrieve", retrieve) 300 | graph_builder.add_edge("retrieve", "grade_documents") 301 | graph_builder.add_node("grade_documents", grade_documents) 302 | graph_builder.add_conditional_edges("grade_documents", check_documents_relevance) 303 | 304 | graph_builder.add_node("generate_rag_answer", generate_rag_answer) 305 | graph_builder.add_edge("generate_rag_answer", "grade_hallucinations") 306 | graph_builder.add_node("grade_hallucinations", grade_hallucinations) 307 | graph_builder.add_conditional_edges("grade_hallucinations", check_hallucinations) 308 | 309 | graph_builder.add_node("generate_no_answer", generate_no_answer) 310 | graph_builder.add_edge("generate_no_answer", END) 311 | 312 | graph_builder.add_node("grade_answer", grade_answer) 313 | graph_builder.add_conditional_edges("grade_answer", check_answer) 314 | 315 | graph_builder.add_node("rewrite_query", rewrite_query) 316 | graph_builder.add_edge("rewrite_query", "retrieve") 317 | 318 | graph = graph_builder.compile() 319 | 320 | 321 | if __name__ == "__main__": 322 | queries = [ 323 | "What are common types of agent memory?", 324 | "What are recent types of adversarial attacks in LLM?", 325 | "How does the AlphaCodium paper work?", 326 | ] 327 | 328 | for query in queries: 329 | response = graph.invoke({"question": query}) 330 | rprint(Pretty(response)) 331 | rprint(Markdown(response["answer"])) 332 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/step_back/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/step_back/__init__.py -------------------------------------------------------------------------------- /src/llm_rag/graphs/step_back/step_back.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict 2 | 3 | from langchain_core.documents import Document 4 | from langchain_core.messages import HumanMessage 5 | from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate 6 | from langgraph.graph import END, START, StateGraph 7 | from llm_rag import llm 8 | from llm_rag.indexing.article import vectorstore 9 | from rich import print as rprint 10 | from rich.markdown import Markdown 11 | from rich.pretty import Pretty 12 | 13 | step_back_prompt_template = "You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. 
Here are a few examples:" 14 | examples = [ 15 | { 16 | "input": "Could the members of The Police perform lawful arrests?", 17 | "output": "what can the members of The Police do?", 18 | }, 19 | { 20 | "input": "Jan Sindel’s was born in what country?", 21 | "output": "what is Jan Sindel’s personal history?", 22 | }, 23 | ] 24 | example_prompt = ChatPromptTemplate.from_messages( 25 | [ 26 | ("human", "{input}"), 27 | ("ai", "{output}"), 28 | ] 29 | ) 30 | few_shot_prompt = FewShotChatMessagePromptTemplate( 31 | example_prompt=example_prompt, 32 | examples=examples, 33 | ) 34 | step_back_prompt = ChatPromptTemplate.from_messages( 35 | [("system", step_back_prompt_template), few_shot_prompt, ("human", "{question}")] 36 | ) 37 | 38 | 39 | final_answer_prompt_template = """You are an expert of world knowledge. I am going to ask you a question. Your response should be comprehensive and not contradicted with the following context if they are relevant. Otherwise, ignore them if they are not relevant. 40 | 41 | {context} 42 | {step_back_context} 43 | 44 | Original Question: {question} 45 | Answer:""" 46 | 47 | 48 | class State(TypedDict): 49 | question: str 50 | context: list[Document] 51 | step_back_question: str 52 | step_back_context: list[Document] 53 | answer: str 54 | 55 | 56 | def retrieve_docs(state: State): 57 | question = state["question"] 58 | retrieved_docs = vectorstore.similarity_search(question) 59 | return {"context": retrieved_docs} 60 | 61 | 62 | def generate_step_back_question(state: State): 63 | step_back_prompt_messages = step_back_prompt.format(question=state["question"]) 64 | step_back_question = llm.invoke(step_back_prompt_messages) 65 | return {"step_back_question": step_back_question.content} 66 | 67 | 68 | def retrieve_step_back_docs(state: State): 69 | step_back_question = state["step_back_question"] 70 | retrieved_step_back_docs = vectorstore.similarity_search(step_back_question) 71 | return {"step_back_context": retrieved_step_back_docs} 72 | 73 | 74 | def generate_answer(state: State): 75 | final_answer_prompt = final_answer_prompt_template.format( 76 | context=state["context"], 77 | step_back_context=state["step_back_context"], 78 | question=state["question"], 79 | ) 80 | response = llm.invoke([HumanMessage(content=final_answer_prompt)]) 81 | return {"answer": response.content} 82 | 83 | 84 | graph_builder = StateGraph(State) 85 | 86 | graph_builder.add_node("retrieve_docs", retrieve_docs) 87 | graph_builder.add_node("generate_step_back_question", generate_step_back_question) 88 | graph_builder.add_node("retrieve_step_back_docs", retrieve_step_back_docs) 89 | graph_builder.add_node("generate_answer", generate_answer) 90 | 91 | graph_builder.add_edge(START, "retrieve_docs") 92 | graph_builder.add_edge("retrieve_docs", "generate_step_back_question") 93 | graph_builder.add_edge("generate_step_back_question", "retrieve_step_back_docs") 94 | graph_builder.add_edge("retrieve_step_back_docs", "generate_answer") 95 | graph_builder.add_edge("generate_answer", END) 96 | 97 | graph = graph_builder.compile() 98 | 99 | 100 | if __name__ == "__main__": 101 | query = "What is task decomposition for LLM agents?" 
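# A single invocation runs four nodes in sequence: retrieve documents for the original question,
# rewrite it into a more generic step-back question with the few-shot prompt, retrieve again for
# that paraphrase, and answer from both retrieved contexts via the final answer prompt.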
102 | response = graph.invoke( 103 | {"question": query}, 104 | ) 105 | 106 | rprint(Pretty(response, max_depth=2)) 107 | rprint(Markdown(response["answer"])) 108 | -------------------------------------------------------------------------------- /src/llm_rag/graphs/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/utils.py -------------------------------------------------------------------------------- /src/llm_rag/indexing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/indexing/__init__.py -------------------------------------------------------------------------------- /src/llm_rag/indexing/article.py: -------------------------------------------------------------------------------- 1 | import bs4 2 | from langchain.text_splitter import RecursiveCharacterTextSplitter 3 | from langchain_community.document_loaders import WebBaseLoader 4 | from langchain_core.vectorstores import InMemoryVectorStore 5 | from llm_rag import embeddings 6 | 7 | 8 | def load_documents(): 9 | loader = WebBaseLoader( 10 | web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",), 11 | bs_kwargs=dict( 12 | parse_only=bs4.SoupStrainer( 13 | class_=("post-content", "post-title", "post-header") 14 | ) 15 | ), 16 | ) 17 | docs = loader.load() 18 | return docs 19 | 20 | 21 | def prepare_vectorstore(docs, embeddings): 22 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) 23 | splits = text_splitter.split_documents(docs) 24 | vectorstore = InMemoryVectorStore(embeddings) 25 | vectorstore.add_documents(documents=splits) 26 | return vectorstore 27 | 28 | 29 | docs = load_documents() 30 | vectorstore = prepare_vectorstore(docs, embeddings) 31 | retriever = vectorstore.as_retriever() 32 | -------------------------------------------------------------------------------- /src/llm_rag/indexing/colbert_model.py: -------------------------------------------------------------------------------- 1 | import bs4 2 | from langchain.text_splitter import RecursiveCharacterTextSplitter 3 | from langchain_community.document_loaders import WebBaseLoader 4 | from ragatouille import RAGPretrainedModel 5 | 6 | 7 | def load_documents(): 8 | articles = [ 9 | "https://lilianweng.github.io/posts/2023-06-23-agent/", 10 | "https://lilianweng.github.io/posts/2024-02-05-human-data-quality/", 11 | ] 12 | 13 | loader = WebBaseLoader( 14 | web_paths=articles, 15 | bs_kwargs=dict( 16 | parse_only=bs4.SoupStrainer( 17 | class_=("post-content", "post-title", "post-header") 18 | ) 19 | ), 20 | ) 21 | docs = loader.load() 22 | return docs 23 | 24 | 25 | def prepare_model(docs): 26 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=0) 27 | splits = text_splitter.split_documents(docs) 28 | 29 | docs_texts = [doc.page_content for doc in splits] 30 | docs_metadatas = [doc.metadata for doc in splits] 31 | 32 | model = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0") 33 | model.index( 34 | collection=docs_texts, 35 | document_metadatas=docs_metadatas, 36 | index_name="blog", 37 | split_documents=False, 38 | ) 39 | 40 | return model 41 | 42 | 43 | docs = load_documents() 44 | model = prepare_model(docs) 45 | retriever = model.as_langchain_retriever(k=10) 46 | 
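# A minimal usage sketch for the retriever built above (assumes the "blog" index was created by
# importing this module; the query string is just an example):
#
#     from llm_rag.indexing.colbert_model import retriever
#     docs = retriever.invoke("What is task decomposition for LLM agents?")
#     print(len(docs), docs[0].page_content[:200])
#
# as_langchain_retriever(k=10) wraps the RAGatouille index as a LangChain retriever, so invoke()
# returns a list of Document objects ranked by ColBERT's late-interaction scoring.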
-------------------------------------------------------------------------------- /src/llm_rag/indexing/multi_vector/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/indexing/multi_vector/__init__.py -------------------------------------------------------------------------------- /src/llm_rag/indexing/multi_vector/chunks.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | 3 | import bs4 4 | from langchain.retrievers.multi_vector import MultiVectorRetriever 5 | from langchain.text_splitter import RecursiveCharacterTextSplitter 6 | from langchain_community.document_loaders import WebBaseLoader 7 | from langchain_core.stores import InMemoryByteStore 8 | from langchain_core.vectorstores import InMemoryVectorStore 9 | from llm_rag import embeddings 10 | 11 | summarization_prompt_template = "Summarize the following document:\n\n{doc}" 12 | 13 | 14 | def load_documents(): 15 | articles = [ 16 | "https://lilianweng.github.io/posts/2023-06-23-agent/", 17 | "https://lilianweng.github.io/posts/2024-02-05-human-data-quality/", 18 | ] 19 | loader = WebBaseLoader( 20 | web_paths=articles, 21 | bs_kwargs=dict( 22 | parse_only=bs4.SoupStrainer( 23 | class_=("post-content", "post-title", "post-header") 24 | ) 25 | ), 26 | ) 27 | docs = loader.load() 28 | return docs 29 | 30 | 31 | def prepare_retriever(docs, embeddings): 32 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=0) 33 | splits = text_splitter.split_documents(docs) 34 | 35 | vectorstore = InMemoryVectorStore(embeddings) 36 | store = InMemoryByteStore() 37 | id_key = "split_id" 38 | 39 | retriever = MultiVectorRetriever( 40 | vectorstore=vectorstore, 41 | byte_store=store, 42 | id_key=id_key, 43 | ) 44 | 45 | split_ids = [str(uuid.uuid4()) for _ in splits] 46 | 47 | child_text_splitter = RecursiveCharacterTextSplitter( 48 | chunk_size=1000, chunk_overlap=0 49 | ) 50 | 51 | all_sub_splits = [] 52 | 53 | for i, split in enumerate(splits): 54 | split_id = split_ids[i] 55 | sub_splits = child_text_splitter.split_documents([split]) 56 | 57 | for sub_split in sub_splits: 58 | sub_split.metadata[id_key] = split_id 59 | 60 | all_sub_splits.extend(sub_splits) 61 | 62 | retriever.vectorstore.add_documents(all_sub_splits) 63 | retriever.docstore.mset(list(zip(split_ids, splits))) 64 | 65 | return retriever 66 | 67 | 68 | docs = load_documents() 69 | retriever = prepare_retriever(docs, embeddings) 70 | -------------------------------------------------------------------------------- /src/llm_rag/indexing/multi_vector/hypothetical_questions.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | 3 | import bs4 4 | from langchain.retrievers.multi_vector import MultiVectorRetriever 5 | from langchain_community.document_loaders import WebBaseLoader 6 | from langchain_core.documents import Document 7 | from langchain_core.messages import HumanMessage 8 | from langchain_core.runnables import chain 9 | from langchain_core.stores import InMemoryByteStore 10 | from langchain_core.vectorstores import InMemoryVectorStore 11 | from llm_rag import embeddings, llm 12 | from pydantic import BaseModel, Field 13 | 14 | hypothetical_questions_prompt_template = "Generate a list of exactly {hypothetical_questions_count} hypothetical questions that the below document could be used to answer:\n\n{doc}" 
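# Indexing strategy used below: for each source document an LLM generates a configurable number
# of hypothetical questions (3 by default), only those questions are embedded into the
# vectorstore, and the full document is stored in the byte store under a shared doc_id, so user
# queries are matched against question-shaped text while retrieval returns the original article.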
15 | 16 | 17 | def load_documents(): 18 | articles = [ 19 | "https://lilianweng.github.io/posts/2023-06-23-agent/", 20 | "https://lilianweng.github.io/posts/2024-02-05-human-data-quality/", 21 | ] 22 | loader = WebBaseLoader( 23 | web_paths=articles, 24 | bs_kwargs=dict( 25 | parse_only=bs4.SoupStrainer( 26 | class_=("post-content", "post-title", "post-header") 27 | ) 28 | ), 29 | ) 30 | docs = loader.load() 31 | return docs 32 | 33 | 34 | class HypotheticalQuestions(BaseModel): 35 | """Generate hypothetical questions.""" 36 | 37 | questions: list[str] = Field(..., description="List of questions") 38 | 39 | 40 | @chain 41 | def generate_hypothetical_questions(doc, hypothetical_questions_count=3): 42 | hypothetical_questions_prompt = hypothetical_questions_prompt_template.format( 43 | hypothetical_questions_count=hypothetical_questions_count, doc=doc.page_content 44 | ) 45 | structured_llm = llm.with_structured_output(HypotheticalQuestions) 46 | response = structured_llm.invoke( 47 | [HumanMessage(content=hypothetical_questions_prompt)] 48 | ) 49 | return response.questions 50 | 51 | 52 | def prepare_retriever(docs, embeddings): 53 | vectorstore = InMemoryVectorStore(embeddings) 54 | store = InMemoryByteStore() 55 | id_key = "doc_id" 56 | 57 | retriever = MultiVectorRetriever( 58 | vectorstore=vectorstore, 59 | byte_store=store, 60 | id_key=id_key, 61 | ) 62 | 63 | hypothetical_questions = generate_hypothetical_questions.batch( 64 | docs, {"max_concurrency": len(docs)} 65 | ) 66 | doc_ids = [str(uuid.uuid4()) for _ in docs] 67 | 68 | question_docs = [] 69 | 70 | for i, questions in enumerate(hypothetical_questions): 71 | question_docs.extend( 72 | [ 73 | Document(page_content=question, metadata={id_key: doc_ids[i]}) 74 | for question in questions 75 | ] 76 | ) 77 | 78 | retriever.vectorstore.add_documents(question_docs) 79 | retriever.docstore.mset(list(zip(doc_ids, docs))) 80 | 81 | return retriever 82 | 83 | 84 | docs = load_documents() 85 | retriever = prepare_retriever(docs, embeddings) 86 | -------------------------------------------------------------------------------- /src/llm_rag/indexing/multi_vector/summary.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | 3 | import bs4 4 | from langchain.retrievers.multi_vector import MultiVectorRetriever 5 | from langchain_community.document_loaders import WebBaseLoader 6 | from langchain_core.documents import Document 7 | from langchain_core.messages import HumanMessage 8 | from langchain_core.runnables import chain 9 | from langchain_core.stores import InMemoryByteStore 10 | from langchain_core.vectorstores import InMemoryVectorStore 11 | from llm_rag import embeddings, llm 12 | 13 | summarization_prompt_template = "Summarize the following document:\n\n{doc}" 14 | 15 | 16 | def load_documents(): 17 | articles = [ 18 | "https://lilianweng.github.io/posts/2023-06-23-agent/", 19 | "https://lilianweng.github.io/posts/2024-02-05-human-data-quality/", 20 | ] 21 | loader = WebBaseLoader( 22 | web_paths=articles, 23 | bs_kwargs=dict( 24 | parse_only=bs4.SoupStrainer( 25 | class_=("post-content", "post-title", "post-header") 26 | ) 27 | ), 28 | ) 29 | docs = loader.load() 30 | return docs 31 | 32 | 33 | @chain 34 | def summarize_document(doc): 35 | summarization_prompt = summarization_prompt_template.format(doc=doc.page_content) 36 | response = llm.invoke([HumanMessage(content=summarization_prompt)]) 37 | return response.content 38 | 39 | 40 | def prepare_retriever(docs, embeddings): 41 | 
vectorstore = InMemoryVectorStore(embeddings) 42 | store = InMemoryByteStore() 43 | id_key = "doc_id" 44 | 45 | retriever = MultiVectorRetriever( 46 | vectorstore=vectorstore, 47 | byte_store=store, 48 | id_key=id_key, 49 | ) 50 | 51 | summaries = summarize_document.batch(docs, {"max_concurrency": len(docs)}) 52 | doc_ids = [str(uuid.uuid4()) for _ in docs] 53 | 54 | summary_docs = [ 55 | Document(page_content=summary, metadata={id_key: doc_ids[i]}) 56 | for i, summary in enumerate(summaries) 57 | ] 58 | 59 | retriever.vectorstore.add_documents(summary_docs) 60 | retriever.docstore.mset(list(zip(doc_ids, docs))) 61 | 62 | return retriever 63 | 64 | 65 | docs = load_documents() 66 | retriever = prepare_retriever(docs, embeddings) 67 | -------------------------------------------------------------------------------- /src/llm_rag/indexing/raptor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/indexing/raptor/__init__.py -------------------------------------------------------------------------------- /src/llm_rag/indexing/raptor/raptor.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup as Soup 2 | from langchain.text_splitter import RecursiveCharacterTextSplitter 3 | from langchain_community.document_loaders import RecursiveUrlLoader 4 | from langchain_core.documents import Document 5 | from langchain_core.vectorstores import InMemoryVectorStore 6 | from llm_rag import embeddings 7 | from llm_rag.indexing.raptor.utils import recursive_embed_cluster_summarize 8 | 9 | 10 | def load_documents(): 11 | url = "https://langchain-ai.github.io/langgraph/tutorials/introduction/" 12 | loader = RecursiveUrlLoader( 13 | url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text 14 | ) 15 | introduction_docs = loader.load() 16 | 17 | url = "https://langchain-ai.github.io/langgraph/concepts/" 18 | loader = RecursiveUrlLoader( 19 | url=url, max_depth=2, extractor=lambda x: Soup(x, "html.parser").text 20 | ) 21 | concepts_docs = loader.load() 22 | 23 | docs = introduction_docs + concepts_docs 24 | return docs 25 | 26 | 27 | def prepare_vectorstore(docs): 28 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=0) 29 | splits = text_splitter.split_documents(docs) 30 | 31 | leaf_texts = [doc.page_content for doc in splits] 32 | results = recursive_embed_cluster_summarize(leaf_texts, level=1, n_levels=3) 33 | 34 | all_docs = [ 35 | Document(page_content=text, metadata={"level": 0}) for text in leaf_texts 36 | ] 37 | 38 | for level in sorted(results.keys()): 39 | all_docs.extend( 40 | [ 41 | Document(page_content=summary, metadata={"level": level}) 42 | for summary in results[level][1]["summaries"] 43 | ] 44 | ) 45 | 46 | vectorstore = InMemoryVectorStore(embeddings) 47 | vectorstore.add_documents(documents=all_docs) 48 | return vectorstore 49 | 50 | 51 | docs = load_documents() 52 | vectorstore = prepare_vectorstore(docs) 53 | retriever = vectorstore.as_retriever( 54 | search_kwargs={"k": 10}, 55 | ) 56 | -------------------------------------------------------------------------------- /src/llm_rag/indexing/raptor/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import umap 4 | from langchain_core.messages import HumanMessage 5 | from langchain_core.runnables import chain 
6 | from llm_rag import embeddings, llm 7 | from sklearn.mixture import GaussianMixture 8 | 9 | RANDOM_SEED = 42 # Fixed seed for reproducibility 10 | 11 | 12 | def global_cluster_embeddings( 13 | embeddings: np.ndarray, 14 | dim: int, 15 | n_neighbors: int | None = None, 16 | metric: str = "cosine", 17 | ) -> np.ndarray: 18 | """ 19 | Perform global dimensionality reduction on the embeddings using UMAP. 20 | 21 | Parameters: 22 | - embeddings: The input embeddings as a numpy array. 23 | - dim: The target dimensionality for the reduced space. 24 | - n_neighbors: Optional; the number of neighbors to consider for each point. 25 | If not provided, it defaults to the square root of the number of embeddings. 26 | - metric: The distance metric to use for UMAP. 27 | 28 | Returns: 29 | - A numpy array of the embeddings reduced to the specified dimensionality. 30 | """ 31 | if n_neighbors is None: 32 | n_neighbors = int((len(embeddings) - 1) ** 0.5) 33 | return umap.UMAP( 34 | n_neighbors=n_neighbors, n_components=dim, metric=metric 35 | ).fit_transform(embeddings) 36 | 37 | 38 | def local_cluster_embeddings( 39 | embeddings: np.ndarray, dim: int, num_neighbors: int = 10, metric: str = "cosine" 40 | ) -> np.ndarray: 41 | """ 42 | Perform local dimensionality reduction on the embeddings using UMAP, typically after global clustering. 43 | 44 | Parameters: 45 | - embeddings: The input embeddings as a numpy array. 46 | - dim: The target dimensionality for the reduced space. 47 | - num_neighbors: The number of neighbors to consider for each point. 48 | - metric: The distance metric to use for UMAP. 49 | 50 | Returns: 51 | - A numpy array of the embeddings reduced to the specified dimensionality. 52 | """ 53 | return umap.UMAP( 54 | n_neighbors=num_neighbors, n_components=dim, metric=metric 55 | ).fit_transform(embeddings) 56 | 57 | 58 | def get_optimal_clusters( 59 | embeddings: np.ndarray, max_clusters: int = 50, random_state: int = RANDOM_SEED 60 | ) -> int: 61 | """ 62 | Determine the optimal number of clusters using the Bayesian Information Criterion (BIC) with a Gaussian Mixture Model. 63 | 64 | Parameters: 65 | - embeddings: The input embeddings as a numpy array. 66 | - max_clusters: The maximum number of clusters to consider. 67 | - random_state: Seed for reproducibility. 68 | 69 | Returns: 70 | - An integer representing the optimal number of clusters found. 71 | """ 72 | max_clusters = min(max_clusters, len(embeddings)) 73 | n_clusters = np.arange(1, max_clusters) 74 | bics = [] 75 | 76 | for n in n_clusters: 77 | gm = GaussianMixture(n_components=n, random_state=random_state) 78 | gm.fit(embeddings) 79 | bics.append(gm.bic(embeddings)) 80 | 81 | return n_clusters[np.argmin(bics)] 82 | 83 | 84 | def GMM_cluster( 85 | embeddings: np.ndarray, threshold: float, random_state: int = RANDOM_SEED 86 | ): 87 | """ 88 | Cluster embeddings using a Gaussian Mixture Model (GMM) based on a probability threshold. 89 | 90 | Parameters: 91 | - embeddings: The input embeddings as a numpy array. 92 | - threshold: The probability threshold for assigning an embedding to a cluster. 93 | - random_state: Seed for reproducibility. 94 | 95 | Returns: 96 | - A tuple containing the cluster labels and the number of clusters determined. 
97 | """ 98 | n_clusters = get_optimal_clusters(embeddings) 99 | gm = GaussianMixture(n_components=n_clusters, random_state=random_state) 100 | gm.fit(embeddings) 101 | probs = gm.predict_proba(embeddings) 102 | labels = [np.where(prob > threshold)[0] for prob in probs] 103 | return labels, n_clusters 104 | 105 | 106 | def perform_clustering( 107 | embeddings: np.ndarray, 108 | dim: int, 109 | threshold: float, 110 | ) -> list[np.ndarray]: 111 | """ 112 | Perform clustering on the embeddings by first reducing their dimensionality globally, then clustering 113 | using a Gaussian Mixture Model, and finally performing local clustering within each global cluster. 114 | 115 | Parameters: 116 | - embeddings: The input embeddings as a numpy array. 117 | - dim: The target dimensionality for UMAP reduction. 118 | - threshold: The probability threshold for assigning an embedding to a cluster in GMM. 119 | 120 | Returns: 121 | - A list of numpy arrays, where each array contains the cluster IDs for each embedding. 122 | """ 123 | if len(embeddings) <= dim + 1: 124 | # Avoid clustering when there's insufficient data 125 | return [np.array([0]) for _ in range(len(embeddings))] 126 | 127 | # Global dimensionality reduction 128 | reduced_embeddings_global = global_cluster_embeddings(embeddings, dim) 129 | # Global clustering 130 | global_clusters, n_global_clusters = GMM_cluster( 131 | reduced_embeddings_global, threshold 132 | ) 133 | 134 | all_local_clusters = [np.array([]) for _ in range(len(embeddings))] 135 | total_clusters = 0 136 | 137 | # Iterate through each global cluster to perform local clustering 138 | for i in range(n_global_clusters): 139 | # Extract embeddings belonging to the current global cluster 140 | global_cluster_embeddings_ = embeddings[ 141 | np.array([i in gc for gc in global_clusters]) 142 | ] 143 | 144 | if len(global_cluster_embeddings_) == 0: 145 | continue 146 | if len(global_cluster_embeddings_) <= dim + 1: 147 | # Handle small clusters with direct assignment 148 | local_clusters = [np.array([0]) for _ in global_cluster_embeddings_] 149 | n_local_clusters = 1 150 | else: 151 | # Local dimensionality reduction and clustering 152 | reduced_embeddings_local = local_cluster_embeddings( 153 | global_cluster_embeddings_, dim 154 | ) 155 | local_clusters, n_local_clusters = GMM_cluster( 156 | reduced_embeddings_local, threshold 157 | ) 158 | 159 | # Assign local cluster IDs, adjusting for total clusters already processed 160 | for j in range(n_local_clusters): 161 | local_cluster_embeddings_ = global_cluster_embeddings_[ 162 | np.array([j in lc for lc in local_clusters]) 163 | ] 164 | indices = np.where( 165 | (embeddings == local_cluster_embeddings_[:, None]).all(-1) 166 | )[1] 167 | for idx in indices: 168 | all_local_clusters[idx] = np.append( 169 | all_local_clusters[idx], j + total_clusters 170 | ) 171 | 172 | total_clusters += n_local_clusters 173 | 174 | return all_local_clusters 175 | 176 | 177 | ### --- Our code below --- ### 178 | 179 | 180 | def embed(texts): 181 | """ 182 | Generate embeddings for a list of text documents. 183 | 184 | This function assumes the existence of an `embd` object with a method `embed_documents` 185 | that takes a list of texts and returns their embeddings. 186 | 187 | Parameters: 188 | - texts: List[str], a list of text documents to be embedded. 189 | 190 | Returns: 191 | - numpy.ndarray: An array of embeddings for the given text documents. 
192 | """ 193 | text_embeddings = embeddings.embed_documents(texts) 194 | text_embeddings_np = np.array(text_embeddings) 195 | return text_embeddings_np 196 | 197 | 198 | def embed_cluster_texts(texts): 199 | """ 200 | Embeds a list of texts and clusters them, returning a DataFrame with texts, their embeddings, and cluster labels. 201 | 202 | This function combines embedding generation and clustering into a single step. It assumes the existence 203 | of a previously defined `perform_clustering` function that performs clustering on the embeddings. 204 | 205 | Parameters: 206 | - texts: List[str], a list of text documents to be processed. 207 | 208 | Returns: 209 | - pandas.DataFrame: A DataFrame containing the original texts, their embeddings, and the assigned cluster labels. 210 | """ 211 | text_embeddings_np = embed(texts) # Generate embeddings 212 | cluster_labels = perform_clustering( 213 | text_embeddings_np, 10, 0.1 214 | ) # Perform clustering on the embeddings 215 | df = pd.DataFrame() # Initialize a DataFrame to store the results 216 | df["text"] = texts # Store original texts 217 | df["embd"] = list(text_embeddings_np) # Store embeddings as a list in the DataFrame 218 | df["cluster"] = cluster_labels # Store cluster labels 219 | return df 220 | 221 | 222 | def format_texts(texts: list[str]) -> str: 223 | """ 224 | Formats a list of text documents into a single string. 225 | 226 | Parameters: 227 | - texts: List of texts to format. 228 | 229 | Returns: 230 | - A single string where all text documents are joined by a specific delimiter. 231 | """ 232 | return "--- --- \n --- --- ".join(texts) 233 | 234 | 235 | def embed_cluster_summarize_texts( 236 | texts: list[str], level: int 237 | ) -> tuple[pd.DataFrame, pd.DataFrame]: 238 | """ 239 | Embeds, clusters, and summarizes a list of texts. This function first generates embeddings for the texts, 240 | clusters them based on similarity, expands the cluster assignments for easier processing, and then summarizes 241 | the content within each cluster. 242 | 243 | Parameters: 244 | - texts: A list of text documents to be processed. 245 | - level: The current recursion level; it is recorded alongside each cluster summary in the output. 246 | 247 | Returns: 248 | - Tuple containing two DataFrames: 249 | 1. The first DataFrame (`df_clusters`) includes the original texts, their embeddings, and cluster assignments. 250 | 2. The second DataFrame (`df_summary`) contains summaries for each cluster, the recursion level, 251 | and the cluster identifiers. 252 | """ 253 | # Summarization 254 | prompt_template = """Here is a subset of LangGraph docs. 255 | 256 | LangGraph is a low-level orchestration framework for building controllable agents. 257 | 258 | Give a detailed summary of the documentation provided.
259 | 260 | Documentation: 261 | {context} 262 | """ 263 | 264 | @chain 265 | def summarize_cluster(texts): 266 | formatted_txt = format_texts(texts) 267 | prompt = prompt_template.format(context=formatted_txt) 268 | response = llm.invoke([HumanMessage(content=prompt)]) 269 | return response.content 270 | 271 | # Embed and cluster the texts, resulting in a DataFrame with 'text', 'embd', and 'cluster' columns 272 | df_clusters = embed_cluster_texts(texts) 273 | 274 | # Prepare to expand the DataFrame for easier manipulation of clusters 275 | expanded_list = [] 276 | 277 | # Expand DataFrame entries to document-cluster pairings for straightforward processing 278 | for index, row in df_clusters.iterrows(): 279 | for cluster in row["cluster"]: 280 | expanded_list.append( 281 | {"text": row["text"], "embd": row["embd"], "cluster": cluster} 282 | ) 283 | 284 | # Create a new DataFrame from the expanded list 285 | expanded_df = pd.DataFrame(expanded_list) 286 | 287 | # Retrieve unique cluster identifiers for processing 288 | all_clusters = expanded_df["cluster"].unique().tolist() 289 | 290 | print(f"--Generated {len(all_clusters)} clusters--") 291 | 292 | summaries = summarize_cluster.batch( 293 | [ 294 | expanded_df.loc[expanded_df["cluster"] == cluster_idx, "text"].tolist() 295 | for cluster_idx in all_clusters 296 | ], 297 | config={"max_concurrency": 10}, 298 | ) 299 | 300 | # Create a DataFrame to store summaries with their corresponding cluster and level 301 | df_summary = pd.DataFrame( 302 | { 303 | "summaries": summaries, 304 | "level": [level] * len(summaries), 305 | "cluster": list(all_clusters), 306 | } 307 | ) 308 | 309 | return df_clusters, df_summary 310 | 311 | 312 | def recursive_embed_cluster_summarize( 313 | texts: list[str], level: int = 1, n_levels: int = 3 314 | ) -> dict[int, tuple[pd.DataFrame, pd.DataFrame]]: 315 | """ 316 | Recursively embeds, clusters, and summarizes texts up to a specified level or until 317 | the number of unique clusters becomes 1, storing the results at each level. 318 | 319 | Parameters: 320 | - texts: List[str], texts to be processed. 321 | - level: int, current recursion level (starts at 1). 322 | - n_levels: int, maximum depth of recursion. 323 | 324 | Returns: 325 | - Dict[int, Tuple[pd.DataFrame, pd.DataFrame]], a dictionary where keys are the recursion 326 | levels and values are tuples containing the clusters DataFrame and summaries DataFrame at that level. 
327 | """ 328 | results = {} # Dictionary to store results at each level 329 | 330 | # Perform embedding, clustering, and summarization for the current level 331 | df_clusters, df_summary = embed_cluster_summarize_texts(texts, level) 332 | 333 | # Store the results of the current level 334 | results[level] = (df_clusters, df_summary) 335 | 336 | # Determine if further recursion is possible and meaningful 337 | unique_clusters = df_summary["cluster"].nunique() 338 | 339 | if level < n_levels and unique_clusters > 1: 340 | # Use summaries as the input texts for the next level of recursion 341 | new_texts = df_summary["summaries"].tolist() 342 | next_level_results = recursive_embed_cluster_summarize( 343 | new_texts, level + 1, n_levels 344 | ) 345 | 346 | # Merge the results from the next level into the current results dictionary 347 | results.update(next_level_results) 348 | 349 | return results 350 | -------------------------------------------------------------------------------- /src/llm_rag/indexing/reflection.py: -------------------------------------------------------------------------------- 1 | import bs4 2 | from langchain.text_splitter import RecursiveCharacterTextSplitter 3 | from langchain_community.document_loaders import WebBaseLoader 4 | from langchain_core.vectorstores import InMemoryVectorStore 5 | 6 | from llm_rag import embeddings 7 | 8 | 9 | def load_documents(): 10 | articles = [ 11 | "https://lilianweng.github.io/posts/2023-06-23-agent/", 12 | "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/", 13 | "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/", 14 | ] 15 | loader = WebBaseLoader( 16 | web_paths=articles, 17 | bs_kwargs=dict( 18 | parse_only=bs4.SoupStrainer( 19 | class_=("post-content", "post-title", "post-header") 20 | ) 21 | ), 22 | ) 23 | docs = loader.load() 24 | return docs 25 | 26 | 27 | def prepare_vectorstore(docs, embeddings): 28 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0) 29 | splits = text_splitter.split_documents(docs) 30 | vectorstore = InMemoryVectorStore(embeddings) 31 | vectorstore.add_documents(documents=splits) 32 | return vectorstore 33 | 34 | 35 | docs = load_documents() 36 | vectorstore = prepare_vectorstore(docs, embeddings) 37 | retriever = vectorstore.as_retriever() 38 | -------------------------------------------------------------------------------- /src/llm_rag/indexing/self_query.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import re 3 | from datetime import datetime 4 | 5 | import chromadb 6 | from chromadb.config import Settings 7 | from langchain.chains.query_constructor.base import ( 8 | StructuredQueryOutputParser, 9 | get_query_constructor_prompt, 10 | ) 11 | from langchain.chains.query_constructor.schema import AttributeInfo 12 | from langchain.retrievers.self_query.base import SelfQueryRetriever 13 | from langchain.text_splitter import RecursiveCharacterTextSplitter 14 | from langchain_chroma import Chroma 15 | from langchain_community.document_loaders import YoutubeLoader 16 | from langchain_community.query_constructors.chroma import ChromaTranslator 17 | from langchain_core.messages import HumanMessage 18 | from langchain_core.runnables import chain 19 | from llm_rag import embeddings, llm, project_path 20 | from pytube import Playlist 21 | from pytube.innertube import _default_clients 22 | 23 | VECTORSTORE_PATH = project_path / "data/vectorstore/chroma" 24 | 25 | 26 | async def 
load_youtube_video_transcript(video_url): 27 | data = await YoutubeLoader.from_youtube_url(video_url, add_video_info=True).aload() 28 | return data[0] 29 | 30 | 31 | async def load_documents(): 32 | # https://github.com/pytube/pytube/issues/1894#issue-2180600881 33 | _default_clients["ANDROID"]["context"]["client"]["clientVersion"] = "19.08.35" 34 | 35 | playlist = Playlist( 36 | "https://www.youtube.com/playlist?list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x" 37 | ) 38 | coros = [ 39 | load_youtube_video_transcript(video_url) for video_url in playlist.video_urls 40 | ] 41 | docs = await asyncio.gather(*coros) 42 | return docs 43 | 44 | 45 | def load_data(): 46 | return asyncio.run(load_documents()) 47 | 48 | 49 | def generate_chunk_content(chunk): 50 | return "\n\n".join( 51 | [ 52 | f"Title:\n{chunk.metadata['title']}", 53 | f"Description:\n{chunk.metadata['description']}", 54 | f"Transcript:\n{chunk.page_content}", 55 | ] 56 | ) 57 | 58 | 59 | def prepare_vectorstore(docs, embeddings): 60 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=8000, chunk_overlap=200) 61 | splits = text_splitter.split_documents(docs) 62 | 63 | datetime_format = "%Y-%m-%d %H:%M:%S" 64 | date_format = "%Y%m%d" 65 | processed_splits = [] 66 | 67 | for split in splits: 68 | processed_split = split.copy() 69 | processed_split.metadata["publish_date"] = int( 70 | datetime.strptime( 71 | processed_split.metadata["publish_date"], datetime_format 72 | ).strftime(date_format) 73 | ) 74 | processed_split.page_content = generate_chunk_content(processed_split) 75 | processed_splits.append(processed_split) 76 | 77 | collection_name = "youtube-rag-from-scratch" 78 | vectorstore_settings = Settings(anonymized_telemetry=False) 79 | client = chromadb.PersistentClient( 80 | path=str(VECTORSTORE_PATH), settings=vectorstore_settings 81 | ) 82 | Chroma(collection_name=collection_name, client=client).delete_collection() 83 | vectorstore = Chroma( 84 | collection_name=collection_name, embedding_function=embeddings, client=client 85 | ) 86 | vectorstore.add_documents(documents=processed_splits) 87 | 88 | return vectorstore 89 | 90 | 91 | def generate_query_constructor_prompt(): 92 | translator = ChromaTranslator() 93 | document_content_description = "Tutorial videos about RAG" 94 | metadata_field_info = [ 95 | AttributeInfo( 96 | name="view_count", 97 | description="Video views count", 98 | type="integer", 99 | ), 100 | AttributeInfo( 101 | name="publish_date", 102 | description="Video publish date in format YYYYMMDD", 103 | type="int", 104 | ), 105 | AttributeInfo( 106 | name="length", 107 | description="Video length (seconds)", 108 | type="float", 109 | ), 110 | ] 111 | examples = [ 112 | ( 113 | "Find videos under 5 minutes", 114 | { 115 | "query": "Videos with length less than 300 seconds", 116 | "filter": 'lt("length", 300.0)', 117 | }, 118 | ), 119 | ( 120 | "Find videos published in 2024", 121 | { 122 | "query": "Videos with date greater or equal than 2024-01-01 and less than 2025-01-01", 123 | "filter": 'and(gte("publish_date", 20240101), lt("publish_date", 20250101))', 124 | }, 125 | ), 126 | ( 127 | "Find videos about indexing", 128 | { 129 | "query": "Videos about indexing", 130 | "filter": "NO_FILTER", 131 | }, 132 | ), 133 | ( 134 | "Find 3 videos about indexing", 135 | { 136 | "query": "3 videos about indexing", 137 | "filter": "NO_FILTER", 138 | "limit": 3, 139 | }, 140 | ), 141 | ] 142 | query_constructor_prompt = get_query_constructor_prompt( 143 | document_content_description, 144 | metadata_field_info, 145 | 
examples=examples, 146 | allowed_comparators=translator.allowed_comparators, 147 | allowed_operators=translator.allowed_operators, 148 | enable_limit=True, 149 | ) 150 | return query_constructor_prompt 151 | 152 | 153 | def clean_json_string(message): 154 | pattern = r".*?```json\s*(.*?)\s*```" 155 | cleaned_string = re.sub( 156 | pattern, r"\1", message.content, flags=re.DOTALL | re.IGNORECASE 157 | ) 158 | return cleaned_string.strip() 159 | 160 | 161 | @chain 162 | def query_constructor(query): 163 | query_constructor_prompt = generate_query_constructor_prompt() 164 | query_constructor_prompt_messages = query_constructor_prompt.format(query=query) 165 | response = llm.invoke([HumanMessage(content=query_constructor_prompt_messages)]) 166 | clean_response = clean_json_string(response) 167 | 168 | output_parser = StructuredQueryOutputParser.from_components( 169 | allowed_comparators=translator.allowed_comparators, 170 | allowed_operators=translator.allowed_operators, 171 | ) 172 | parsed_response = output_parser.invoke(clean_response) 173 | 174 | return parsed_response 175 | 176 | 177 | def get_collection_size(vectorstore): 178 | try: 179 | collection_size = len(vectorstore.get()["ids"]) 180 | except Exception: 181 | collection_size = 0 182 | 183 | return collection_size 184 | 185 | 186 | docs = load_data() 187 | vectorstore = prepare_vectorstore(docs, embeddings) 188 | translator = ChromaTranslator() 189 | retriever = SelfQueryRetriever( 190 | query_constructor=query_constructor, 191 | vectorstore=vectorstore, 192 | structured_query_translator=translator, 193 | verbose=True, 194 | search_kwargs={"k": get_collection_size(vectorstore)}, 195 | ) 196 | --------------------------------------------------------------------------------
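A minimal usage sketch for the self-query retriever defined in /src/llm_rag/indexing/self_query.py above. It is illustrative only: it assumes the package is installed so the module is importable, that the keys from /.env.example are configured, and that the example query string is representative; none of this snippet appears in the repository itself.

from llm_rag.indexing.self_query import query_constructor, retriever

# Note: importing the module runs the indexing pipeline at import time
# (playlist download, chunking, and writing the Chroma collection).

# The LLM-backed query constructor turns a natural-language request into a
# structured query (semantic query, metadata filter, optional limit).
structured_query = query_constructor.invoke("Find 3 videos about indexing published in 2024")
print(structured_query)

# The SelfQueryRetriever translates that structured query into a Chroma filter
# via ChromaTranslator and runs it against the YouTube-transcript vector store.
docs = retriever.invoke("Find 3 videos about indexing published in 2024")
for doc in docs:
    print(doc.metadata["title"], doc.metadata["publish_date"])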
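Similarly, a hedged sketch of driving the RAPTOR clustering-and-summarization helpers shown earlier (recursive_embed_cluster_summarize and friends). The import path llm_rag.indexing.raptor.utils and the toy document list are assumptions for illustration; a real run would pass many more chunks and requires the OpenAI key to be set.

from llm_rag.indexing.raptor.utils import recursive_embed_cluster_summarize  # assumed module path

# Toy leaf texts; in the actual pipeline these would be LangGraph documentation chunks.
leaf_texts = [
    "LangGraph builds agent workflows as graphs of nodes and edges.",
    "A StateGraph carries a typed state object between nodes.",
    "Conditional edges let the graph branch on the current state.",
]

# Embed, cluster, and summarize, recursing on the summaries for up to three levels
# or until only a single cluster remains.
results = recursive_embed_cluster_summarize(leaf_texts, level=1, n_levels=3)

for level, (df_clusters, df_summary) in results.items():
    print(f"level {level}: {len(df_clusters)} texts -> {len(df_summary)} cluster summaries")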