├── .env.example
├── .gitignore
├── .python-version
├── .vscode
└── launch.json
├── LICENSE
├── README.md
├── langgraph.json
├── notebooks
└── rag-from-scratch
│ ├── 01-overview.ipynb
│ ├── 02-indexing.ipynb
│ ├── 03-retrieval.ipynb
│ ├── 04-generation.ipynb
│ ├── 05-multi-query.ipynb
│ ├── 06-rag-fusion.ipynb
│ ├── 07-01-decomposition-recursive.ipynb
│ ├── 07-02-decomposition-parallel.ipynb
│ ├── 08-step-back.ipynb
│ ├── 09-hyde.ipynb
│ ├── 10-01-logical-routing.ipynb
│ ├── 10-02-semantic-routing.ipynb
│ ├── 11-query-construction.ipynb
│ ├── 12-01-multi-vector-summary.ipynb
│ ├── 12-02-multi-vector-chunks.ipynb
│ ├── 12-03-multi-vector-hypothetical-questions.ipynb
│ ├── 13-raptor.ipynb
│ ├── 14-colbert.ipynb
│ ├── 15-crag.ipynb
│ ├── 16-self-rag.ipynb
│ └── images
│ ├── 01-01-overview.png
│ ├── 01-02-overview.png
│ ├── 02-indexing.png
│ ├── 03-01-retrieval.png
│ ├── 03-02-retrieval.png
│ ├── 04-generation.png
│ ├── 05-multi-query.png
│ ├── 06-rag-fusion.png
│ ├── 07-01-decomposition-recursive.png
│ ├── 07-02-decomposition-parallel.png
│ ├── 08-step-back.png
│ ├── 09-hyde.png
│ ├── 10-01-logical-routing.png
│ ├── 10-01-structured-output.png
│ ├── 10-02-semantic-routing.png
│ ├── 11-query-construction.png
│ ├── 11-self-query.jpg
│ ├── 12-01-multi-vector-summary.png
│ ├── 12-02-multi-vector-chunks.png
│ ├── 12-03-multi-vector-hypothetical-questions.png
│ ├── 13-raptor.png
│ ├── 14-01-colbert.png
│ ├── 14-02-colbert.jpg
│ ├── 15-crag-implementation.png
│ ├── 15-crag.png
│ ├── 16-self-rag-implementation.png
│ ├── 16-self-rag.png
│ ├── generation.png
│ ├── indexing-01.png
│ ├── query-construction-01.png
│ ├── query-translation-01.png
│ ├── query-translation-02.png
│ ├── rag.png
│ ├── retrieval.png
│ └── routing-01.png
├── pyproject.toml
├── requirements.txt
├── src
└── llm_rag
│ ├── __init__.py
│ ├── graphs
│ ├── colbert
│ │ ├── __init__.py
│ │ └── colbert_model.py
│ ├── crag
│ │ ├── __init__.py
│ │ └── crag.py
│ ├── decomposition
│ │ ├── __init__.py
│ │ ├── parallel.py
│ │ └── recursive.py
│ ├── hyde
│ │ ├── __init__.py
│ │ └── hyde.py
│ ├── multi_query
│ │ ├── __init__.py
│ │ └── multi_query.py
│ ├── multi_vector
│ │ ├── __init__.py
│ │ ├── chunks.py
│ │ ├── hypothetical_questions.py
│ │ └── summary.py
│ ├── query_construction
│ │ ├── __init__.py
│ │ └── self_query.py
│ ├── rag_fusion
│ │ ├── __init__.py
│ │ └── rag_fusion.py
│ ├── raptor
│ │ ├── __init__.py
│ │ └── raptor.py
│ ├── routing
│ │ ├── __init__.py
│ │ ├── logical.py
│ │ └── semantic.py
│ ├── self_rag
│ │ ├── __init__.py
│ │ └── self_rag.py
│ ├── step_back
│ │ ├── __init__.py
│ │ └── step_back.py
│ └── utils.py
│ └── indexing
│ ├── __init__.py
│ ├── article.py
│ ├── colbert_model.py
│ ├── multi_vector
│ ├── __init__.py
│ ├── chunks.py
│ ├── hypothetical_questions.py
│ └── summary.py
│ ├── raptor
│ ├── __init__.py
│ ├── raptor.py
│ └── utils.py
│ ├── reflection.py
│ └── self_query.py
└── uv.lock
/.env.example:
--------------------------------------------------------------------------------
1 | LANGCHAIN_TRACING_V2=true
2 | LANGCHAIN_API_KEY=
3 |
4 | OPENAI_API_KEY=
5 |
6 | TAVILY_API_KEY=
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # UV
98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | #uv.lock
102 |
103 | # poetry
104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more
106 | # commonly ignored for libraries.
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 |
110 | # pdm
111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | # in version control.
115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116 | .pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 |
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 |
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 |
127 | # SageMath parsed files
128 | *.sage.py
129 |
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 |
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 |
143 | # Rope project settings
144 | .ropeproject
145 |
146 | # mkdocs documentation
147 | /site
148 |
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 |
154 | # Pyre type checker
155 | .pyre/
156 |
157 | # pytype static type analyzer
158 | .pytype/
159 |
160 | # Cython debug symbols
161 | cython_debug/
162 |
163 | # PyCharm
164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | # and can be added to the global gitignore or merged into this file. For a more nuclear
167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | #.idea/
169 |
170 | # Ruff stuff:
171 | .ruff_cache/
172 |
173 | # PyPI configuration file
174 | .pypirc
175 |
176 |
177 | .env.*
178 | data/
179 | .langgraph_api/
180 | .ragatouille/
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.11
2 |
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | // Use IntelliSense to learn about possible attributes.
3 | // Hover to view descriptions of existing attributes.
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5 | "version": "0.2.0",
6 | "configurations": [
7 | {
8 | "name": "Python Debugger: Current File",
9 | "type": "debugpy",
10 | "request": "launch",
11 | "program": "${file}",
12 | "console": "integratedTerminal",
13 | "justMyCode": false
14 | },
15 | {
16 | "name": "Python Debugger: Remote Attach",
17 | "type": "debugpy",
18 | "request": "attach",
19 | "connect": {
20 | "host": "localhost",
21 | "port": 5678
22 | },
23 | "pathMappings": [
24 | {
25 | "localRoot": "${workspaceFolder}",
26 | "remoteRoot": "."
27 | }
28 | ]
29 | }
30 | ]
31 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025, Dmitry Labazkin
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LLM RAG
2 | ## Configuration
3 | ### Environment
4 | - Copy `.env.example` file to `.env`
5 | - Fill in the required values
6 | ## Installation
7 | ### Using `pip`
8 | - Create and activate new Python virtual environment
9 | - `pip install -r requirements.txt`
10 | - `pip install -e .` (basic packages)
11 | or
12 | `pip install -e ".[ragatouille]"` (with `ragatouille` for ColBERT)
13 | ### Using `uv`
14 | - `uv sync --group dev` (basic packages)
15 | or
16 | `uv sync --group dev --extra ragatouille` (with `ragatouille` for ColBERT)
17 | - `uv pip install -e .`
18 |
19 |
20 | ## RAG From Scratch
21 | Author of original series - [Lance Martin](https://github.com/rlancemartin)
22 | ### Original Sources
23 | #### Video
24 | [YouTube Playlist](https://www.youtube.com/playlist?list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x)
25 |
26 | #### Code
27 | [Main Repository](https://github.com/langchain-ai/rag-from-scratch)
28 | [Corrective RAG (CRAG)](https://github.com/langchain-ai/langgraph/blob/main/examples/rag/langgraph_crag.ipynb)
29 | [Self-RAG](https://github.com/langchain-ai/langgraph/blob/main/examples/rag/langgraph_self_rag.ipynb)
30 |
31 |
32 | ### Table of Contents
33 | | Part | Name | Video | Slides | Jupyter Notebook | Python Script | LangGraph Studio |
34 | | ---- | ---------------------------------------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------- |
35 | | 1 | Overview | [Watch](https://www.youtube.com/watch?v=wd7TZ4w1mSw&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1C9IaAwHoWcc4RSTqo-pCoN3h0nCgqV2JEYZUJunv_9Q) | [01-overview.ipynb](notebooks/rag-from-scratch/01-overview.ipynb) | - | - |
36 | | 2 | Indexing | [Watch](https://www.youtube.com/watch?v=bjb_EMsTDKI&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1MhsCqZs7wTX6P19TFnA9qRSlxH3u-1-0gWkhBiDG9lQ) | [02-indexing.ipynb](notebooks/rag-from-scratch/02-indexing.ipynb) | - | - |
37 | | 3 | Retrieval | [Watch](https://www.youtube.com/watch?v=LxNVgdIz9sU&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/124I8jlBRCbb0LAUhdmDwbn4nREqxSxZU1RF_eTGXUGc) | [03-retrieval.ipynb](notebooks/rag-from-scratch/03-retrieval.ipynb) | - | - |
38 | | 4 | Generation | [Watch](https://www.youtube.com/watch?v=Vw52xyyFsB8&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1eRJwzbdSv71e9Ou9yeqziZrz1UagwX8B1kL4TbL5_Gc) | [04-generation.ipynb](notebooks/rag-from-scratch/04-generation.ipynb) | - | - |
39 | | 5 | Query Translation - Multi-Query | [Watch](https://www.youtube.com/watch?v=JChPi0CRnDY&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/15pWydIszbQG3Ipur9COfTduutTZm6ULdkkyX-MNry8I) | [05-multi-query.ipynb](notebooks/rag-from-scratch/05-multi-query.ipynb) | [multi_query.py](src/llm_rag/graphs/multi_query/multi_query.py) | Query Translation - Multi-Query |
40 | | 6 | Query Translation - RAG-Fusion | [Watch](https://www.youtube.com/watch?v=77qELPbNgxA&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1EwykmdVSQqlh6XpGt8APOMYp4q1CZqqeclAx61pUcjI) | [06-rag-fusion.ipynb](notebooks/rag-from-scratch/06-rag-fusion.ipynb) | [rag_fusion.py](src/llm_rag/graphs/rag_fusion/rag_fusion.py) | Query Translation - RAG-Fusion |
41 | | 7 | Query Translation - Decomposition | [Watch](https://www.youtube.com/watch?v=h0OPWlEOank&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1O97KYrsmYEmhpQ6nkvOVAqQYMJvIaZulGFGmz4cuuVE) | [07-01-decomposition-recursive.ipynb](notebooks/rag-from-scratch/07-01-decomposition-recursive.ipynb)
[07-02-decomposition-parallel.ipynb](notebooks/rag-from-scratch/07-02-decomposition-parallel.ipynb) | [recursive.py](src/llm_rag/graphs/decomposition/recursive.py)
[parallel.py](src/llm_rag/graphs/decomposition/parallel.py) | Query Translation - Decomposition (Recursive)
Query Translation - Decomposition (Parallel) |
42 | | 8 | Query Translation - Step-Back Prompting | [Watch](https://www.youtube.com/watch?v=xn1jEjRyJ2U&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1L0MRGVDxYA1eLOR0L_6Ze1l2YV8AhN1QKUtmNA-fJlU) | [08-step-back.ipynb](notebooks/rag-from-scratch/08-step-back.ipynb) | [step_back.py](src/llm_rag/graphs/step_back/step_back.py) | Query Translation - Step-Back Prompting |
43 | | 9 | Query Translation - HyDE | [Watch](https://www.youtube.com/watch?v=SaDzIVkYqyY&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/10MmB_QEiS4m00xdyu-92muY-8jC3CdaMpMXbXjzQXsM) | [09-hyde.ipynb](notebooks/rag-from-scratch/09-hyde.ipynb) | [hyde.py](src/llm_rag/graphs/hyde/hyde.py) | Query Translation - HyDE |
44 | | 10 | Routing | [Watch](https://www.youtube.com/watch?v=pfpIndq7Fi8&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1kC6jFj8C_1ZXDYcFaJ8vhJvCYEwxwsVqk2VVeKKuyx4) | [10-01-logical-routing.ipynb](notebooks/rag-from-scratch/10-01-logical-routing.ipynb)
[10-02-semantic-routing.ipynb](notebooks/rag-from-scratch/10-02-semantic-routing.ipynb) | [logical.py](src/llm_rag/graphs/routing/logical.py)
[semantic.py](src/llm_rag/graphs/routing/semantic.py) | Routing - Logical Routing
Routing - Semantic Routing |
45 | | 11 | Query Construction | [Watch](https://www.youtube.com/watch?v=kl6NwWYxvbM&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1bUwz4PgzMIwbBi7DFzpHUkLL4Z6jcKmNGJ-BlK0Hpps) | [11-query-construction.ipynb](notebooks/rag-from-scratch/11-query-construction.ipynb) | [self_query.py](src/llm_rag/graphs/query_construction/self_query.py) | Query Construction - Self-Query |
46 | | 12 | Indexing - Multi-Representation Indexing | [Watch](https://www.youtube.com/watch?v=gTCU9I6QqCE&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1Pu3q1MApA-V_PMvL2YDmWzaDX3HkTh0uUl2BFTcsalk) | [12-01-multi-vector-summary.ipynb](notebooks/rag-from-scratch/12-01-multi-vector-summary.ipynb)
[12-02-multi-vector-chunks.ipynb](notebooks/rag-from-scratch/12-02-multi-vector-chunks.ipynb)
[12-03-multi-vector-hypothetical-questions.ipynb](notebooks/rag-from-scratch/12-03-multi-vector-hypothetical-questions.ipynb) | [summary.py](src/llm_rag/graphs/multi_vector/summary.py)
[chunks.py](src/llm_rag/graphs/multi_vector/chunks.py)
[hypothetical_questions.py](src/llm_rag/graphs/multi_vector/hypothetical_questions.py) | Indexing - Multi-Vector - Summary
Indexing - Multi-Vector - Chunks
Indexing - Multi-Vector - Hypothetical Questions |
47 | | 13 | Indexing - RAPTOR | [Watch](https://www.youtube.com/watch?v=z_6EeA2LDSw&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1U8NpSS1sq3-deiNvSGGNg_UY2Zh_5fS2HabuQFJPftc) | [13-raptor.ipynb](notebooks/rag-from-scratch/13-raptor.ipynb) | [raptor.py](src/llm_rag/graphs/raptor/raptor.py) | - |
48 | | 14 | Indexing - ColBERT | [Watch](https://www.youtube.com/watch?v=cN6S0Ehm7_8&list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x) | [View](https://docs.google.com/presentation/d/1IRhAdGjIevrrotdplHNcc4aXgIYyKamUKTWtB3m3aMU) | [14-colbert.ipynb](notebooks/rag-from-scratch/14-colbert.ipynb) | [colbert_model.py](src/llm_rag/graphs/colbert/colbert_model.py) | - |
49 | | 15 | Retrieval - CRAG | [Watch](https://www.youtube.com/watch?v=pbAd8O1Lvm4) | - | [15-crag.ipynb](notebooks/rag-from-scratch/15-crag.ipynb) | [crag.py](src/llm_rag/graphs/crag/crag.py) | Retrieval - CRAG |
50 | | 16 | Generation - Self-RAG | [Watch](https://www.youtube.com/watch?v=pbAd8O1Lvm4) | - | [16-self-rag.ipynb](notebooks/rag-from-scratch/16-self-rag.ipynb) | [self_rag.py](src/llm_rag/graphs/self_rag/self_rag.py) | Generation - Self-RAG |
51 |
52 |
53 | ### Query Translation
54 | #### RAG Fusion
55 | [Forget RAG, the Future is RAG-Fusion](https://medium.com/towards-data-science/forget-rag-the-future-is-rag-fusion-1147298d8ad1)
56 | [RAG-Fusion: The Next Frontier of Search Technology](https://github.com/Raudaschl/rag-fusion)
57 | [Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf)
58 | [Implementing Reciprocal Rank Fusion (RRF) in Python](https://safjan.com/implementing-rank-fusion-in-python)
59 |
60 | #### Decomposition (Recursive)
61 | [Least-to-Most Prompting Enables Complex Reasoning in Large Language Models](https://arxiv.org/abs/2205.10625)
62 | [Interleaving Retrieval with Chain-of-Thought Reasoning for Knowledge-Intensive Multi-Step Questions](https://arxiv.org/abs/2212.10509)
63 |
64 | #### Step-Back Prompting
65 | [Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models](https://arxiv.org/abs/2310.06117)
66 |
67 | #### HyDE
68 | [Precise Zero-Shot Dense Retrieval without Relevance Labels](https://arxiv.org/abs/2212.10496)
69 |
70 |
71 | ### Routing
72 | [Semantic Router](https://github.com/aurelio-labs/semantic-router)
73 |
74 |
75 | ### Query Construction
76 | [Langchain Self Query With Dates](https://notes.alexkehayias.com/langchain-self-query-with-dates/)
77 |
78 |
79 | ### Indexing
80 | #### Multi-Representation Indexing
81 | [Dense X Retrieval: What Retrieval Granularity Should We Use?](https://arxiv.org/abs/2312.06648)
82 |
83 | #### RAPTOR
84 | [RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval](https://arxiv.org/pdf/2401.18059)
85 | [Building long context RAG with RAPTOR from scratch](https://www.youtube.com/watch?v=jbGchdTL7d0)
86 |
87 | #### ColBERT
88 | [ColBERT: Efficient and Effective Passage Search via Contextualized Late Interaction over BERT](https://arxiv.org/abs/2004.12832)
89 | [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](https://arxiv.org/abs/2112.01488)
90 | [RAGatouille](https://github.com/AnswerDotAI/RAGatouille)
91 | [[Paper review] ColBERT, ColBERTv2](https://pangyoalto.com/en/colbertv1-2-review-en)
92 | [Overcoming the Limits of RAG with ColBERT](https://thenewstack.io/overcoming-the-limits-of-rag-with-colbert)
93 | [ColBERT Inference in the Browser](https://colbert.aiserv.cloud)
94 |
95 |
96 | ### Agentic RAG
97 | [Self-Reflective RAG with LangGraph](https://blog.langchain.dev/agentic-rag-with-langgraph)
98 |
99 | #### CRAG
100 | [Corrective Retrieval Augmented Generation](https://arxiv.org/abs/2401.15884)
101 |
102 | #### Self-RAG
103 | [Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection](https://arxiv.org/abs/2310.11511)
--------------------------------------------------------------------------------
/langgraph.json:
--------------------------------------------------------------------------------
1 | {
2 | "dependencies": [
3 | "."
4 | ],
5 | "graphs": {
6 | "Query Translation - Multi-Query": "src/llm_rag/graphs/multi_query/multi_query.py:graph",
7 | "Query Translation - RAG-Fusion": "src/llm_rag/graphs/rag_fusion/rag_fusion.py:graph",
8 | "Query Translation - Decomposition (Recursive)": "src/llm_rag/graphs/decomposition/recursive.py:graph",
9 | "Query Translation - Decomposition (Parallel)": "src/llm_rag/graphs/decomposition/parallel.py:graph",
10 | "Query Translation - Step-Back Prompting": "src/llm_rag/graphs/step_back/step_back.py:graph",
11 | "Query Translation - HyDE": "src/llm_rag/graphs/hyde/hyde.py:graph",
12 | "Routing - Logical Routing": "src/llm_rag/graphs/routing/logical.py:graph",
13 | "Routing - Semantic Routing": "src/llm_rag/graphs/routing/semantic.py:graph",
14 | "Query Construction - Self-Query": "src/llm_rag/graphs/query_construction/self_query.py:graph",
15 | "Indexing - Multi-Vector - Summary": "src/llm_rag/graphs/multi_vector/summary.py:graph",
16 | "Indexing - Multi-Vector - Chunks": "src/llm_rag/graphs/multi_vector/chunks.py:graph",
17 | "Indexing - Multi-Vector - Hypothetical Questions": "src/llm_rag/graphs/multi_vector/hypothetical_questions.py:graph",
18 | "Retrieval - CRAG": "src/llm_rag/graphs/crag/crag.py:graph",
19 | "Generation - Self-RAG": "src/llm_rag/graphs/self_rag/self_rag.py:graph"
20 | },
21 | "env": "./.env",
22 | "dockerfile_lines": []
23 | }
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/01-overview.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 10,
6 | "id": "07416ba6-134d-4b1e-905b-aacd355cb91e",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import os\n",
11 | "\n",
12 | "from dotenv import find_dotenv, load_dotenv"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 11,
18 | "id": "5f57d2d4-b264-4de9-bc06-c14307418060",
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "data": {
23 | "text/plain": [
24 | "True"
25 | ]
26 | },
27 | "execution_count": 11,
28 | "metadata": {},
29 | "output_type": "execute_result"
30 | }
31 | ],
32 | "source": [
33 | "load_dotenv(find_dotenv('.env'))"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 12,
39 | "id": "66b88d20-563a-4ccb-bb28-6b6144d048b7",
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "os.environ[\"LANGCHAIN_PROJECT\"] = \"RAG From Scratch: Part 1 (Overview)\""
44 | ]
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "id": "9c35780a-28cd-4e73-a39c-b533dca92276",
49 | "metadata": {},
50 | "source": [
51 | ""
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "id": "66049591-4763-41c0-9f86-c1da026294a0",
57 | "metadata": {},
58 | "source": [
59 | "# Part 1: Overview"
60 | ]
61 | },
62 | {
63 | "cell_type": "markdown",
64 | "id": "43f0b3d9-cb4c-4962-9a9a-d8775a72468c",
65 | "metadata": {},
66 | "source": [
67 | ""
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "id": "d592a47f-42df-4e4a-86da-65f245129e1c",
73 | "metadata": {},
74 | "source": [
75 | ""
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "id": "b0e3e69b-4bbc-41b6-8ffe-dbc4c5221ca3",
81 | "metadata": {},
82 | "source": [
83 | "## Configure components"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": 13,
89 | "id": "590c8ee8-4228-4054-b312-f89e5ff6d635",
90 | "metadata": {},
91 | "outputs": [],
92 | "source": [
93 | "from langchain_openai import ChatOpenAI, OpenAIEmbeddings"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": 14,
99 | "id": "bfb1e9b8-70e6-4490-bb8a-0d0a066c4683",
100 | "metadata": {},
101 | "outputs": [
102 | {
103 | "data": {
104 | "text/plain": [
105 | "AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 8, 'total_tokens': 18, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_b376dfbbd5', 'id': 'chatcmpl-BKMvWXqejr2sHYneYvd4Sr6SbRKv0', 'finish_reason': 'stop', 'logprobs': None}, id='run-f1bd73da-f189-46b5-8fab-aa082cf98064-0', usage_metadata={'input_tokens': 8, 'output_tokens': 10, 'total_tokens': 18, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})"
106 | ]
107 | },
108 | "execution_count": 14,
109 | "metadata": {},
110 | "output_type": "execute_result"
111 | }
112 | ],
113 | "source": [
114 | "llm = ChatOpenAI(\n",
115 | " model=\"gpt-4o-mini\",\n",
116 | " temperature=0\n",
117 | ")\n",
118 | "llm.invoke(\"Hello\")"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 15,
124 | "id": "c2bdeebb-3875-4fdd-97be-346c92eeb240",
125 | "metadata": {},
126 | "outputs": [
127 | {
128 | "data": {
129 | "text/plain": [
130 | "1536"
131 | ]
132 | },
133 | "execution_count": 15,
134 | "metadata": {},
135 | "output_type": "execute_result"
136 | }
137 | ],
138 | "source": [
139 | "embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n",
140 | "len(embeddings.embed_query(\"Hello\"))"
141 | ]
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "id": "9ca63ab0-004c-4f93-b0e6-a06c7e84c3ed",
146 | "metadata": {},
147 | "source": [
148 | "## Load documents"
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": 16,
154 | "id": "5b127b80-4839-4edd-9b6e-1a55a90a3fba",
155 | "metadata": {},
156 | "outputs": [],
157 | "source": [
158 | "import bs4\n",
159 | "from langchain_community.document_loaders import WebBaseLoader"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 17,
165 | "id": "e29df1f8-327a-437a-85f9-d87867cbfd28",
166 | "metadata": {},
167 | "outputs": [
168 | {
169 | "data": {
170 | "text/plain": [
171 | "1"
172 | ]
173 | },
174 | "execution_count": 17,
175 | "metadata": {},
176 | "output_type": "execute_result"
177 | }
178 | ],
179 | "source": [
180 | "loader = WebBaseLoader(\n",
181 | " web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n",
182 | " bs_kwargs=dict(\n",
183 | " parse_only=bs4.SoupStrainer(\n",
184 | " class_=(\"post-content\", \"post-title\", \"post-header\")\n",
185 | " )\n",
186 | " ),\n",
187 | ")\n",
188 | "docs = loader.load()\n",
189 | "len(docs)"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": 18,
195 | "id": "2d06a9e8-aeab-4b37-8ac4-20279c0802af",
196 | "metadata": {},
197 | "outputs": [
198 | {
199 | "name": "stdout",
200 | "output_type": "stream",
201 | "text": [
202 | "\n",
203 | "\n",
204 | " LLM Powered Autonomous Agents\n",
205 | " \n",
206 | "Date: June 23, 2023 | Estimated Reading Time: 31 min | Author: Lilian Weng\n",
207 | "\n",
208 | "\n",
209 | "Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\n",
210 | "Agent System Overview#\n",
211 | "In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n",
212 | "\n",
213 | "Planning\n",
214 | "\n",
215 | "Subgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\n",
216 | "Reflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\n",
217 | "\n",
218 | "\n",
219 | "Memory\n",
220 | "\n",
221 | "Short-term memory: I \n"
222 | ]
223 | }
224 | ],
225 | "source": [
226 | "print(docs[0].page_content[:1000])"
227 | ]
228 | },
229 | {
230 | "cell_type": "markdown",
231 | "id": "1aff715e-f153-486a-9e4a-85ae300b03e7",
232 | "metadata": {},
233 | "source": [
234 | "## Split documents"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": 19,
240 | "id": "8621acc3-5ab8-4f70-a2cb-171795bcf9cb",
241 | "metadata": {},
242 | "outputs": [],
243 | "source": [
244 | "from langchain.text_splitter import RecursiveCharacterTextSplitter"
245 | ]
246 | },
247 | {
248 | "cell_type": "code",
249 | "execution_count": 20,
250 | "id": "3143439a-0271-4ad1-8f9b-78cd2a631098",
251 | "metadata": {},
252 | "outputs": [
253 | {
254 | "data": {
255 | "text/plain": [
256 | "66"
257 | ]
258 | },
259 | "execution_count": 20,
260 | "metadata": {},
261 | "output_type": "execute_result"
262 | }
263 | ],
264 | "source": [
265 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
266 | "splits = text_splitter.split_documents(docs)\n",
267 | "len(splits)"
268 | ]
269 | },
270 | {
271 | "cell_type": "markdown",
272 | "id": "05600430-cf93-4429-92ea-ca183ec2310c",
273 | "metadata": {},
274 | "source": [
275 | "## Store documents"
276 | ]
277 | },
278 | {
279 | "cell_type": "code",
280 | "execution_count": 21,
281 | "id": "e13dd7d7-06d6-47ef-8251-ea0ec7dec665",
282 | "metadata": {},
283 | "outputs": [],
284 | "source": [
285 | "from langchain_core.vectorstores import InMemoryVectorStore"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": 22,
291 | "id": "c49eece0-d92c-47ac-8db5-bd36e6eb185e",
292 | "metadata": {},
293 | "outputs": [
294 | {
295 | "data": {
296 | "text/plain": [
297 | "(66, 66)"
298 | ]
299 | },
300 | "execution_count": 22,
301 | "metadata": {},
302 | "output_type": "execute_result"
303 | }
304 | ],
305 | "source": [
306 | "vectorstore = InMemoryVectorStore(embeddings)\n",
307 | "doc_ids = vectorstore.add_documents(documents=splits)\n",
308 | "len(doc_ids), len(vectorstore.store)"
309 | ]
310 | },
311 | {
312 | "cell_type": "markdown",
313 | "id": "06c4043d-c52d-4148-b583-d67d628dbff5",
314 | "metadata": {},
315 | "source": [
316 | "## RAG"
317 | ]
318 | },
319 | {
320 | "cell_type": "code",
321 | "execution_count": 23,
322 | "id": "561288ac-3b88-4ef0-a4df-9e3ce2d5f029",
323 | "metadata": {},
324 | "outputs": [],
325 | "source": [
326 | "from typing import TypedDict\n",
327 | "\n",
328 | "from langchain_core.documents import Document\n",
329 | "from langchain_core.messages import HumanMessage\n",
330 | "from langgraph.graph import END, START, StateGraph"
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 24,
336 | "id": "f7f2b84b-6454-4419-81dd-22c1df67e4ac",
337 | "metadata": {},
338 | "outputs": [
339 | {
340 | "name": "stdout",
341 | "output_type": "stream",
342 | "text": [
343 | "You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\n",
344 | "Question: {question} \n",
345 | "Context: {context} \n",
346 | "Answer:\n"
347 | ]
348 | }
349 | ],
350 | "source": [
351 | "rag_prompt_template = \"\"\"You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\n",
352 | "Question: {question} \n",
353 | "Context: {context} \n",
354 | "Answer:\"\"\"\n",
355 | "print(rag_prompt_template)"
356 | ]
357 | },
358 | {
359 | "cell_type": "code",
360 | "execution_count": 25,
361 | "id": "c7efcd9e-e6ef-4de0-9c48-0fab0bfb860a",
362 | "metadata": {},
363 | "outputs": [],
364 | "source": [
365 | "def format_docs(docs):\n",
366 | " return \"\\n\\n\".join(doc.page_content for doc in docs)"
367 | ]
368 | },
369 | {
370 | "cell_type": "code",
371 | "execution_count": 26,
372 | "id": "52e36a6b-474e-4d75-9a72-31f6c05e2f7b",
373 | "metadata": {},
374 | "outputs": [],
375 | "source": [
376 | "class State(TypedDict):\n",
377 | " question: str\n",
378 | " context: list[Document]\n",
379 | " answer: str"
380 | ]
381 | },
382 | {
383 | "cell_type": "code",
384 | "execution_count": 27,
385 | "id": "708e72c8-6cc2-4d87-ac99-a89d6dba7e2e",
386 | "metadata": {},
387 | "outputs": [
388 | {
389 | "data": {
390 | "image/png": "",
391 | "text/plain": [
392 | ""
393 | ]
394 | },
395 | "execution_count": 27,
396 | "metadata": {},
397 | "output_type": "execute_result"
398 | }
399 | ],
400 | "source": [
401 | "def retrieve(state: State):\n",
402 | " retrieved_docs = vectorstore.similarity_search(state[\"question\"])\n",
403 | " return {\"context\": retrieved_docs}\n",
404 | "\n",
405 | "def generate(state: State):\n",
406 | " docs_content = format_docs(state[\"context\"])\n",
407 | " rag_prompt = rag_prompt_template.format(\n",
408 | " question=state[\"question\"],\n",
409 | " context=docs_content\n",
410 | " )\n",
411 | " response = llm.invoke([\n",
412 | " HumanMessage(content=rag_prompt)\n",
413 | " ])\n",
414 | " return {\"answer\": response.content}\n",
415 | "\n",
416 | "\n",
417 | "graph_builder = StateGraph(State).add_sequence([retrieve, generate])\n",
418 | "graph_builder.add_edge(START, \"retrieve\")\n",
419 | "graph_builder.add_edge(\"generate\", END)\n",
420 | "graph = graph_builder.compile()\n",
421 | "graph"
422 | ]
423 | },
424 | {
425 | "cell_type": "code",
426 | "execution_count": 28,
427 | "id": "5faffc5a-bf35-4891-a23c-4696c12ce831",
428 | "metadata": {},
429 | "outputs": [
430 | {
431 | "name": "stdout",
432 | "output_type": "stream",
433 | "text": [
434 | "Task decomposition is the process of breaking down a complex task into smaller, manageable steps or subgoals. This can be achieved through various methods, including prompting a language model, using task-specific instructions, or incorporating human inputs. It enhances the model's performance by allowing it to tackle each component systematically.\n"
435 | ]
436 | }
437 | ],
438 | "source": [
439 | "response = graph.invoke({\"question\": \"What is Task Decomposition?\"})\n",
440 | "print(response[\"answer\"])"
441 | ]
442 | }
443 | ],
444 | "metadata": {
445 | "kernelspec": {
446 | "display_name": "Python 3 (ipykernel)",
447 | "language": "python",
448 | "name": "python3"
449 | },
450 | "language_info": {
451 | "codemirror_mode": {
452 | "name": "ipython",
453 | "version": 3
454 | },
455 | "file_extension": ".py",
456 | "mimetype": "text/x-python",
457 | "name": "python",
458 | "nbconvert_exporter": "python",
459 | "pygments_lexer": "ipython3",
460 | "version": "3.11.11"
461 | }
462 | },
463 | "nbformat": 4,
464 | "nbformat_minor": 5
465 | }
466 |
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/02-indexing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "66828cb1-f3c2-4d37-a20e-ed22501f557b",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import os\n",
11 | "from pathlib import Path\n",
12 | "import re\n",
13 | "\n",
14 | "from dotenv import find_dotenv, load_dotenv"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 2,
20 | "id": "bdf02132-e1af-4492-8fbd-93f3b49d9b4f",
21 | "metadata": {},
22 | "outputs": [
23 | {
24 | "data": {
25 | "text/plain": [
26 | "True"
27 | ]
28 | },
29 | "execution_count": 2,
30 | "metadata": {},
31 | "output_type": "execute_result"
32 | }
33 | ],
34 | "source": [
35 | "load_dotenv(find_dotenv('.env'))"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 3,
41 | "id": "bc4ea5ac-266b-4cd3-a01b-ccdef04cc2cc",
42 | "metadata": {},
43 | "outputs": [],
44 | "source": [
45 | "os.environ[\"LANGCHAIN_PROJECT\"] = \"RAG From Scratch: Part 2 (Indexing)\""
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 4,
51 | "id": "90c5b359-5120-4613-a0ab-0c2402462dfe",
52 | "metadata": {},
53 | "outputs": [],
54 | "source": [
55 | "DATA_PATH = Path('./data')\n",
56 | "VECTORSTORE_PATH = DATA_PATH / 'vectorstore'"
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "id": "17ee3446-d8cc-4fb6-851e-c27b9806aded",
62 | "metadata": {},
63 | "source": [
64 | "# Part 2: Indexing"
65 | ]
66 | },
67 | {
68 | "cell_type": "markdown",
69 | "id": "e21c8db7-7b68-4c57-aac3-df18f4cf5e41",
70 | "metadata": {},
71 | "source": [
72 | "![Indexing](images/02-indexing.png)"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "id": "c321cec8-1e53-4e81-9751-72aecd8b6c57",
78 | "metadata": {},
79 | "source": [
80 | "## Configure components"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 5,
86 | "id": "4eb815c5-6de0-40e6-a698-6a5e31b3102c",
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "from langchain_openai import OpenAIEmbeddings"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 6,
96 | "id": "a9c9f25d-589f-4aab-852d-370a6a90f658",
97 | "metadata": {},
98 | "outputs": [
99 | {
100 | "data": {
101 | "text/plain": [
102 | "1536"
103 | ]
104 | },
105 | "execution_count": 6,
106 | "metadata": {},
107 | "output_type": "execute_result"
108 | }
109 | ],
110 | "source": [
111 | "embeddings_model_name = \"text-embedding-3-small\"\n",
112 | "embeddings = OpenAIEmbeddings(model=embeddings_model_name)\n",
113 | "len(embeddings.embed_query(\"Hello\"))"
114 | ]
115 | },
116 | {
117 | "cell_type": "markdown",
118 | "id": "67ae5556-a40c-4059-8144-1cf9164fb857",
119 | "metadata": {},
120 | "source": [
121 | "## Load documents"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 7,
127 | "id": "c3b5f058-7604-4101-9fba-0c10e225ae3c",
128 | "metadata": {},
129 | "outputs": [
130 | {
131 | "name": "stderr",
132 | "output_type": "stream",
133 | "text": [
134 | "USER_AGENT environment variable not set, consider setting it to identify your requests.\n"
135 | ]
136 | }
137 | ],
138 | "source": [
139 | "import bs4\n",
140 | "from langchain_community.document_loaders import WebBaseLoader"
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": 8,
146 | "id": "1c432b82-b767-4fd8-b75b-ef108726a41d",
147 | "metadata": {},
148 | "outputs": [
149 | {
150 | "data": {
151 | "text/plain": [
152 | "1"
153 | ]
154 | },
155 | "execution_count": 8,
156 | "metadata": {},
157 | "output_type": "execute_result"
158 | }
159 | ],
160 | "source": [
161 | "loader = WebBaseLoader(\n",
162 | " web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n",
163 | " bs_kwargs=dict(\n",
164 | " parse_only=bs4.SoupStrainer(\n",
165 | " class_=(\"post-content\", \"post-title\", \"post-header\")\n",
166 | " )\n",
167 | " ),\n",
168 | ")\n",
169 | "docs = loader.load()\n",
170 | "len(docs)"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": 9,
176 | "id": "f51ac995-833e-4670-aa5d-84d75b6c479e",
177 | "metadata": {},
178 | "outputs": [
179 | {
180 | "name": "stdout",
181 | "output_type": "stream",
182 | "text": [
183 | "\n",
184 | "\n",
185 | " LLM Powered Autonomous Agents\n",
186 | " \n",
187 | "Date: June 23, 2023 | Estimated Reading Time: 31 min | Author: Lilian Weng\n",
188 | "\n",
189 | "\n",
190 | "Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\n",
191 | "Agent System Overview#\n",
192 | "In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n",
193 | "\n",
194 | "Planning\n",
195 | "\n",
196 | "Subgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\n",
197 | "Reflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\n",
198 | "\n",
199 | "\n",
200 | "Memory\n",
201 | "\n",
202 | "Short-term memory: I \n"
203 | ]
204 | }
205 | ],
206 | "source": [
207 | "print(docs[0].page_content[:1000])"
208 | ]
209 | },
210 | {
211 | "cell_type": "markdown",
212 | "id": "b02e6b41-544d-4c0e-be1d-0737617b8316",
213 | "metadata": {},
214 | "source": [
215 | "## Split documents"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 10,
221 | "id": "027b5bbd-04ac-4922-888a-f2cbbcc33ccc",
222 | "metadata": {},
223 | "outputs": [],
224 | "source": [
225 | "from langchain.text_splitter import RecursiveCharacterTextSplitter"
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 11,
231 | "id": "81aa6281-08a7-401a-9277-2fab14cd4946",
232 | "metadata": {},
233 | "outputs": [
234 | {
235 | "data": {
236 | "text/plain": [
237 | "66"
238 | ]
239 | },
240 | "execution_count": 11,
241 | "metadata": {},
242 | "output_type": "execute_result"
243 | }
244 | ],
245 | "source": [
246 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
247 | "splits = text_splitter.split_documents(docs)\n",
248 | "len(splits)"
249 | ]
250 | },
251 | {
252 | "cell_type": "markdown",
253 | "id": "7857e737-7d88-4804-a8c4-96cd162f0821",
254 | "metadata": {},
255 | "source": [
256 | "## Store documents"
257 | ]
258 | },
259 | {
260 | "cell_type": "code",
261 | "execution_count": 12,
262 | "id": "d510ea37-14e6-4f18-99ca-d092197cd939",
263 | "metadata": {},
264 | "outputs": [],
265 | "source": [
266 | "import chromadb\n",
267 | "from chromadb.config import Settings\n",
268 | "from langchain_chroma import Chroma\n",
269 | "from langchain_core.documents import Document"
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": 13,
275 | "id": "8a0218b5-2b6a-40fc-986e-023808e8ceab",
276 | "metadata": {},
277 | "outputs": [],
278 | "source": [
279 | "def get_collection_size(vectorstore):\n",
280 | " try:\n",
281 | " collection_size = len(vectorstore.get()[\"ids\"])\n",
282 | " except Exception as _:\n",
283 | " collection_size = 0\n",
284 | "\n",
285 | " return collection_size"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": 14,
291 | "id": "81904bc1-cf89-4ebf-a6da-eb8112121896",
292 | "metadata": {},
293 | "outputs": [
294 | {
295 | "data": {
296 | "text/plain": [
297 | "0"
298 | ]
299 | },
300 | "execution_count": 14,
301 | "metadata": {},
302 | "output_type": "execute_result"
303 | }
304 | ],
305 | "source": [
306 | "collection_name=\"embeddings\"\n",
307 | "\n",
308 | "vectorstore_settings = Settings(anonymized_telemetry=False)\n",
309 | "client = chromadb.PersistentClient(\n",
310 | " path=str(VECTORSTORE_PATH), settings=vectorstore_settings\n",
311 | ")\n",
312 | "\n",
313 | "Chroma(collection_name=collection_name, client=client).delete_collection()\n",
314 | "\n",
315 | "vectorstore = Chroma(\n",
316 | " collection_name=collection_name, embedding_function=embeddings, client=client\n",
317 | ")\n",
318 | "\n",
319 | "get_collection_size(vectorstore)"
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": 15,
325 | "id": "93ef079a-b68c-45e5-b727-5b855e0ad3a1",
326 | "metadata": {},
327 | "outputs": [
328 | {
329 | "data": {
330 | "text/plain": [
331 | "66"
332 | ]
333 | },
334 | "execution_count": 15,
335 | "metadata": {},
336 | "output_type": "execute_result"
337 | }
338 | ],
339 | "source": [
340 | "vectorstore.add_documents(splits)\n",
341 | "get_collection_size(vectorstore)"
342 | ]
343 | },
344 | {
345 | "attachments": {},
346 | "cell_type": "markdown",
347 | "id": "9bcb5d79-c382-4166-96dd-e28cc434053e",
348 | "metadata": {},
349 | "source": [
350 | "**Tokenization**\n",
351 | "- [OpenAI tokenizer](https://platform.openai.com/tokenizer)"
352 | ]
353 | },
354 | {
355 | "cell_type": "code",
356 | "execution_count": 16,
357 | "id": "252fe61e-f27d-45fb-b6dd-bf599036a926",
358 | "metadata": {},
359 | "outputs": [],
360 | "source": [
361 | "import tiktoken"
362 | ]
363 | },
364 | {
365 | "cell_type": "code",
366 | "execution_count": 17,
367 | "id": "357d2986-13d3-40ba-b6c9-921d85880582",
368 | "metadata": {},
369 | "outputs": [],
370 | "source": [
371 | "def num_tokens_from_string(string: str, encoding_name: str) -> int:\n",
372 | " \"\"\"Returns the number of tokens in a text string.\"\"\"\n",
373 | " encoding = tiktoken.get_encoding(encoding_name)\n",
374 | " num_tokens = len(encoding.encode(string))\n",
375 | " return num_tokens"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": 18,
381 | "id": "e136e704-ecc5-4373-a86a-1af5adc16091",
382 | "metadata": {},
383 | "outputs": [],
384 | "source": [
385 | "query = \"What kinds of pets do I like?\"\n",
386 | "document = \"My favorite pet is a cat.\""
387 | ]
388 | },
389 | {
390 | "cell_type": "code",
391 | "execution_count": 19,
392 | "id": "ae17be41-bbe9-4be9-818f-21a3d504f231",
393 | "metadata": {},
394 | "outputs": [
395 | {
396 | "data": {
397 | "text/plain": [
398 | "'cl100k_base'"
399 | ]
400 | },
401 | "execution_count": 19,
402 | "metadata": {},
403 | "output_type": "execute_result"
404 | }
405 | ],
406 | "source": [
407 | "openai_encoding_name = tiktoken.encoding_for_model(embeddings_model_name).name\n",
408 | "openai_encoding_name"
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": 20,
414 | "id": "bd11a8bc-fea6-43a1-80e5-c3d3304df479",
415 | "metadata": {},
416 | "outputs": [
417 | {
418 | "data": {
419 | "text/plain": [
420 | "8"
421 | ]
422 | },
423 | "execution_count": 20,
424 | "metadata": {},
425 | "output_type": "execute_result"
426 | }
427 | ],
428 | "source": [
429 | "num_tokens_from_string(query, openai_encoding_name)"
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "execution_count": 21,
435 | "id": "88df3809-be58-444e-ac4f-88325dec10bd",
436 | "metadata": {},
437 | "outputs": [
438 | {
439 | "data": {
440 | "text/plain": [
441 | "7"
442 | ]
443 | },
444 | "execution_count": 21,
445 | "metadata": {},
446 | "output_type": "execute_result"
447 | }
448 | ],
449 | "source": [
450 | "num_tokens_from_string(document, openai_encoding_name)"
451 | ]
452 | },
453 | {
454 | "cell_type": "markdown",
455 | "id": "7390af5d-98b7-444e-b22c-ee0179c622af",
456 | "metadata": {},
457 | "source": [
458 | "**Embeddings**"
459 | ]
460 | },
461 | {
462 | "cell_type": "code",
463 | "execution_count": 22,
464 | "id": "e39b7f41-bb30-4120-b11c-14549b283a9b",
465 | "metadata": {},
466 | "outputs": [
467 | {
468 | "data": {
469 | "text/plain": [
470 | "(1536, 1536)"
471 | ]
472 | },
473 | "execution_count": 22,
474 | "metadata": {},
475 | "output_type": "execute_result"
476 | }
477 | ],
478 | "source": [
479 | "query_embeddings = embeddings.embed_query(query)\n",
480 | "document_embeddings = embeddings.embed_documents([document])[0]\n",
481 | "\n",
482 | "len(query_embeddings), len(document_embeddings)"
483 | ]
484 | },
485 | {
486 | "cell_type": "markdown",
487 | "id": "5ac15fad-1c5b-4039-a70a-e66b1dbc7e4d",
488 | "metadata": {},
489 | "source": [
490 | "**Cosine similarity**"
491 | ]
492 | },
493 | {
494 | "cell_type": "code",
495 | "execution_count": 23,
496 | "id": "86ddb065-77d3-43dd-a0a5-973dd940fced",
497 | "metadata": {},
498 | "outputs": [],
499 | "source": [
500 | "import numpy as np"
501 | ]
502 | },
503 | {
504 | "cell_type": "code",
505 | "execution_count": 24,
506 | "id": "15569e34-6421-47b6-8b01-797e9c106a5c",
507 | "metadata": {},
508 | "outputs": [],
509 | "source": [
510 | "def cosine_similarity(vec1, vec2):\n",
511 | " dot_product = np.dot(vec1, vec2)\n",
512 | " norm_vec1 = np.linalg.norm(vec1)\n",
513 | " norm_vec2 = np.linalg.norm(vec2)\n",
514 | " return dot_product / (norm_vec1 * norm_vec2)"
515 | ]
516 | },
517 | {
518 | "cell_type": "code",
519 | "execution_count": 25,
520 | "id": "90a29dcc-6f34-479f-b6a3-cd8b8f6eb340",
521 | "metadata": {},
522 | "outputs": [
523 | {
524 | "name": "stdout",
525 | "output_type": "stream",
526 | "text": [
527 | "Cosine Similarity: 0.546556128332727\n"
528 | ]
529 | }
530 | ],
531 | "source": [
532 | "similarity = cosine_similarity(query_embeddings, document_embeddings)\n",
533 | "print(\"Cosine Similarity:\", similarity)"
534 | ]
535 | },
536 | {
537 | "cell_type": "code",
538 | "execution_count": 26,
539 | "id": "2b48c94b-b16c-484b-9e61-19d6ae8aafc3",
540 | "metadata": {},
541 | "outputs": [
542 | {
543 | "name": "stdout",
544 | "output_type": "stream",
545 | "text": [
546 | "Cosine Similarity: 0.09272330847288396\n"
547 | ]
548 | }
549 | ],
550 | "source": [
551 | "non_relevant_document = \"The weather is fine.\"\n",
552 | "non_relevant_document_embeddings = embeddings.embed_documents([non_relevant_document])[0]\n",
553 | "\n",
554 | "similarity = cosine_similarity(query_embeddings, non_relevant_document_embeddings)\n",
555 | "print(\"Cosine Similarity:\", similarity)"
556 | ]
557 | },
558 | {
559 | "cell_type": "code",
560 | "execution_count": 27,
561 | "id": "7e5b821b-69d2-46e9-a0f0-69206421fe8a",
562 | "metadata": {},
563 | "outputs": [
564 | {
565 | "name": "stdout",
566 | "output_type": "stream",
567 | "text": [
568 | "Cosine Similarity: 1.0\n"
569 | ]
570 | }
571 | ],
572 | "source": [
573 | "similarity = cosine_similarity(query_embeddings, query_embeddings)\n",
574 | "print(\"Cosine Similarity:\", similarity)"
575 | ]
576 | }
577 | ],
578 | "metadata": {
579 | "kernelspec": {
580 | "display_name": "Python 3 (ipykernel)",
581 | "language": "python",
582 | "name": "python3"
583 | },
584 | "language_info": {
585 | "codemirror_mode": {
586 | "name": "ipython",
587 | "version": 3
588 | },
589 | "file_extension": ".py",
590 | "mimetype": "text/x-python",
591 | "name": "python",
592 | "nbconvert_exporter": "python",
593 | "pygments_lexer": "ipython3",
594 | "version": "3.11.11"
595 | }
596 | },
597 | "nbformat": 4,
598 | "nbformat_minor": 5
599 | }
600 |
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/01-01-overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/01-01-overview.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/01-02-overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/01-02-overview.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/02-indexing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/02-indexing.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/03-01-retrieval.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/03-01-retrieval.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/03-02-retrieval.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/03-02-retrieval.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/04-generation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/04-generation.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/05-multi-query.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/05-multi-query.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/06-rag-fusion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/06-rag-fusion.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/07-01-decomposition-recursive.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/07-01-decomposition-recursive.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/07-02-decomposition-parallel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/07-02-decomposition-parallel.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/08-step-back.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/08-step-back.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/09-hyde.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/09-hyde.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/10-01-logical-routing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/10-01-logical-routing.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/10-01-structured-output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/10-01-structured-output.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/10-02-semantic-routing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/10-02-semantic-routing.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/11-query-construction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/11-query-construction.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/11-self-query.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/11-self-query.jpg
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/12-01-multi-vector-summary.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/12-01-multi-vector-summary.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/12-02-multi-vector-chunks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/12-02-multi-vector-chunks.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/12-03-multi-vector-hypothetical-questions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/12-03-multi-vector-hypothetical-questions.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/13-raptor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/13-raptor.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/14-01-colbert.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/14-01-colbert.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/14-02-colbert.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/14-02-colbert.jpg
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/15-crag-implementation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/15-crag-implementation.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/15-crag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/15-crag.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/16-self-rag-implementation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/16-self-rag-implementation.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/16-self-rag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/16-self-rag.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/generation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/generation.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/indexing-01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/indexing-01.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/query-construction-01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/query-construction-01.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/query-translation-01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/query-translation-01.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/query-translation-02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/query-translation-02.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/rag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/rag.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/retrieval.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/retrieval.png
--------------------------------------------------------------------------------
/notebooks/rag-from-scratch/images/routing-01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/notebooks/rag-from-scratch/images/routing-01.png
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "llm-rag"
3 | version = "0.1.0"
4 | description = "LLM RAG"
5 | readme = "README.md"
6 | requires-python = ">=3.11"
7 | dependencies = [
8 | "langchain-chroma>=0.2.2",
9 | "langchain-community>=0.3.18",
10 | "langchain-openai>=0.3.7",
11 | "langchain-weaviate>=0.0.4",
12 | "langgraph==0.3.31",
13 | "langgraph-cli[inmem]>=0.1.74",
14 | "lark>=1.2.2",
15 | "pandas>=2.2.3",
16 | "python-dotenv>=1.0.1",
17 | "pytube==11.0.2",
18 | "rich>=13.9.4",
19 | "scikit-learn>=1.6.1",
20 | "transformers==4.49.0",
21 | "umap-learn>=0.5.7",
22 | "youtube-transcript-api==1.0.3",
23 | ]
24 |
25 | [project.optional-dependencies]
26 | ragatouille = ["ragatouille>=0.0.9"]
27 |
28 | [dependency-groups]
29 | dev = [
30 | "anywidget>=0.9.18",
31 | "ipywidgets>=8.1.5",
32 | "jupyterlab>=4.3.5",
33 | "jupyterlab-git>=0.51.0",
34 | "plotly[express]>=6.0.1",
35 | "ruff>=0.9.7",
36 | ]
37 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # This file was autogenerated by uv via the following command:
2 | # uv export --no-hashes --no-annotate
3 | aiohappyeyeballs==2.6.1
4 | aiohttp==3.11.16
5 | aiosignal==1.3.2
6 | annotated-types==0.7.0
7 | anyio==4.9.0
8 | anywidget==0.9.18
9 | appnope==0.1.4 ; sys_platform == 'darwin'
10 | argon2-cffi==23.1.0
11 | argon2-cffi-bindings==21.2.0
12 | arrow==1.3.0
13 | asgiref==3.8.1
14 | asttokens==3.0.0
15 | async-lru==2.0.5
16 | attrs==25.3.0
17 | authlib==1.3.1
18 | babel==2.17.0
19 | backoff==2.2.1
20 | bcrypt==4.3.0
21 | beautifulsoup4==4.13.4
22 | bleach==6.2.0
23 | blockbuster==1.5.24 ; python_full_version < '4.0'
24 | build==1.2.2.post1
25 | cachetools==5.5.2
26 | certifi==2025.1.31
27 | cffi==1.17.1
28 | charset-normalizer==3.4.1
29 | chroma-hnswlib==0.7.6
30 | chromadb==0.6.3
31 | click==8.1.8
32 | cloudpickle==3.1.1 ; python_full_version < '4.0'
33 | colorama==0.4.6
34 | coloredlogs==15.0.1
35 | comm==0.2.2
36 | cryptography==44.0.2
37 | dataclasses-json==0.6.7
38 | debugpy==1.8.14
39 | decorator==5.2.1
40 | defusedxml==0.7.1
41 | deprecated==1.2.18
42 | distro==1.9.0
43 | durationpy==0.9
44 | executing==2.2.0
45 | fastapi==0.115.12
46 | fastjsonschema==2.21.1
47 | filelock==3.18.0
48 | flatbuffers==25.2.10
49 | forbiddenfruit==0.1.4 ; python_full_version < '4.0' and implementation_name == 'cpython'
50 | fqdn==1.5.1
51 | frozenlist==1.6.0
52 | fsspec==2024.12.0
53 | gitdb==4.0.12
54 | gitpython==3.1.44
55 | google-auth==2.39.0
56 | googleapis-common-protos==1.70.0
57 | greenlet==3.2.0 ; (python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')
58 | grpcio==1.71.0
59 | grpcio-health-checking==1.71.0
60 | grpcio-tools==1.71.0
61 | h11==0.14.0
62 | httpcore==1.0.8
63 | httptools==0.6.4
64 | httpx==0.28.1
65 | httpx-sse==0.4.0
66 | huggingface-hub==0.30.2
67 | humanfriendly==10.0
68 | idna==3.10
69 | importlib-metadata==8.6.1
70 | importlib-resources==6.5.2
71 | ipykernel==6.29.5
72 | ipython==9.1.0
73 | ipython-pygments-lexers==1.1.1
74 | ipywidgets==8.1.6
75 | isoduration==20.11.0
76 | jedi==0.19.2
77 | jinja2==3.1.6
78 | jiter==0.9.0
79 | joblib==1.4.2
80 | json5==0.12.0
81 | jsonpatch==1.33
82 | jsonpointer==3.0.0
83 | jsonschema==4.23.0
84 | jsonschema-rs==0.29.1 ; python_full_version < '4.0'
85 | jsonschema-specifications==2024.10.1
86 | jupyter-client==8.6.3
87 | jupyter-core==5.7.2
88 | jupyter-events==0.12.0
89 | jupyter-lsp==2.2.5
90 | jupyter-server==2.15.0
91 | jupyter-server-mathjax==0.2.6
92 | jupyter-server-terminals==0.5.3
93 | jupyterlab==4.4.0
94 | jupyterlab-git==0.51.1
95 | jupyterlab-pygments==0.3.0
96 | jupyterlab-server==2.27.3
97 | jupyterlab-widgets==3.0.14
98 | kubernetes==32.0.1
99 | langchain==0.3.23
100 | langchain-chroma==0.2.2 ; python_full_version >= '3.13'
101 | langchain-chroma==0.2.3 ; python_full_version < '3.13'
102 | langchain-community==0.3.21
103 | langchain-core==0.3.54
104 | langchain-openai==0.3.14
105 | langchain-text-splitters==0.3.8
106 | langchain-weaviate==0.0.4
107 | langgraph==0.3.31
108 | langgraph-api==0.1.9 ; python_full_version < '4.0'
109 | langgraph-checkpoint==2.0.24
110 | langgraph-cli==0.2.5
111 | langgraph-prebuilt==0.1.8
112 | langgraph-runtime-inmem==0.0.4 ; python_full_version < '4.0'
113 | langgraph-sdk==0.1.61
114 | langsmith==0.3.32
115 | lark==1.2.2
116 | llvmlite==0.44.0
117 | markdown-it-py==3.0.0
118 | markupsafe==3.0.2
119 | marshmallow==3.26.1
120 | matplotlib-inline==0.1.7
121 | mdurl==0.1.2
122 | mistune==3.1.3
123 | mmh3==5.1.0
124 | monotonic==1.6
125 | mpmath==1.3.0
126 | multidict==6.4.3
127 | mypy-extensions==1.0.0
128 | narwhals==1.35.0
129 | nbclient==0.10.2
130 | nbconvert==7.16.6
131 | nbdime==4.0.2
132 | nbformat==5.10.4
133 | nest-asyncio==1.6.0
134 | notebook-shim==0.2.4
135 | numba==0.61.2
136 | numpy==1.26.4
137 | oauthlib==3.2.2
138 | onnxruntime==1.21.1
139 | openai==1.75.0
140 | opentelemetry-api==1.32.1
141 | opentelemetry-exporter-otlp-proto-common==1.32.1
142 | opentelemetry-exporter-otlp-proto-grpc==1.32.1
143 | opentelemetry-instrumentation==0.53b1
144 | opentelemetry-instrumentation-asgi==0.53b1
145 | opentelemetry-instrumentation-fastapi==0.53b1
146 | opentelemetry-proto==1.32.1
147 | opentelemetry-sdk==1.32.1
148 | opentelemetry-semantic-conventions==0.53b1
149 | opentelemetry-util-http==0.53b1
150 | orjson==3.10.16
151 | ormsgpack==1.9.1
152 | overrides==7.7.0
153 | packaging==24.2
154 | pandas==2.2.3
155 | pandocfilters==1.5.1
156 | parso==0.8.4
157 | pexpect==4.9.0
158 | platformdirs==4.3.7
159 | plotly==6.0.1
160 | posthog==3.25.0
161 | prometheus-client==0.21.1
162 | prompt-toolkit==3.0.51
163 | propcache==0.3.1
164 | protobuf==5.29.4
165 | psutil==7.0.0
166 | psygnal==0.12.0
167 | ptyprocess==0.7.0
168 | pure-eval==0.2.3
169 | pyasn1==0.6.1
170 | pyasn1-modules==0.4.2
171 | pycparser==2.22
172 | pydantic==2.11.3
173 | pydantic-core==2.33.1
174 | pydantic-settings==2.9.1
175 | pygments==2.19.1
176 | pyjwt==2.10.1 ; python_full_version < '4.0'
177 | pynndescent==0.5.13
178 | pypika==0.48.9
179 | pyproject-hooks==1.2.0
180 | pyreadline3==3.5.4 ; sys_platform == 'win32'
181 | python-dateutil==2.9.0.post0
182 | python-dotenv==1.1.0
183 | python-json-logger==3.3.0
184 | pytube==11.0.2
185 | pytz==2025.2
186 | pywin32==310 ; platform_python_implementation != 'PyPy' and sys_platform == 'win32'
187 | pywinpty==2.0.15 ; os_name == 'nt'
188 | pyyaml==6.0.2
189 | pyzmq==26.4.0
190 | referencing==0.36.2
191 | regex==2024.11.6
192 | requests==2.32.3
193 | requests-oauthlib==2.0.0
194 | requests-toolbelt==1.0.0
195 | rfc3339-validator==0.1.4
196 | rfc3986-validator==0.1.1
197 | rich==14.0.0
198 | rpds-py==0.24.0
199 | rsa==4.9.1
200 | ruff==0.11.6
201 | safetensors==0.5.3
202 | scikit-learn==1.6.1
203 | scipy==1.15.2
204 | send2trash==1.8.3
205 | setuptools==79.0.0
206 | shellingham==1.5.4
207 | simsimd==6.2.1
208 | six==1.17.0
209 | smmap==5.0.2
210 | sniffio==1.3.1
211 | soupsieve==2.7
212 | sqlalchemy==2.0.40
213 | sse-starlette==2.1.3 ; python_full_version < '4.0'
214 | stack-data==0.6.3
215 | starlette==0.46.2
216 | structlog==25.2.0 ; python_full_version < '4.0'
217 | sympy==1.13.1
218 | tenacity==9.1.2
219 | terminado==0.18.1
220 | threadpoolctl==3.6.0
221 | tiktoken==0.9.0
222 | tinycss2==1.4.0
223 | tokenizers==0.21.1
224 | tornado==6.4.2
225 | tqdm==4.67.1
226 | traitlets==5.14.3
227 | transformers==4.49.0
228 | typer==0.15.2
229 | types-python-dateutil==2.9.0.20241206
230 | typing-extensions==4.13.2
231 | typing-inspect==0.9.0
232 | typing-inspection==0.4.0
233 | tzdata==2025.2
234 | umap-learn==0.5.7
235 | uri-template==1.3.0
236 | urllib3==2.4.0
237 | uvicorn==0.34.2
238 | uvloop==0.21.0 ; platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'
239 | validators==0.34.0
240 | watchfiles==1.0.5
241 | wcwidth==0.2.13
242 | weaviate-client==4.13.2
243 | webcolors==24.11.1
244 | webencodings==0.5.1
245 | websocket-client==1.8.0
246 | websockets==15.0.1
247 | widgetsnbextension==4.0.14
248 | wrapt==1.17.2
249 | xxhash==3.5.0
250 | yarl==1.20.0
251 | youtube-transcript-api==1.0.3
252 | zipp==3.21.0
253 | zstandard==0.23.0
254 |
--------------------------------------------------------------------------------
/src/llm_rag/__init__.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from dotenv import find_dotenv, load_dotenv
4 | from langchain_openai import ChatOpenAI, OpenAIEmbeddings
5 |
# Load variables from the .env file located by find_dotenv() before the
# clients below are constructed.
load_dotenv(find_dotenv())

# Directory two levels above the directory that contains this file.
project_path = Path(__file__).resolve().parent.parent.parent

# Shared chat model and embedding model imported by the graph modules.
llm = ChatOpenAI(temperature=1, model="gpt-4o-mini")
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
12 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/colbert/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/colbert/__init__.py
--------------------------------------------------------------------------------
/src/llm_rag/graphs/colbert/colbert_model.py:
--------------------------------------------------------------------------------
1 | from typing import TypedDict
2 |
3 | from langchain_core.documents import Document
4 | from langchain_core.messages import HumanMessage
5 | from langgraph.graph import END, START, StateGraph
6 | from llm_rag import llm
7 | from llm_rag.indexing.colbert_model import retriever
8 | from rich import print as rprint
9 | from rich.markdown import Markdown
10 | from rich.pretty import Pretty
11 |
# Prompt filled in by the generate node via str.format with ``context`` and
# ``question``.
rag_prompt_template = (
    "Answer the following question based on this context:\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}"
)
17 |
18 |
def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)
21 |
22 |
class State(TypedDict):
    # Question passed in by the caller of the graph.
    question: str
    # Retriever output written by the retrieve node.
    context: list[Document]
    # LLM response text written by the generate node.
    answer: str
27 |
28 |
def retrieve(state: State):
    """Query the retriever with the state's question.

    Returns a partial state update holding the retriever output under
    ``context``.
    """
    return {"context": retriever.invoke(state["question"])}
32 |
33 |
def generate(state: State):
    """Answer the question with the LLM using the retrieved context.

    The documents in ``state["context"]`` are flattened with ``format_docs``,
    inserted into ``rag_prompt_template`` together with the question, and the
    text of the LLM reply is returned under ``answer``.
    """
    context_text = format_docs(state["context"])
    prompt_text = rag_prompt_template.format(
        context=context_text,
        question=state["question"],
    )
    llm_reply = llm.invoke([HumanMessage(content=prompt_text)])
    return {"answer": llm_reply.content}
41 |
42 |
graph_builder = StateGraph(State)

# Register the two nodes under explicit names.
for node_name, node_fn in (("retrieve", retrieve), ("generate", generate)):
    graph_builder.add_node(node_name, node_fn)

# Linear flow: START -> retrieve -> generate -> END.
for edge_start, edge_end in (
    (START, "retrieve"),
    ("retrieve", "generate"),
    ("generate", END),
):
    graph_builder.add_edge(edge_start, edge_end)

graph = graph_builder.compile()
53 |
54 |
if __name__ == "__main__":
    # Run the graph on a couple of sample questions and print both the full
    # resulting state and the answer rendered as Markdown.
    queries = [
        "What is task decomposition for LLM agents?",
        "What are main steps for collecting human data?",
    ]

    for query in queries:
        response = graph.invoke({"question": query})
        pretty_state = Pretty(response, no_wrap=False)
        rprint(pretty_state)
        rendered_answer = Markdown(response["answer"])
        rprint(rendered_answer)
65 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/crag/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/crag/__init__.py
--------------------------------------------------------------------------------
/src/llm_rag/graphs/crag/crag.py:
--------------------------------------------------------------------------------
1 | import operator
2 | from typing import Annotated, Literal, TypedDict
3 |
4 | from langchain_community.tools.tavily_search import TavilySearchResults
5 | from langchain_core.documents import Document
6 | from langchain_core.messages import HumanMessage
7 | from langchain_core.runnables import chain
8 | from langgraph.graph import END, START, StateGraph
9 | from pydantic import BaseModel, Field
10 | from rich import print as rprint
11 | from rich.markdown import Markdown
12 | from rich.pretty import Pretty
13 |
14 | from llm_rag import llm
15 | from llm_rag.indexing.reflection import retriever
16 |
# Prompt used by generate_answer; filled via str.format with ``context`` and
# ``question``.
rag_prompt_template = (
    "Answer the following question based on this context:\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)

# Prompt used by grade_document; filled via str.format with ``document`` and
# ``question``.
grading_prompt_template = (
    "You are a grader assessing relevance of a retrieved document to a user question.\n"
    "If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant.\n"
    "Give a binary score to indicate whether the document is relevant to the question.\n"
    "\n"
    "Retrieved document:\n"
    "{document}\n"
    "\n"
    "User question:\n"
    "{question}"
)
33 |
# Prompt used by rewrite_query; filled via str.format with ``question``.
# The opening sentence previously read "You a question re-writer" (missing
# verb); it is corrected here.
query_rewriting_prompt_template = """You are a question re-writer that converts an input question to a better version that is optimized
for web search.
Look at the input and try to reason about the underlying semantic intent / meaning.

Here is the initial question:
{question}

Formulate an improved question."""
42 |
43 |
def format_docs(docs: list[Document]) -> str:
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Documents whose text should be combined.

    Returns:
        The page contents separated by a blank line; an empty string when
        ``docs`` is empty.
    """
    # The result is a single joined string, so the return type is ``str``
    # (it was previously annotated as ``list[str]``).
    return "\n\n".join(doc.page_content for doc in docs)
46 |
47 |
class DocumentGrade(BaseModel):
    """Relevance check on retrieved document."""

    # Required free-text reasoning; declared before the verdict field.
    chain_of_thought: str = Field(
        description="Step by step reasoning to check if the document is relevant to the question",
    )
    # Required boolean verdict read by grade_documents.
    is_relevant: bool = Field(..., description="Document is relevant to the question")
56 |
57 |
# LLM configured to return DocumentGrade objects via function calling.
grader_llm = llm.with_structured_output(DocumentGrade, method="function_calling")


@chain
def grade_document(document, question):
    """Ask the grader LLM whether ``document`` is relevant to ``question``.

    Both values are inserted into ``grading_prompt_template`` with
    ``str.format``; the structured response of ``grader_llm`` is returned
    unchanged.
    """
    prompt_text = grading_prompt_template.format(
        question=question,
        document=document,
    )
    return grader_llm.invoke([HumanMessage(content=prompt_text)])
68 |
69 |
class WebSearchQuery(BaseModel):
    """Question optimization for web search."""

    # Required free-text reasoning; declared before the query field.
    chain_of_thought: str = Field(
        description="Step by step reasoning to optimize query for web search",
    )
    # Required rewritten query; read by rewrite_query.
    web_search_query: str = Field(..., description="Optimized web search query")
77 |
78 |
# LLM configured to return WebSearchQuery objects via function calling.
web_search_llm = llm.with_structured_output(WebSearchQuery, method="function_calling")
# TavilySearchResults limits the number of hits through ``max_results``; it
# has no ``k`` field, so the previous ``k=4`` was ignored and the tool's
# default result count was used instead.
web_search_tool = TavilySearchResults(max_results=4)
81 |
82 |
class State(TypedDict):
    # Question passed in by the caller of the graph.
    question: str
    # Retriever output written by the retrieve node.
    documents: list[Document]
    # One DocumentGrade per retrieved document, written by grade_documents.
    grades: list[DocumentGrade]
    # Routing flag written by grade_documents and read by
    # check_documents_relevance.
    is_web_search_required: bool
    # Rewritten query written by rewrite_query and read by web_search.
    web_search_query: str
    # Written by both grade_documents and web_search; operator.add is attached
    # as the reducer annotation for this key.
    context: Annotated[list[Document], operator.add]
    # LLM response text written by generate_answer.
    answer: str
91 |
92 |
def retrieve(state: State):
    """Query the retriever with the state's question.

    Returns a partial state update holding the retriever output under
    ``documents``.
    """
    return {"documents": retriever.invoke(state["question"])}
97 |
98 |
def grade_documents(state: State):
    """Grade every retrieved document and keep only the relevant ones.

    Returns a partial state update with the relevant documents under
    ``context``, all grades under ``grades`` and ``is_web_search_required``
    set to True when at least one document was graded as not relevant.
    """
    user_question = state["question"]
    candidates = state["documents"]

    verdicts = grade_document.batch(candidates, question=user_question)

    relevant = []
    for candidate, verdict in zip(candidates, verdicts):
        if verdict.is_relevant:
            relevant.append(candidate)

    return {
        "context": relevant,
        "grades": verdicts,
        "is_web_search_required": len(relevant) < len(candidates),
    }
114 |
115 |
def check_documents_relevance(
    state: State,
) -> Literal["rewrite_query", "generate_answer"]:
    """Return the name of the next node based on ``is_web_search_required``.

    ``"rewrite_query"`` is returned when the flag is truthy, otherwise
    ``"generate_answer"``.
    """
    return "rewrite_query" if state["is_web_search_required"] else "generate_answer"
125 |
126 |
def rewrite_query(state: State):
    """Rewrite the question into a web-search query with the structured LLM.

    Returns a partial state update with the ``web_search_query`` attribute of
    the LLM response under the key of the same name.
    """
    prompt_text = query_rewriting_prompt_template.format(question=state["question"])
    rewritten = web_search_llm.invoke(prompt_text)
    return {"web_search_query": rewritten.web_search_query}
132 |
133 |
def web_search(state: State):
    """Run the web search tool on the rewritten query.

    Each search result's ``"content"`` entry is wrapped in a ``Document``;
    the list is returned under ``context``.
    """
    hits = web_search_tool.invoke({"query": state["web_search_query"]})

    found = []
    for hit in hits:
        found.append(Document(page_content=hit["content"]))

    return {"context": found}
139 |
140 |
def generate_answer(state: State):
    """Answer the question with the LLM using the collected context.

    The documents in ``state["context"]`` are flattened with ``format_docs``,
    inserted into ``rag_prompt_template`` together with the question, and the
    text of the LLM reply is returned under ``answer``.
    """
    context_text = format_docs(state["context"])
    prompt_text = rag_prompt_template.format(
        context=context_text,
        question=state["question"],
    )
    llm_reply = llm.invoke([HumanMessage(content=prompt_text)])
    return {"answer": llm_reply.content}
148 |
149 |
graph_builder = StateGraph(State)

# Register the nodes under explicit names.
for node_name, node_fn in (
    ("retrieve", retrieve),
    ("grade_documents", grade_documents),
    ("rewrite_query", rewrite_query),
    ("web_search", web_search),
    ("generate_answer", generate_answer),
):
    graph_builder.add_node(node_name, node_fn)

# Retrieval is always followed by grading.
graph_builder.add_edge(START, "retrieve")
graph_builder.add_edge("retrieve", "grade_documents")
# After grading, check_documents_relevance picks the next node.
graph_builder.add_conditional_edges("grade_documents", check_documents_relevance)
# The web-search branch rejoins the main path at generate_answer.
graph_builder.add_edge("rewrite_query", "web_search")
graph_builder.add_edge("web_search", "generate_answer")
graph_builder.add_edge("generate_answer", END)

graph = graph_builder.compile()
166 |
167 |
if __name__ == "__main__":
    # Run the graph on sample questions and print both the full resulting
    # state and the answer rendered as Markdown.
    queries = [
        "What are common types of agent memory?",
        "What are main steps for collecting human data?",
        "How does the AlphaCodium paper work?",
    ]

    for query in queries:
        response = graph.invoke({"question": query})
        pretty_state = Pretty(response)
        rprint(pretty_state)
        rendered_answer = Markdown(response["answer"])
        rprint(rendered_answer)
179 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/decomposition/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/decomposition/__init__.py
--------------------------------------------------------------------------------
/src/llm_rag/graphs/decomposition/parallel.py:
--------------------------------------------------------------------------------
1 | import operator
2 | from typing import Annotated, TypedDict
3 |
4 | from langchain_core.documents import Document
5 | from langchain_core.messages import HumanMessage
6 | from langchain_core.runnables import RunnableConfig
7 | from langgraph.constants import Send
8 | from langgraph.graph import END, START, StateGraph
9 | from llm_rag import llm
10 | from llm_rag.indexing.article import retriever
11 | from pydantic import BaseModel, Field
12 | from rich import print as rprint
13 | from rich.markdown import Markdown
14 | from rich.pretty import Pretty
15 |
# Prompt used by generate_sub_questions; filled via str.format with
# ``question``.
decomposition_prompt_template = (
    "You are a helpful assistant that generates multiple sub-questions related to an input question.\n"
    "The goal is to break down the input into a set of sub-problems / sub-questions that can be answered sequentially.\n"
    "Generate multiple search queries related to: {question}"
)


# Prompt used by answer_sub_question; filled with ``context`` and ``question``.
sub_question_prompt_template = (
    "Answer the following question based on this context:\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)


# Prompt used by generate_answer; filled with ``context`` and ``question``.
rag_prompt_template = (
    "Here is a set of Q+A pairs:\n"
    "\n"
    "{context}\n"
    "\n"
    "Use these to synthesize an answer to the question: {question}\n"
)
35 |
36 |
def format_qa_pair(question: str, answer: str) -> str:
    """Render one question/answer pair as text followed by three newlines."""
    return "Question: {} \nAnswer: {}\n\n\n".format(question, answer)
39 |
40 |
class State(TypedDict):
    # Question passed in by the caller of the graph.
    question: str
    # Sub-questions written by generate_sub_questions.
    generated_sub_questions: list[str]
    # One single-entry {sub_question: answer} dict per answer_sub_question
    # run; operator.add is attached as the reducer annotation for this key.
    qa_pairs: Annotated[list[dict[str, str]], operator.add]
    # Text assembled by aggregate_qa_pairs from the formatted Q+A pairs. It is
    # a plain string, not a list of documents as previously annotated.
    context: str
    # LLM response text written by generate_answer.
    answer: str
47 |
48 |
class RetrieverState(TypedDict):
    # Single sub-question handed to answer_sub_question by
    # assign_sub_questions.
    generated_sub_question: str
51 |
52 |
def generate_sub_questions(
    query: State | str, config: RunnableConfig
) -> dict[str, list[str]]:
    """Decompose the input question into sub-questions with a structured LLM call.

    Args:
        query: The graph state, from which the ``"question"`` entry is read,
            or the question text itself when the function is called directly.
        config: Runnable config; ``configurable.max_generated_sub_questions_count``
            caps the number of sub-questions (defaults to 3 when absent).

    Returns:
        Partial state update with the generated sub-questions under
        ``generated_sub_questions``.
    """
    # This function is registered as a graph node and is therefore called
    # with the state mapping. Formatting that mapping straight into the
    # prompt would insert its dict repr instead of the question text, so the
    # question is extracted first. A bare string is still accepted.
    question = query if isinstance(query, str) else query["question"]

    max_generated_sub_questions_count = config.get("configurable", {}).get(
        "max_generated_sub_questions_count", 3
    )

    # Declared per call because the length limit depends on the config.
    class SubQuestionsGenerator(BaseModel):
        sub_questions: list[str] = Field(
            ...,
            description="List of generated sub-problems / sub-questions",
            # ``max_length`` is the Pydantic v2 name of the deprecated
            # ``max_items`` constraint.
            max_length=max_generated_sub_questions_count,
        )

    structured_llm = llm.with_structured_output(
        SubQuestionsGenerator, method="function_calling"
    )
    decomposition_prompt = decomposition_prompt_template.format(question=question)
    response = structured_llm.invoke([HumanMessage(content=decomposition_prompt)])

    return {"generated_sub_questions": response.sub_questions}
73 |
74 |
def assign_sub_questions(state: State):
    """Create one ``Send`` to ``answer_sub_question`` per generated sub-question.

    Each ``Send`` carries a payload with the sub-question under
    ``generated_sub_question``.
    """
    dispatches = []
    for sub_question in state["generated_sub_questions"]:
        payload = {"generated_sub_question": sub_question}
        dispatches.append(Send("answer_sub_question", payload))
    return dispatches
80 |
81 |
def answer_sub_question(state: RetrieverState):
    """Answer a single sub-question from retrieved context.

    The retriever output is inserted as-is into
    ``sub_question_prompt_template``; the result is returned as a one-element
    ``qa_pairs`` list holding a ``{sub_question: answer_text}`` dict.
    """
    sub_question = state["generated_sub_question"]
    retrieved = retriever.invoke(sub_question)
    prompt_text = sub_question_prompt_template.format(
        question=sub_question,
        context=retrieved,
    )
    llm_reply = llm.invoke([HumanMessage(content=prompt_text)])
    return {"qa_pairs": [{sub_question: llm_reply.content}]}
90 |
91 |
def aggregate_qa_pairs(state: State):
    """Format every collected Q+A pair and concatenate them into ``context``."""
    rendered_pairs = []

    for pair in state["qa_pairs"]:
        # Each entry is expected to hold exactly one question -> answer item;
        # the unpacking raises otherwise.
        [(sub_question, sub_answer)] = pair.items()
        rendered_pairs.append(format_qa_pair(sub_question, sub_answer))

    return {"context": "".join(rendered_pairs)}
100 |
101 |
def generate_answer(state: State):
    """Synthesize the final answer from the aggregated Q+A context.

    ``state["context"]`` and the original question are inserted into
    ``rag_prompt_template``; the text of the LLM reply is returned under
    ``answer``.
    """
    prompt_text = rag_prompt_template.format(
        question=state["question"],
        context=state["context"],
    )
    llm_reply = llm.invoke([HumanMessage(content=prompt_text)])
    return {"answer": llm_reply.content}
108 |
109 |
class ConfigSchema(BaseModel):
    # Read by generate_sub_questions from config["configurable"]; must be
    # greater than 1.
    max_generated_sub_questions_count: int = Field(3, gt=1)
112 |
113 |
graph_builder = StateGraph(State, ConfigSchema)

# Register the nodes under explicit names.
for node_name, node_fn in (
    ("generate_sub_questions", generate_sub_questions),
    ("answer_sub_question", answer_sub_question),
    ("aggregate_qa_pairs", aggregate_qa_pairs),
    ("generate_answer", generate_answer),
):
    graph_builder.add_node(node_name, node_fn)

graph_builder.add_edge(START, "generate_sub_questions")
# assign_sub_questions returns one Send per sub-question, all targeting
# answer_sub_question.
graph_builder.add_conditional_edges(
    "generate_sub_questions",
    assign_sub_questions,
    ["answer_sub_question"],
)
graph_builder.add_edge("answer_sub_question", "aggregate_qa_pairs")
graph_builder.add_edge("aggregate_qa_pairs", "generate_answer")
graph_builder.add_edge("generate_answer", END)
graph = graph_builder.compile()
129 |
130 |
if __name__ == "__main__":
    # Run the graph once with a custom sub-question limit and print the
    # resulting state and the answer rendered as Markdown.
    query = "What are the main components of an LLM-powered autonomous agent system?"
    config = {"configurable": {"max_generated_sub_questions_count": 5}}

    response = graph.invoke({"question": query}, config=config)

    pretty_state = Pretty(response, max_depth=2)
    rprint(pretty_state)
    rendered_answer = Markdown(response["answer"])
    rprint(rendered_answer)
145 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/decomposition/recursive.py:
--------------------------------------------------------------------------------
1 | from typing import Literal, TypedDict
2 |
3 | from langchain_core.documents import Document
4 | from langchain_core.messages import HumanMessage
5 | from langchain_core.runnables import RunnableConfig
6 | from langgraph.graph import END, START, StateGraph
7 | from pydantic import BaseModel, Field
8 | from rich import print as rprint
9 | from rich.markdown import Markdown
10 |
11 | from llm_rag import llm
12 | from llm_rag.indexing.article import vectorstore
13 |
# Prompt used by generate_sub_questions; filled via str.format with
# ``question``.
decomposition_prompt_template = """You are a helpful assistant that generates multiple sub-questions related to an input question.
The goal is to break down the input into a set of sub-problems / sub-questions that can be answered sequentially.
Generate multiple search queries related to: {question}"""


# Prompt used by generate_answer; filled via str.format with ``question``
# (which appears twice), ``qa_pairs`` and ``context``.
recursive_prompt_template = """Here is the question you need to answer:

{question}


Here are any available background question + answer pairs:

{qa_pairs}


Here is additional context relevant to the question:

{context}


Use the above context and any background question + answer pairs to answer the question:

{question}

"""
39 |
40 |
def format_qa_pair(question: str, answer: str) -> str:
    """Render one question/answer pair as text followed by a blank line."""
    return "Question: {} \nAnswer:\n{}\n\n".format(question, answer)
43 |
44 |
class State(TypedDict):
    # Question passed in by the caller of the graph.
    question: str
    # Generated sub-questions followed by the original question, written by
    # generate_sub_questions.
    all_questions: list[str]
    # Index into all_questions of the question currently being answered.
    current_question_idx: int
    # Formatted question/answer pairs answered so far. generate_answer builds
    # this by string concatenation, so it is one string rather than a list as
    # previously annotated.
    qa_pairs: str
    # Documents retrieved for the current question by retrieve_docs.
    context: list[Document]
    # Answer to the last question in all_questions, written by
    # generate_answer.
    answer: str
52 |
53 |
def generate_sub_questions(
    state: State, config: RunnableConfig
) -> dict[str, list[str] | int]:
    """Decompose the question into sub-questions and start the answering loop.

    Args:
        state: Graph state; only ``question`` is read.
        config: Runnable config; ``configurable.max_generated_sub_questions_count``
            caps the number of sub-questions (defaults to 3 when absent).

    Returns:
        Partial state update with ``all_questions`` (the generated
        sub-questions followed by the original question) and
        ``current_question_idx`` set to 0. This is a dict, not a list as
        previously annotated.
    """
    max_generated_sub_questions_count = config.get("configurable", {}).get(
        "max_generated_sub_questions_count", 3
    )
    query = state["question"]

    # Declared per call because the length limit depends on the config.
    class SubQuestionsGenerator(BaseModel):
        sub_questions: list[str] = Field(
            ...,
            description="List of generated sub-problems / sub-questions",
            # ``max_length`` is the Pydantic v2 name of the deprecated
            # ``max_items`` constraint.
            max_length=max_generated_sub_questions_count,
        )

    structured_llm = llm.with_structured_output(
        SubQuestionsGenerator, method="function_calling"
    )
    decomposition_prompt = decomposition_prompt_template.format(question=query)
    response = structured_llm.invoke([HumanMessage(content=decomposition_prompt)])
    # The original question goes last so that it is answered after every
    # sub-question.
    questions = [*response.sub_questions, query]

    return {"all_questions": questions, "current_question_idx": 0}
75 |
76 |
def retrieve_docs(state: State):
    """Run a similarity search for the question at ``current_question_idx``.

    Returns a partial state update with the search results under ``context``.
    """
    position = state["current_question_idx"]
    current_question = state["all_questions"][position]
    return {"context": vectorstore.similarity_search(current_question)}
81 |
82 |
def generate_answer(state: State):
    """Answer the current question using earlier Q+A pairs and retrieved context.

    For the last entry of ``all_questions`` the reply text is returned under
    ``answer``. For any earlier entry the formatted pair is appended to
    ``qa_pairs`` and ``current_question_idx`` is advanced by one.
    """
    position = state["current_question_idx"]
    current_question = state["all_questions"][position]

    prompt_text = recursive_prompt_template.format(
        question=current_question,
        qa_pairs=state.get("qa_pairs", ""),
        context=state["context"],
    )
    llm_reply = llm.invoke([HumanMessage(content=prompt_text)])

    # Extend the running Q+A text with the pair that was just answered.
    extended_pairs = state.get("qa_pairs", "") + format_qa_pair(
        current_question, llm_reply.content
    )

    is_last_question = state["current_question_idx"] == len(state["all_questions"]) - 1
    if is_last_question:
        return {"answer": llm_reply.content}

    return {
        "qa_pairs": extended_pairs,
        "current_question_idx": state["current_question_idx"] + 1,
    }
99 |
100 |
def check_answer_status(state: State) -> Literal["Next sub-question", "Final answer"]:
    """Return ``"Final answer"`` once the state holds a truthy ``answer``.

    Otherwise ``"Next sub-question"`` is returned.
    """
    return "Final answer" if state.get("answer") else "Next sub-question"
106 |
107 |
class ConfigSchema(BaseModel):
    # Read by generate_sub_questions from config["configurable"]; must be
    # greater than 1.
    max_generated_sub_questions_count: int = Field(3, gt=1)
110 |
111 |
graph_builder = StateGraph(State, ConfigSchema)

# Register the nodes under explicit names.
for node_name, node_fn in (
    ("generate_sub_questions", generate_sub_questions),
    ("retrieve_docs", retrieve_docs),
    ("generate_answer", generate_answer),
):
    graph_builder.add_node(node_name, node_fn)

graph_builder.add_edge(START, "generate_sub_questions")
graph_builder.add_edge("generate_sub_questions", "retrieve_docs")
graph_builder.add_edge("retrieve_docs", "generate_answer")
# After each answer, check_answer_status either loops back to retrieval for
# the next question or ends the run.
answer_routes = {"Next sub-question": "retrieve_docs", "Final answer": END}
graph_builder.add_conditional_edges(
    "generate_answer",
    check_answer_status,
    answer_routes,
)

graph = graph_builder.compile()
128 |
129 |
130 | if __name__ == "__main__":
131 | query = "What are the main components of an LLM-powered autonomous agent system?"
132 | config = {
133 | "configurable": {
134 | "max_generated_sub_questions_count": 3,
135 | }
136 | }
137 |
138 | for stream_mode, event in graph.stream(
139 | {"question": query},
140 | stream_mode=["messages", "updates"],
141 | config=config,
142 | ):
143 | match stream_mode:
144 | case "messages":
145 | message, metadata = event
146 | print(message.content, end="", flush=True)
147 | case "updates":
148 | rprint(event)
149 |
150 | rprint(Markdown(event["generate_answer"]["answer"]))
151 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/hyde/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/hyde/__init__.py
--------------------------------------------------------------------------------
/src/llm_rag/graphs/hyde/hyde.py:
--------------------------------------------------------------------------------
1 | from typing import TypedDict
2 |
3 | import numpy as np
4 | from langchain_core.documents import Document
5 | from langchain_core.messages import HumanMessage
6 | from langchain_core.runnables import RunnableConfig
7 | from langgraph.graph import END, START, StateGraph
8 | from pydantic import BaseModel, Field
9 | from rich import print as rprint
10 | from rich.markdown import Markdown
11 | from rich.pretty import Pretty
12 |
13 | from llm_rag import embeddings, llm
14 | from llm_rag.indexing.article import vectorstore
15 |
# Prompt used by generate_documents; filled via str.format with ``question``.
hyde_prompt_template = (
    "Please write a passage to answer the question\n"
    "Question: {question}\n"
    "Passage:"
)


# Prompt used by generate_answer; filled with ``context`` and ``question``.
rag_prompt_template = (
    "Answer the following question based on this context:\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
27 |
28 |
def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)
31 |
32 |
class State(TypedDict):
    # Question passed in by the caller of the graph.
    question: str
    # Passages written by generate_documents (text of each LLM reply).
    generated_documents: list[str]
    # Mean of the question and passage embeddings. calculate_hyde_embeddings
    # stores it as a list, not as the ndarray previously annotated.
    hyde_embeddings: list[float]
    # Documents found by get_relevant_documents.
    context: list[Document]
    # LLM response text written by generate_answer.
    answer: str
39 |
40 |
def generate_documents(state: State, config: RunnableConfig) -> dict[str, list[str]]:
    """Generate hypothetical passages answering the question.

    Args:
        state: Graph state; only ``question`` is read.
        config: Runnable config; ``configurable.generated_documents_count``
            sets how many passages are requested (defaults to 3 when absent).

    Returns:
        Partial state update with the text of every LLM reply under
        ``generated_documents``. This is a dict of strings, not a list of
        ``Document`` objects as previously annotated.
    """
    generated_documents_count = config.get("configurable", {}).get(
        "generated_documents_count", 3
    )

    hyde_prompt = hyde_prompt_template.format(question=state["question"])
    # The same prompt is submitted once per requested passage.
    generated_documents = llm.batch([hyde_prompt] * generated_documents_count)

    return {
        "generated_documents": [document.content for document in generated_documents]
    }
52 |
53 |
def calculate_hyde_embeddings(state: State):
    """Average the question embedding with the generated passages' embeddings.

    The question vector and the passage vectors are stacked row-wise and the
    mean over rows is returned as a list under ``hyde_embeddings``.
    """
    question_vector = np.array(embeddings.embed_query(state["question"]))
    passage_vectors = np.array(
        embeddings.embed_documents(state["generated_documents"])
    )
    stacked = np.vstack([question_vector, passage_vectors])
    mean_vector = stacked.mean(axis=0)
    return {"hyde_embeddings": list(mean_vector)}
63 |
64 |
def get_relevant_documents(state: State):
    """Search the vector store with the averaged HyDE embedding.

    Returns a state update with the hits under ``"context"``.
    """
    query_vector = state["hyde_embeddings"]
    return {"context": vectorstore.similarity_search_by_vector(query_vector)}
68 |
69 |
def generate_answer(state: State):
    """Answer the question with the LLM using the retrieved context.

    Reads ``state["context"]`` and ``state["question"]``; returns a state
    update with the model reply text under ``"answer"``.
    """
    context_text = format_docs(state["context"])
    prompt_text = rag_prompt_template.format(
        context=context_text,
        question=state["question"],
    )
    message = HumanMessage(content=prompt_text)
    reply = llm.invoke([message])
    return {"answer": reply.content}
77 |
78 |
class ConfigSchema(BaseModel):
    # Configuration schema handed to StateGraph for this graph.
    # Number of hypothetical passages to generate; must be positive.
    generated_documents_count: int = Field(default=3, gt=0)
81 |
82 |
# Linear HyDE pipeline:
# generate_documents -> calculate_hyde_embeddings -> get_relevant_documents
# -> generate_answer.
graph_builder = StateGraph(State, ConfigSchema)

# Register each node function under the name used by the edges below.
graph_builder.add_node("generate_documents", generate_documents)
graph_builder.add_node("calculate_hyde_embeddings", calculate_hyde_embeddings)
graph_builder.add_node("get_relevant_documents", get_relevant_documents)
graph_builder.add_node("generate_answer", generate_answer)

# Chain the nodes from START to END.
graph_builder.add_edge(START, "generate_documents")
graph_builder.add_edge("generate_documents", "calculate_hyde_embeddings")
graph_builder.add_edge("calculate_hyde_embeddings", "get_relevant_documents")
graph_builder.add_edge("get_relevant_documents", "generate_answer")
graph_builder.add_edge("generate_answer", END)
# Module-level compiled graph object.
graph = graph_builder.compile()
96 |
97 |
if __name__ == "__main__":
    # Demo run: request five hypothetical passages for one sample question.
    query = "What is task decomposition for LLM agents?"
    config = {"configurable": {"generated_documents_count": 5}}
    response = graph.invoke({"question": query}, config=config)

    # Show the final state (truncated) followed by the rendered answer.
    rprint(Pretty(response, max_depth=2, max_length=20))
    rprint(Markdown(response["answer"]))
112 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/multi_query/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/multi_query/__init__.py
--------------------------------------------------------------------------------
/src/llm_rag/graphs/multi_query/multi_query.py:
--------------------------------------------------------------------------------
1 | import operator
2 | from typing import Annotated, TypedDict
3 |
4 | from langchain_core.documents import Document
5 | from langchain_core.load import dumps, loads
6 | from langchain_core.messages import HumanMessage
7 | from langchain_core.runnables import RunnableConfig
8 | from langgraph.constants import Send
9 | from langgraph.graph import END, START, StateGraph
10 | from pydantic import BaseModel, Field
11 | from rich import print as rprint
12 | from rich.markdown import Markdown
13 | from rich.pretty import Pretty
14 |
15 | from llm_rag import llm
16 | from llm_rag.indexing.article import vectorstore
17 |
# Answering prompt; filled with ``context`` and ``question`` in generate_answer.
rag_prompt_template = """Answer the following question based on this context:

{context}

Question: {question}
"""
24 |
25 |
def get_unique_docs(documents: list[list[Document]]) -> list[Document]:
    """Flatten the per-question result lists and drop duplicate documents.

    Documents are compared through their serialized form (``dumps``), and the
    first occurrence of each one is kept, in first-seen order.

    Args:
        documents: One list of retrieved documents per generated question.

    Returns:
        The distinct documents, deserialized with ``loads``.
    """
    serialized_docs = (dumps(doc) for sublist in documents for doc in sublist)
    # dict.fromkeys de-duplicates while preserving insertion order; a set
    # would make the context order vary from run to run.
    return [loads(doc) for doc in dict.fromkeys(serialized_docs)]
30 |
31 |
def format_docs(docs: list[Document]) -> str:
    """Join the ``page_content`` of each document into one string.

    Consecutive documents are separated by a blank line. The result is a
    single string, which is what the prompt template expects.
    """
    return "\n\n".join(doc.page_content for doc in docs)
34 |
35 |
class State(TypedDict):
    # Graph state shared by the multi-query nodes.
    question: str  # original user question
    generated_questions: list[str]  # questions produced by generate_queries
    # One result list per retrieve_docs run; operator.add is attached so the
    # lists written by the individual runs are concatenated.
    retrieved_docs: Annotated[list[list[Document]], operator.add]
    context: list[Document]  # de-duplicated documents from aggregate_docs
    answer: str  # final LLM answer
42 |
43 |
class RetrieverState(TypedDict):
    # Payload sent to each retrieve_docs run by assign_queries.
    generated_question: str  # the single question this run searches for
46 |
47 |
def generate_queries(state: State, config: RunnableConfig):
    """Generate alternative phrasings of the user question with the LLM.

    Args:
        state: Graph state; only ``state["question"]`` is read.
        config: Run configuration. ``configurable["generated_questions_count"]``
            (default 5) fixes how many questions the model must return and
            ``configurable["include_original_question"]`` (default True)
            controls whether the original question is put first in the list.

    Returns:
        A state update with the question list under ``"generated_questions"``.
    """
    # Tolerate a config that carries no "configurable" section.
    configurable = config.get("configurable", {})
    generated_questions_count = configurable.get("generated_questions_count", 5)
    include_original_question = configurable.get("include_original_question", True)

    query = state["question"]
    questions = [query] if include_original_question else []

    # Built per call because the required list length comes from the config.
    class MultiQueryGenerator(BaseModel):
        questions: list[str] = Field(
            ...,
            description="List of questions generated multiple perspectives based on user query",
            # min_length/max_length replace the deprecated pydantic
            # min_items/max_items keywords.
            min_length=generated_questions_count,
            max_length=generated_questions_count,
        )

    structured_llm = llm.with_structured_output(
        MultiQueryGenerator, method="function_calling"
    )
    response = structured_llm.invoke(query)
    questions.extend(response.questions)

    return {"generated_questions": questions}
77 |
78 |
def assign_queries(state: State):
    """Create one ``Send`` to ``retrieve_docs`` per generated question."""
    dispatches = []
    for generated in state["generated_questions"]:
        dispatches.append(Send("retrieve_docs", {"generated_question": generated}))
    return dispatches
84 |
85 |
def retrieve_docs(state: RetrieverState):
    """Run a similarity search for one generated question.

    The hits are wrapped in an outer list, so the update holds a single
    result list under ``"retrieved_docs"``.
    """
    hits = vectorstore.similarity_search(state["generated_question"])
    return {"retrieved_docs": [hits]}
89 |
90 |
def aggregate_docs(state: State):
    """Collapse all retrieved result lists into one de-duplicated context."""
    unique_documents = get_unique_docs(state["retrieved_docs"])
    return {"context": unique_documents}
95 |
96 |
def generate_answer(state: State):
    """Answer the original question with the LLM using the aggregated context.

    Returns a state update with the model reply text under ``"answer"``.
    """
    context_text = format_docs(state["context"])
    prompt_text = rag_prompt_template.format(
        question=state["question"],
        context=context_text,
    )
    message = HumanMessage(content=prompt_text)
    reply = llm.invoke([message])
    return {"answer": reply.content}
104 |
105 |
class ConfigSchema(BaseModel):
    # Configuration schema handed to StateGraph for this graph.
    # Number of questions the LLM must generate; must be greater than 1.
    generated_questions_count: int = Field(default=5, gt=1)
    # Whether the original question is searched alongside the generated ones.
    include_original_question: bool = Field(default=True)
109 |
110 |
# Multi-query pipeline: generate questions, run one retrieve_docs per
# question, merge the results, then answer.
graph_builder = StateGraph(State, ConfigSchema)

# Register each node function under the name used by the edges below.
graph_builder.add_node("generate_queries", generate_queries)
graph_builder.add_node("retrieve_docs", retrieve_docs)
graph_builder.add_node("aggregate_docs", aggregate_docs)
graph_builder.add_node("generate_answer", generate_answer)

graph_builder.add_edge(START, "generate_queries")
# assign_queries returns Send objects targeting "retrieve_docs", one per
# generated question.
graph_builder.add_conditional_edges(
    "generate_queries", assign_queries, ["retrieve_docs"]
)
graph_builder.add_edge("retrieve_docs", "aggregate_docs")
graph_builder.add_edge("aggregate_docs", "generate_answer")
graph_builder.add_edge("generate_answer", END)

# Module-level compiled graph object.
graph = graph_builder.compile()
127 |
128 |
if __name__ == "__main__":
    # Demo run: three generated questions, original question excluded.
    query = "What is task decomposition for LLM agents?"
    config = {
        "configurable": {
            "include_original_question": False,
            "generated_questions_count": 3,
        }
    }
    response = graph.invoke({"question": query}, config=config)

    # Show the final state (truncated) followed by the rendered answer.
    rprint(Pretty(response, max_depth=2))
    rprint(Markdown(response["answer"]))
141 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/multi_vector/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/multi_vector/__init__.py
--------------------------------------------------------------------------------
/src/llm_rag/graphs/multi_vector/chunks.py:
--------------------------------------------------------------------------------
1 | from typing import TypedDict
2 |
3 | from langchain_core.documents import Document
4 | from langchain_core.messages import HumanMessage
5 | from langgraph.graph import END, START, StateGraph
6 | from llm_rag import llm
7 | from llm_rag.indexing.multi_vector.chunks import retriever
8 | from rich import print as rprint
9 | from rich.markdown import Markdown
10 | from rich.pretty import Pretty
11 |
# Answering prompt; filled with ``context`` and ``question`` in generate.
rag_prompt_template = """Answer the following question based on this context:

{context}

Question: {question}
"""
18 |
19 |
def format_docs(docs):
    """Concatenate the ``page_content`` of each document into one string.

    Consecutive documents are separated by a blank line.
    """
    separator = "\n\n"
    contents = [item.page_content for item in docs]
    return separator.join(contents)
22 |
23 |
class State(TypedDict):
    # Graph state shared by the retrieve and generate nodes.
    question: str  # user question
    # Hits from retriever.vectorstore.similarity_search for the question.
    search_results: list[Document]
    # Documents returned by retriever.invoke; used to build the prompt.
    context: list[Document]
    answer: str  # final LLM answer
29 |
30 |
def retrieve(state: State):
    """Query both the underlying vector store and the retriever.

    Returns a state update with the vector-store hits under
    ``"search_results"`` and the retriever output under ``"context"``.
    """
    question = state["question"]
    return {
        "search_results": retriever.vectorstore.similarity_search(question),
        "context": retriever.invoke(question),
    }
38 |
39 |
def generate(state: State):
    """Answer the question with the LLM using the retrieved context.

    Returns a state update with the model reply text under ``"answer"``.
    """
    context_text = format_docs(state["context"])
    prompt_text = rag_prompt_template.format(
        question=state["question"],
        context=context_text,
    )
    message = HumanMessage(content=prompt_text)
    reply = llm.invoke([message])
    return {"answer": reply.content}
47 |
48 |
# Two-step pipeline: retrieve -> generate.
graph_builder = StateGraph(State)

# Register each node function under the name used by the edges below.
graph_builder.add_node("retrieve", retrieve)
graph_builder.add_node("generate", generate)

graph_builder.add_edge(START, "retrieve")
graph_builder.add_edge("retrieve", "generate")
graph_builder.add_edge("generate", END)

# Module-level compiled graph object.
graph = graph_builder.compile()
59 |
60 |
if __name__ == "__main__":
    # Demo run: invoke the graph for two sample questions in turn.
    agent_query = "What is task decomposition for LLM agents?"
    human_data_query = "What are main steps for collecting human data?"

    for demo_query in (agent_query, human_data_query):
        response = graph.invoke({"question": demo_query})
        rprint(Pretty(response, max_string=100, no_wrap=False))
        rprint(Markdown(response["answer"]))
71 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/multi_vector/hypothetical_questions.py:
--------------------------------------------------------------------------------
1 | from typing import TypedDict
2 |
3 | from langchain_core.documents import Document
4 | from langchain_core.messages import HumanMessage
5 | from langgraph.graph import END, START, StateGraph
6 | from llm_rag import llm
7 | from llm_rag.indexing.multi_vector.hypothetical_questions import retriever
8 | from rich import print as rprint
9 | from rich.markdown import Markdown
10 | from rich.pretty import Pretty
11 |
# Answering prompt; filled with ``context`` and ``question`` in generate.
rag_prompt_template = """Answer the following question based on this context:

{context}

Question: {question}
"""
18 |
19 |
def format_docs(docs):
    """Build one text block from the documents' ``page_content`` values.

    A blank line is placed between consecutive documents.
    """
    pieces = [entry.page_content for entry in docs]
    joined = "\n\n".join(pieces)
    return joined
22 |
23 |
class State(TypedDict):
    # Graph state shared by the retrieve and generate nodes.
    question: str  # user question
    # Hits from retriever.vectorstore.similarity_search for the question.
    search_results: list[Document]
    # Documents returned by retriever.invoke; used to build the prompt.
    context: list[Document]
    answer: str  # final LLM answer
29 |
30 |
def retrieve(state: State):
    """Query both the underlying vector store and the retriever.

    Returns a state update with the vector-store hits under
    ``"search_results"`` and the retriever output under ``"context"``.
    """
    question = state["question"]
    return {
        "search_results": retriever.vectorstore.similarity_search(question),
        "context": retriever.invoke(question),
    }
38 |
39 |
def generate(state: State):
    """Answer the question with the LLM using the retrieved context.

    Returns a state update with the model reply text under ``"answer"``.
    """
    context_text = format_docs(state["context"])
    prompt_text = rag_prompt_template.format(
        question=state["question"],
        context=context_text,
    )
    message = HumanMessage(content=prompt_text)
    reply = llm.invoke([message])
    return {"answer": reply.content}
47 |
48 |
# Two-step pipeline: retrieve -> generate.
graph_builder = StateGraph(State)

# Register each node function under the name used by the edges below.
graph_builder.add_node("retrieve", retrieve)
graph_builder.add_node("generate", generate)

graph_builder.add_edge(START, "retrieve")
graph_builder.add_edge("retrieve", "generate")
graph_builder.add_edge("generate", END)

# Module-level compiled graph object.
graph = graph_builder.compile()
59 |
60 |
if __name__ == "__main__":
    # Demo run: invoke the graph for two sample questions in turn.
    agent_query = "What is task decomposition for LLM agents?"
    human_data_query = "What are main steps for collecting human data?"

    for demo_query in (agent_query, human_data_query):
        response = graph.invoke({"question": demo_query})
        rprint(Pretty(response, max_string=100, no_wrap=False))
        rprint(Markdown(response["answer"]))
71 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/multi_vector/summary.py:
--------------------------------------------------------------------------------
1 | from typing import TypedDict
2 |
3 | from langchain_core.documents import Document
4 | from langchain_core.messages import HumanMessage
5 | from langgraph.graph import END, START, StateGraph
6 | from llm_rag import llm
7 | from llm_rag.indexing.multi_vector.summary import retriever
8 | from rich import print as rprint
9 | from rich.markdown import Markdown
10 | from rich.pretty import Pretty
11 |
# Answering prompt; filled with ``context`` and ``question`` in generate.
rag_prompt_template = """Answer the following question based on this context:

{context}

Question: {question}
"""
18 |
19 |
def format_docs(docs):
    """Return the documents' ``page_content`` values joined by blank lines."""
    blank_line = "\n\n"
    texts = (document.page_content for document in docs)
    return blank_line.join(texts)
22 |
23 |
class State(TypedDict):
    # Graph state shared by the retrieve and generate nodes.
    question: str  # user question
    # Hits from retriever.vectorstore.similarity_search for the question.
    search_results: list[Document]
    # Documents returned by retriever.invoke; used to build the prompt.
    context: list[Document]
    answer: str  # final LLM answer
29 |
30 |
def retrieve(state: State):
    """Query both the underlying vector store and the retriever.

    Returns a state update with the vector-store hits under
    ``"search_results"`` and the retriever output under ``"context"``.
    """
    question = state["question"]
    return {
        "search_results": retriever.vectorstore.similarity_search(question),
        "context": retriever.invoke(question),
    }
38 |
39 |
def generate(state: State):
    """Answer the question with the LLM using the retrieved context.

    Returns a state update with the model reply text under ``"answer"``.
    """
    context_text = format_docs(state["context"])
    prompt_text = rag_prompt_template.format(
        question=state["question"],
        context=context_text,
    )
    message = HumanMessage(content=prompt_text)
    reply = llm.invoke([message])
    return {"answer": reply.content}
47 |
48 |
# Two-step pipeline: retrieve -> generate.
graph_builder = StateGraph(State)

# Register each node function under the name used by the edges below.
graph_builder.add_node("retrieve", retrieve)
graph_builder.add_node("generate", generate)

graph_builder.add_edge(START, "retrieve")
graph_builder.add_edge("retrieve", "generate")
graph_builder.add_edge("generate", END)

# Module-level compiled graph object.
graph = graph_builder.compile()
59 |
60 |
if __name__ == "__main__":
    # Demo run: invoke the graph for two sample questions in turn.
    agent_query = "What is task decomposition for LLM agents?"
    human_data_query = "What are main steps for collecting human data?"

    for demo_query in (agent_query, human_data_query):
        response = graph.invoke({"question": demo_query})
        rprint(Pretty(response, max_string=100, no_wrap=False))
        rprint(Markdown(response["answer"]))
71 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/query_construction/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/query_construction/__init__.py
--------------------------------------------------------------------------------
/src/llm_rag/graphs/query_construction/self_query.py:
--------------------------------------------------------------------------------
1 | from typing import TypedDict
2 |
3 | from langchain_core.documents import Document
4 | from langgraph.graph import END, START, StateGraph
5 | from llm_rag.indexing.self_query import retriever
6 | from rich import print as rprint
7 | from rich.pretty import Pretty
8 |
9 |
class State(TypedDict):
    # Graph state for the self-query graph.
    question: str  # natural-language query from the user
    context: list[Document]  # documents returned by retriever.invoke
13 |
14 |
def retrieve(state: State):
    """Run the self-query retriever on the question.

    Returns a state update with the retrieved documents under ``"context"``.
    """
    return {"context": retriever.invoke(state["question"])}
18 |
19 |
# Single-node graph: START -> retrieve -> END (no answer generation step).
graph_builder = StateGraph(State)

graph_builder.add_node("retrieve", retrieve)

graph_builder.add_edge(START, "retrieve")
graph_builder.add_edge("retrieve", END)

# Module-level compiled graph object.
graph = graph_builder.compile()
28 |
29 |
if __name__ == "__main__":
    # Sample queries; each is printed and then sent through the graph.
    questions = [
        "Which videos are 7 to 10 minutes long",
        "Videos published in March 2024",
        "Find tutorials with views not less than 100k",
        "Which videos should I watch on the topic of routing",
        "Which 1 video should I watch on the topic of routing",
    ]

    for question in questions:
        print(question)
        response = graph.invoke({"question": question})
        pretty_state = Pretty(response, max_string=100, no_wrap=False)
        rprint(pretty_state)
43 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/rag_fusion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/rag_fusion/__init__.py
--------------------------------------------------------------------------------
/src/llm_rag/graphs/rag_fusion/rag_fusion.py:
--------------------------------------------------------------------------------
1 | import operator
2 | from collections import defaultdict
3 | from typing import Annotated, TypedDict
4 |
5 | from langchain_core.documents import Document
6 | from langchain_core.load import dumps, loads
7 | from langchain_core.messages import HumanMessage
8 | from langchain_core.runnables import RunnableConfig
9 | from langgraph.constants import Send
10 | from langgraph.graph import END, START, StateGraph
11 | from pydantic import BaseModel, Field
12 | from rich import print as rprint
13 | from rich.markdown import Markdown
14 | from rich.pretty import Pretty
15 |
16 | from llm_rag import llm
17 | from llm_rag.indexing.article import vectorstore
18 |
# Answering prompt; filled with ``context`` and ``question`` in generate_answer.
rag_prompt_template = """Answer the following question based on this context:

{context}

Question: {question}
"""
25 |
26 |
def reciprocal_rank_fusion(
    results: list[list[Document]], k: int = 60
) -> list[tuple[Document, float]]:
    """Fuse several ranked result lists with reciprocal rank fusion.

    Each document receives ``1 / (k + rank)`` for every list it appears in
    (ranks start at 1); the contributions are summed per document.

    Args:
        results: One ranked list of documents per query.
        k: Constant added to the rank in the denominator.

    Returns:
        ``(document, fused_score)`` pairs sorted by descending score.
    """
    # Scores are fractional, so the accumulator defaults to 0.0.
    fused_scores: defaultdict[str, float] = defaultdict(float)

    for docs in results:
        for rank, doc in enumerate(docs, start=1):
            # The serialized form is the key, so equal documents coming from
            # different lists share one score.
            fused_scores[dumps(doc)] += 1 / (k + rank)

    ranked = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)
    return [(loads(doc), score) for doc, score in ranked]
42 |
43 |
def format_docs(docs: list[Document]) -> str:
    """Join the ``page_content`` of each document into one string.

    Consecutive documents are separated by a blank line. The result is a
    single string, which is what the prompt template expects.
    """
    return "\n\n".join(doc.page_content for doc in docs)
46 |
47 |
class State(TypedDict):
    # Graph state shared by the RAG-fusion nodes.
    question: str  # original user question
    generated_questions: list[str]  # questions produced by generate_queries
    # One result list per retrieve_docs run; operator.add is attached so the
    # lists written by the individual runs are concatenated.
    retrieved_docs: Annotated[list[list[Document]], operator.add]
    context: list[Document]  # documents ordered by fused score
    context_scores: list[float]  # fused scores, parallel to ``context``
    answer: str  # final LLM answer
55 |
56 |
class RetrieverState(TypedDict):
    # Payload sent to each retrieve_docs run by assign_queries.
    generated_question: str  # the single question this run searches for
59 |
60 |
def generate_queries(state: State, config: RunnableConfig):
    """Generate alternative phrasings of the user question with the LLM.

    Args:
        state: Graph state; only ``state["question"]`` is read.
        config: Run configuration. ``configurable["generated_questions_count"]``
            (default 5) fixes how many questions the model must return and
            ``configurable["include_original_question"]`` (default True)
            controls whether the original question is put first in the list.

    Returns:
        A state update with the question list under ``"generated_questions"``.
    """
    # Tolerate a config that carries no "configurable" section.
    configurable = config.get("configurable", {})
    generated_questions_count = configurable.get("generated_questions_count", 5)
    include_original_question = configurable.get("include_original_question", True)

    query = state["question"]
    questions = [query] if include_original_question else []

    # Built per call because the required list length comes from the config.
    class MultiQueryGenerator(BaseModel):
        questions: list[str] = Field(
            ...,
            description="List of questions generated multiple perspectives based on user query",
            # min_length/max_length replace the deprecated pydantic
            # min_items/max_items keywords.
            min_length=generated_questions_count,
            max_length=generated_questions_count,
        )

    structured_llm = llm.with_structured_output(
        MultiQueryGenerator, method="function_calling"
    )
    response = structured_llm.invoke(query)
    questions.extend(response.questions)

    return {"generated_questions": questions}
90 |
91 |
def assign_queries(state: State):
    """Create one ``Send`` to ``retrieve_docs`` per generated question."""
    dispatches = []
    for generated in state["generated_questions"]:
        dispatches.append(Send("retrieve_docs", {"generated_question": generated}))
    return dispatches
97 |
98 |
def retrieve_docs(state: RetrieverState):
    """Run a similarity search for one generated question.

    The hits are wrapped in an outer list, so the update holds a single
    ranked result list under ``"retrieved_docs"``.
    """
    hits = vectorstore.similarity_search(state["generated_question"])
    return {"retrieved_docs": [hits]}
102 |
103 |
def aggregate_docs(state: State):
    """Fuse the per-question result lists into one ranked context.

    Reads ``state["retrieved_docs"]`` and returns a state update with the
    documents under ``"context"`` and their fused scores, in the same order,
    under ``"context_scores"``. Both are empty lists when nothing was
    retrieved.
    """
    reranked_results = reciprocal_rank_fusion(state["retrieved_docs"])
    # Build the two columns explicitly: unpacking ``zip(*pairs)`` raises
    # ValueError for an empty result and yields tuples, while the state
    # declares lists.
    docs = [doc for doc, _ in reranked_results]
    scores = [score for _, score in reranked_results]
    return {"context": docs, "context_scores": scores}
109 |
110 |
def generate_answer(state: State):
    """Answer the original question with the LLM using the fused context.

    Returns a state update with the model reply text under ``"answer"``.
    """
    context_text = format_docs(state["context"])
    prompt_text = rag_prompt_template.format(
        question=state["question"],
        context=context_text,
    )
    message = HumanMessage(content=prompt_text)
    reply = llm.invoke([message])
    return {"answer": reply.content}
118 |
119 |
class ConfigSchema(BaseModel):
    # Configuration schema handed to StateGraph for this graph.
    # Number of questions the LLM must generate; must be greater than 1.
    generated_questions_count: int = Field(default=5, gt=1)
    # Whether the original question is searched alongside the generated ones.
    include_original_question: bool = Field(default=True)
123 |
124 |
# RAG-fusion pipeline: generate questions, run one retrieve_docs per
# question, fuse the rankings, then answer.
graph_builder = StateGraph(State, ConfigSchema)

# Register each node function under the name used by the edges below.
graph_builder.add_node("generate_queries", generate_queries)
graph_builder.add_node("retrieve_docs", retrieve_docs)
graph_builder.add_node("aggregate_docs", aggregate_docs)
graph_builder.add_node("generate_answer", generate_answer)

graph_builder.add_edge(START, "generate_queries")
# assign_queries returns Send objects targeting "retrieve_docs", one per
# generated question.
graph_builder.add_conditional_edges(
    "generate_queries", assign_queries, ["retrieve_docs"]
)
graph_builder.add_edge("retrieve_docs", "aggregate_docs")
graph_builder.add_edge("aggregate_docs", "generate_answer")
graph_builder.add_edge("generate_answer", END)

# Module-level compiled graph object.
graph = graph_builder.compile()
141 |
142 |
if __name__ == "__main__":
    # Demo run: three generated questions, original question excluded.
    query = "What is task decomposition for LLM agents?"
    config = {
        "configurable": {
            "include_original_question": False,
            "generated_questions_count": 3,
        }
    }
    response = graph.invoke({"question": query}, config=config)

    # Show the final state (truncated) followed by the rendered answer.
    rprint(Pretty(response, max_depth=2))
    rprint(Markdown(response["answer"]))
155 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/raptor/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/raptor/__init__.py
--------------------------------------------------------------------------------
/src/llm_rag/graphs/raptor/raptor.py:
--------------------------------------------------------------------------------
1 | from typing import TypedDict
2 |
3 | from langchain_core.documents import Document
4 | from langchain_core.messages import HumanMessage
5 | from langgraph.graph import END, START, StateGraph
6 | from llm_rag import llm
7 | from llm_rag.indexing.raptor.raptor import retriever
8 | from rich import print as rprint
9 | from rich.markdown import Markdown
10 | from rich.pretty import Pretty
11 |
# Answering prompt; filled with ``context`` and ``question`` in generate.
rag_prompt_template = """Answer the following question based on this context:

{context}

Question: {question}
"""
18 |
19 |
def format_docs(docs):
    """Concatenate the ``page_content`` of each document into one string.

    Consecutive documents are separated by a blank line.
    """
    separator = "\n\n"
    contents = [item.page_content for item in docs]
    return separator.join(contents)
22 |
23 |
class State(TypedDict):
    # Graph state shared by the retrieve and generate nodes.
    question: str  # user question
    context: list[Document]  # documents returned by retriever.invoke
    answer: str  # final LLM answer
28 |
29 |
def retrieve(state: State):
    """Run the RAPTOR retriever on the question.

    Returns a state update with the retrieved documents under ``"context"``.
    """
    return {"context": retriever.invoke(state["question"])}
33 |
34 |
def generate(state: State):
    """Answer the question with the LLM using the retrieved context.

    Returns a state update with the model reply text under ``"answer"``.
    """
    context_text = format_docs(state["context"])
    prompt_text = rag_prompt_template.format(
        question=state["question"],
        context=context_text,
    )
    message = HumanMessage(content=prompt_text)
    reply = llm.invoke([message])
    return {"answer": reply.content}
42 |
43 |
# Two-step pipeline: retrieve -> generate.
graph_builder = StateGraph(State)

# Register each node function under the name used by the edges below.
graph_builder.add_node("retrieve", retrieve)
graph_builder.add_node("generate", generate)

graph_builder.add_edge(START, "retrieve")
graph_builder.add_edge("retrieve", "generate")
graph_builder.add_edge("generate", END)

# Module-level compiled graph object.
graph = graph_builder.compile()
54 |
55 |
if __name__ == "__main__":
    # Demo run: three sample questions of decreasing generality.
    high_level_query = "What is this documentation about?"
    mid_level_query = "What are the main components of LangGraph"
    low_level_query = "What is time travel?"

    for demo_query in (high_level_query, mid_level_query, low_level_query):
        response = graph.invoke({"question": demo_query})
        rprint(Pretty(response, max_string=100, no_wrap=False))
        rprint(Markdown(response["answer"]))
71 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/routing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/routing/__init__.py
--------------------------------------------------------------------------------
/src/llm_rag/graphs/routing/logical.py:
--------------------------------------------------------------------------------
1 | from typing import Literal, TypedDict
2 |
3 | from langchain_core.messages import HumanMessage, SystemMessage
4 | from langgraph.graph import END, START, StateGraph
5 | from llm_rag import llm
6 | from pydantic import BaseModel, Field
7 | from rich import print as rprint
8 |
# System message sent ahead of the user question in select_data_source.
system_prompt = """You are an expert at routing a user question to the appropriate data source.

Based on the programming language the question is referring to, route it to the relevant data source."""
12 |
13 |
class RouteInfo(BaseModel):
    """Route a user query to the most relevant data source."""

    # Structured-output schema passed to llm.with_structured_output.
    # The allowed values match the node names registered on the graph.
    data_source: Literal["python_docs", "js_docs", "golang_docs"] = Field(
        ...,
        description="Given a user question choose which data source would be most relevant for answering their question",
    )
21 |
22 |
# LLM wrapper that returns RouteInfo instances; created once at import time.
structured_llm = llm.with_structured_output(RouteInfo, method="function_calling")
24 |
25 |
class State(TypedDict):
    # Graph state shared by the routing nodes.
    question: str  # user question
    data_source: str  # route chosen by select_data_source
    context: str  # placeholder text written by the selected retrieve_* node
    answer: str  # text produced by generate_answer
31 |
32 |
def select_data_source(state: State):
    """Ask the structured LLM which data source fits the question.

    Returns a state update with the chosen source under ``"data_source"``.
    """
    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=state["question"]),
    ]
    route = structured_llm.invoke(messages)
    return {"data_source": route.data_source}
41 |
42 |
def route_query(state: State) -> Literal["python_docs", "js_docs", "golang_docs"]:
    """Return the data source stored in the state as the routing target."""
    selected_source = state["data_source"]
    return selected_source
45 |
46 |
def retrieve_python_docs(state: State):
    """Return a fixed placeholder context for the Python route."""
    placeholder = "Python documentation"
    return {"context": placeholder}
49 |
50 |
def retrieve_js_docs(state: State):
    """Return a fixed placeholder context for the JavaScript route."""
    placeholder = "Javascript documentation"
    return {"context": placeholder}
53 |
54 |
def retrieve_golang_docs(state: State):
    """Return a fixed placeholder context for the Go route."""
    placeholder = "Go documentation"
    return {"context": placeholder}
57 |
58 |
def generate_answer(state: State):
    """Build the placeholder answer that names the selected context."""
    source_text = state["context"]
    return {"answer": f"Answer based on {source_text}"}
61 |
62 |
# Routing graph: select a data source, branch to the matching retrieve node,
# then produce the answer.
graph_builder = StateGraph(State)

# The three retrieve nodes are registered under the same names that
# route_query can return.
graph_builder.add_node("select_data_source", select_data_source)
graph_builder.add_node("python_docs", retrieve_python_docs)
graph_builder.add_node("js_docs", retrieve_js_docs)
graph_builder.add_node("golang_docs", retrieve_golang_docs)
graph_builder.add_node("generate_answer", generate_answer)

graph_builder.add_edge(START, "select_data_source")
# route_query picks one of the listed nodes from state["data_source"].
graph_builder.add_conditional_edges(
    "select_data_source", route_query, ["python_docs", "js_docs", "golang_docs"]
)
# Every branch converges on generate_answer.
graph_builder.add_edge("python_docs", "generate_answer")
graph_builder.add_edge("js_docs", "generate_answer")
graph_builder.add_edge("golang_docs", "generate_answer")
graph_builder.add_edge("generate_answer", END)

# Module-level compiled graph object.
graph = graph_builder.compile()
81 |
82 |
if __name__ == "__main__":
    # Demo run: one sample question per expected route; each final state is
    # printed after the graph finishes.
    python_query = """Why doesn't the following code work:

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(["human", "speak in {language}"])
prompt.invoke("french")
"""
    response = graph.invoke({"question": python_query})
    rprint(response)

    javascript_query = """Which arguments has getElementById function?"""
    response = graph.invoke({"question": javascript_query})
    rprint(response)

    golang_query = """What is struct?"""
    response = graph.invoke({"question": golang_query})
    rprint(response)
101 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/routing/semantic.py:
--------------------------------------------------------------------------------
1 | from typing import TypedDict
2 |
3 | from langchain_community.utils.math import cosine_similarity
4 | from langchain_core.messages import HumanMessage
5 | from langgraph.graph import END, START, StateGraph
6 | from llm_rag import embeddings, llm
7 | from rich import print as rprint
8 | from rich.markdown import Markdown
9 |
# Route labels, index-aligned with ``prompt_templates`` below.
prompt_names = ["PHYSICS", "MATH", "OTHER"]


# Prompt used when the question is closest to the physics route.
physics_prompt_template = """You are a very smart physics professor.
You are great at answering questions about physics in a concise and easy to understand manner.
When you don't know the answer to a question you admit that you don't know.

Here is a question:
{question}"""


# Prompt used when the question is closest to the math route.
math_prompt_template = """You are a very good mathematician. You are great at answering math questions.
You are so good because you are able to break down hard problems into their component parts,
answer the component parts, and then put them together to answer the broader question.

Here is a question:
{question}"""


# Fallback prompt. It is an f-string: the list of the other route names is
# interpolated now, while ``{{question}}`` stays a placeholder for .format().
other_prompt_template = f"""You are a helpful assistant. You are great at answering all questions not from the following themes: {prompt_names[:-1]}

Here is a question:
{{question}}"""


# Same order as ``prompt_names`` so one index selects both name and template.
prompt_templates = [
    physics_prompt_template,
    math_prompt_template,
    other_prompt_template,
]
# Embeddings of the raw templates, computed once at import time and compared
# against the question embedding in select_route_prompt.
prompt_embeddings = embeddings.embed_documents(prompt_templates)
41 |
42 |
class State(TypedDict):
    # Graph state shared by the semantic-routing nodes.
    question: str  # user question
    most_similar_prompt_idx: int  # index into prompt_templates / prompt_names
    most_similar_prompt_name: str  # route label for the selected prompt
    answer: str  # final LLM answer
48 |
49 |
def select_route_prompt(state: State):
    """Pick the prompt template whose embedding is closest to the question.

    Embeds ``state["question"]``, compares it with ``prompt_embeddings`` by
    cosine similarity and returns a state update with the winning index
    under ``"most_similar_prompt_idx"`` and its label under
    ``"most_similar_prompt_name"``.
    """
    query_embedding = embeddings.embed_query(state["question"])
    # One query row against all prompt rows; [0] takes that single row.
    query_similarity = cosine_similarity([query_embedding], prompt_embeddings)[0]
    # argmax() presumably yields a NumPy integer here; convert it so the state
    # holds a built-in int, as the State annotation declares.
    most_similar_prompt_idx = int(query_similarity.argmax())
    return {
        "most_similar_prompt_idx": most_similar_prompt_idx,
        "most_similar_prompt_name": prompt_names[most_similar_prompt_idx],
    }
58 |
59 |
def generate_answer(state: State):
    """Answer the question with the LLM using the selected route prompt.

    Returns a state update with the model reply text under ``"answer"``.
    """
    template = prompt_templates[state["most_similar_prompt_idx"]]
    prompt_text = template.format(question=state["question"])
    reply = llm.invoke([HumanMessage(content=prompt_text)])
    return {"answer": reply.content}
66 |
67 |
# Two-step pipeline: select_route_prompt -> generate_answer.
graph_builder = StateGraph(State)

# Register each node function under the name used by the edges below.
graph_builder.add_node("select_route_prompt", select_route_prompt)
graph_builder.add_node("generate_answer", generate_answer)

graph_builder.add_edge(START, "select_route_prompt")
graph_builder.add_edge("select_route_prompt", "generate_answer")
graph_builder.add_edge("generate_answer", END)

# Module-level compiled graph object.
graph = graph_builder.compile()
78 |
79 |
if __name__ == "__main__":
    # Demo run: one sample question per route, each followed by a divider.
    queries = [
        "What's a black hole",
        "What is the square root of 81",
        "Hello! How are you?",
    ]

    for query in queries:
        print(query)
        response = graph.invoke({"question": query})
        rprint(response)
        answer_markdown = Markdown(response["answer"])
        rprint(answer_markdown)
        rprint("=" * 50)
93 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/self_rag/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/self_rag/__init__.py
--------------------------------------------------------------------------------
/src/llm_rag/graphs/self_rag/self_rag.py:
--------------------------------------------------------------------------------
1 | from typing import Literal, TypedDict
2 |
3 | from langchain_core.documents import Document
4 | from langchain_core.messages import HumanMessage
5 | from langchain_core.runnables import chain
6 | from langgraph.graph import END, START, StateGraph
7 | from pydantic import BaseModel, Field
8 | from rich import print as rprint
9 | from rich.markdown import Markdown
10 | from rich.pretty import Pretty
11 |
12 | from llm_rag import llm
13 | from llm_rag.indexing.reflection import retriever
14 |
# Prompt for grade_retrieval: asks whether extra documents are needed.
retrieval_prompt_template = """You are grader assistant assessing the need to retrieve additional documents to answer the user's question.
If you are sure that all the necessary data is available, then you do not need to retrieve additional documents.
Give a binary score to indicate whether retrieval is required.

User question:
{question}
"""

# Prompt for generate_rag_answer: answer grounded in the supplied context.
rag_prompt_template = """Answer the following question based on this context:

{context}

Question: {question}
"""

# Prompt for generate_answer: direct answer without any retrieved context.
answer_prompt_template = """Answer the following question:

Question: {question}
"""

# Fixed answer returned by generate_no_answer.
no_answer_prompt = "I don't have an answer to the question."

# Prompt for grade_document_relevance: per-document relevance check.
relevance_grading_prompt_template = """You are a grader assessing relevance of a retrieved document to a user question.
It does not need to be a stringent test. The goal is to filter out erroneous retrievals.
If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant.
Give a binary score to indicate whether the document is relevant to the question.

Retrieved document:
{document}

User question:
{question}
"""

# Prompt for grade_hallucinations: is the answer supported by the facts?
hallucinations_grading_prompt_template = """You are a grader assessing whether an LLM answer is grounded in / supported by a set of retrieved facts.
Give a binary score whether the answer is grounded in / supported by the set of facts.

Set of facts:
{context}

LLM answer:
{answer}
"""

# Prompt for grade_answer: does the answer resolve the question?
answer_grading_prompt_template = """You are a grader assessing whether an answer addresses / resolves a question.
Give a binary score whether the answer resolves the question.

User question:
{question}

LLM answer:
{answer}
"""

# Prompt for rewrite_query: produce an improved version of the question.
query_rewriting_prompt_template = """You a question re-writer that converts an input question to a better version that is optimized for web search.
Look at the input and try to reason about the underlying semantic intent / meaning.

Here is the initial question:
{question}

Formulate an improved question."""
76 |
77 |
def format_docs(docs: list[Document]) -> str:
    """Join the page contents of *docs* into one string.

    Consecutive documents are separated by a blank line; an empty list
    yields an empty string.
    """
    return "\n\n".join(doc.page_content for doc in docs)
80 |
81 |
class RetrievalGrade(BaseModel):
    """Check if retrieval of additional documents is required."""

    # Structured-output schema for the retrieval-need decision.
    # Required free-text reasoning field, declared before the verdict.
    chain_of_thought: str = Field(
        ...,
        description="Step by step reasoning to check if retrieval of additional documents is required",
    )
    # Verdict read by decide_to_retrieve to pick the next node.
    is_required: bool = Field(
        description="Retrieval of additional documents is required"
    )


# LLM wrapper that requests RetrievalGrade-shaped output via function calling.
retrieval_grader_llm = llm.with_structured_output(
    RetrievalGrade, method="function_calling"
)
97 |
98 |
class RelevanceGrade(BaseModel):
    """Relevance check on retrieved document."""

    # Structured-output schema for grading one retrieved document.
    # Required free-text reasoning field, declared before the verdict.
    chain_of_thought: str = Field(
        ...,
        description="Step by step reasoning to check if the document is relevant to the question",
    )
    # Verdict used by grade_documents to filter the retrieved documents.
    is_relevant: bool = Field(description="Document is relevant to the question")


# LLM wrapper that requests RelevanceGrade-shaped output via function calling.
relevance_grader_llm = llm.with_structured_output(
    RelevanceGrade, method="function_calling"
)
112 |
113 |
@chain
def grade_document_relevance(document, question):
    """Grade one retrieved document against the question.

    Fills the relevance-grading prompt with both values and returns whatever
    the relevance grader LLM produces for it.
    """
    prompt_text = relevance_grading_prompt_template.format(
        question=question, document=document
    )
    grading_message = HumanMessage(content=prompt_text)
    return relevance_grader_llm.invoke([grading_message])
123 |
124 |
class HallucationsGrade(BaseModel):
    """Hallucination check in generated answer."""

    # Structured-output schema for the groundedness check.
    # Required free-text reasoning field, declared before the verdict.
    chain_of_thought: str = Field(
        ...,
        description="Step by step reasoning to check if the answer is grounded in the facts",
    )
    # Verdict read by check_hallucinations to pick the next node.
    is_grounded: bool = Field(description="Answer is grounded in the facts")


# LLM wrapper that requests HallucationsGrade-shaped output via function calling.
hallucations_grader_llm = llm.with_structured_output(
    HallucationsGrade, method="function_calling"
)
138 |
139 |
class AnswerGrade(BaseModel):
    """Check if answer addresses the question."""

    # Structured-output schema for the usefulness check.
    # Required free-text reasoning field, declared before the verdict.
    chain_of_thought: str = Field(
        ...,
        description="Step by step reasoning to check if the answer addresses the questions",
    )
    # Verdict read by check_answer to either finish or rewrite the query.
    is_useful: bool = Field(description="Answer addresses the question")


# LLM wrapper that requests AnswerGrade-shaped output via function calling.
answer_grader_llm = llm.with_structured_output(AnswerGrade, method="function_calling")
151 |
152 |
class SearchQuery(BaseModel):
    """Question optimization for search."""

    # Structured-output schema for the rewritten question.
    # Required free-text reasoning field, declared before the result.
    chain_of_thought: str = Field(
        ..., description="Step by step reasoning to optimize query for search"
    )
    # Rewritten query; rewrite_query stores it back as the state's question.
    search_query: str = Field(description="Optimized search query")


# LLM wrapper that requests SearchQuery-shaped output via function calling.
search_llm = llm.with_structured_output(SearchQuery, method="function_calling")
163 |
164 |
class State(TypedDict):
    """Graph state shared by all nodes of this module's graph."""

    # Current question; replaced by rewrite_query when an answer is not useful.
    question: str
    # Written by grade_retrieval, read by decide_to_retrieve.
    retrieval_grade: RetrievalGrade
    # Raw retriever output (written by retrieve).
    documents: list[Document]
    # One grade per retrieved document (written by grade_documents).
    relevance_grades: list[RelevanceGrade]
    # NOTE(review): no node visible in this module reads or writes this key.
    generation: str
    # Written by grade_hallucinations, read by check_hallucinations.
    hallucinations_grade: HallucationsGrade
    # Documents that passed the relevance filter (written by grade_documents).
    context: list[Document]
    # Written by grade_answer, read by check_answer.
    answer_grade: AnswerGrade
    # Final or candidate answer text (written by the generate_* nodes).
    answer: str
175 |
176 |
def grade_retrieval(state: State):
    """Ask the retrieval grader whether the question needs extra documents."""
    prompt_text = retrieval_prompt_template.format(question=state["question"])
    return {"retrieval_grade": retrieval_grader_llm.invoke(prompt_text)}
182 |
183 |
def decide_to_retrieve(state: State) -> Literal["retrieve", "generate_answer"]:
    """Route to retrieval when the retrieval grade asks for it, else answer directly."""
    needs_retrieval = state["retrieval_grade"].is_required
    return "retrieve" if needs_retrieval else "generate_answer"
191 |
192 |
def retrieve(state: State):
    """Run the retriever on the current question and store its output."""
    return {"documents": retriever.invoke(state["question"])}
197 |
198 |
def grade_documents(state: State):
    """Grade every retrieved document and keep only the relevant ones.

    Returns the surviving documents under ``context`` and the full list of
    grades under ``relevance_grades``.
    """
    retrieved = state["documents"]
    relevance_grades = grade_document_relevance.batch(
        retrieved, question=state["question"]
    )

    kept = []
    for candidate, grade in zip(retrieved, relevance_grades):
        if grade.is_relevant:
            kept.append(candidate)

    return {"context": kept, "relevance_grades": relevance_grades}
211 |
212 |
def check_documents_relevance(
    state: State,
) -> Literal["generate_rag_answer", "generate_no_answer"]:
    """Route to RAG generation if any document survived filtering, else give up."""
    has_context = len(state["context"]) > 0
    return "generate_rag_answer" if has_context else "generate_no_answer"
222 |
223 |
def generate_rag_answer(state: State):
    """Answer the question from the filtered context documents."""
    prompt_text = rag_prompt_template.format(
        context=format_docs(state["context"]),
        question=state["question"],
    )
    reply = llm.invoke([HumanMessage(content=prompt_text)])
    return {"answer": reply.content}
231 |
232 |
def generate_answer(state: State):
    """Answer the question directly, without any retrieved context."""
    message = HumanMessage(
        content=answer_prompt_template.format(question=state["question"])
    )
    return {"answer": llm.invoke([message]).content}
237 |
238 |
def generate_no_answer(state: State):
    """Return the fixed "no answer" text; the state itself is not consulted."""
    fallback = {"answer": no_answer_prompt}
    return fallback
241 |
242 |
def grade_hallucinations(state: State):
    """Grade whether the current answer is grounded in the filtered context.

    The context is rendered with ``format_docs`` so the grader sees the same
    text the RAG answer was generated from, rather than the Python repr of a
    list of ``Document`` objects (which also includes their metadata).
    """
    facts = format_docs(state["context"])
    hallucinations_grading_prompt = hallucinations_grading_prompt_template.format(
        context=facts, answer=state["answer"]
    )
    hallucinations_grade = hallucations_grader_llm.invoke(hallucinations_grading_prompt)
    return {"hallucinations_grade": hallucinations_grade}
251 |
252 |
def check_hallucinations(
    state: State,
) -> Literal["grade_answer", "generate_rag_answer"]:
    """Move on to answer grading when grounded, otherwise regenerate the answer."""
    grounded = state["hallucinations_grade"].is_grounded
    return "grade_answer" if grounded else "generate_rag_answer"
262 |
263 |
def grade_answer(state: State):
    """Ask the answer grader whether the current answer resolves the question."""
    prompt_text = answer_grading_prompt_template.format(
        answer=state["answer"], question=state["question"]
    )
    return {"answer_grade": answer_grader_llm.invoke(prompt_text)}
272 |
273 |
def check_answer(state: State) -> Literal["__end__", "rewrite_query"]:
    """Finish when the answer is useful, otherwise route to query rewriting."""
    useful = state["answer_grade"].is_useful
    return "__end__" if useful else "rewrite_query"
281 |
282 |
def rewrite_query(state: State):
    """Replace the question with the search-optimised rewrite from the LLM."""
    prompt_text = query_rewriting_prompt_template.format(question=state["question"])
    rewritten = search_llm.invoke(prompt_text)
    return {"question": rewritten.search_query}
288 |
289 |
# Graph wiring. Conditional edges take their targets from the router
# functions' return values (see each router's Literal return annotation).
graph_builder = StateGraph(State)

# Entry: grade the need for retrieval, then go to "retrieve" or "generate_answer".
graph_builder.add_edge(START, "grade_retrieval")
graph_builder.add_node("grade_retrieval", grade_retrieval)
graph_builder.add_conditional_edges("grade_retrieval", decide_to_retrieve)

# Direct-answer path, which ends the run.
graph_builder.add_node("generate_answer", generate_answer)
graph_builder.add_edge("generate_answer", END)

# Retrieval path: retrieve -> grade documents -> RAG answer or no answer.
graph_builder.add_node("retrieve", retrieve)
graph_builder.add_edge("retrieve", "grade_documents")
graph_builder.add_node("grade_documents", grade_documents)
graph_builder.add_conditional_edges("grade_documents", check_documents_relevance)

# RAG answer is checked for grounding; an ungrounded answer loops back to
# generate_rag_answer.
# NOTE(review): no retry counter is visible in this module for this loop or
# the rewrite_query -> retrieve loop below; confirm how runs are bounded.
graph_builder.add_node("generate_rag_answer", generate_rag_answer)
graph_builder.add_edge("generate_rag_answer", "grade_hallucinations")
graph_builder.add_node("grade_hallucinations", grade_hallucinations)
graph_builder.add_conditional_edges("grade_hallucinations", check_hallucinations)

# No relevant documents: return the fixed fallback answer and end.
graph_builder.add_node("generate_no_answer", generate_no_answer)
graph_builder.add_edge("generate_no_answer", END)

# Grounded answer is graded for usefulness: end, or rewrite the query.
graph_builder.add_node("grade_answer", grade_answer)
graph_builder.add_conditional_edges("grade_answer", check_answer)

# Rewritten question goes back through retrieval.
graph_builder.add_node("rewrite_query", rewrite_query)
graph_builder.add_edge("rewrite_query", "retrieve")

# Compiled graph object exposed by this module.
graph = graph_builder.compile()
319 |
320 |
if __name__ == "__main__":
    # Demo run: push a few sample questions through the compiled graph.
    queries = [
        "What are common types of agent memory?",
        "What are recent types of adversarial attacks in LLM?",
        "How does the AlphaCodium paper work?",
    ]

    for query in queries:
        response = graph.invoke({"question": query})
        # Dump the full final state, then render the answer as Markdown.
        rprint(Pretty(response))
        rprint(Markdown(response["answer"]))
332 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/step_back/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/step_back/__init__.py
--------------------------------------------------------------------------------
/src/llm_rag/graphs/step_back/step_back.py:
--------------------------------------------------------------------------------
1 | from typing import TypedDict
2 |
3 | from langchain_core.documents import Document
4 | from langchain_core.messages import HumanMessage
5 | from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
6 | from langgraph.graph import END, START, StateGraph
7 | from llm_rag import llm
8 | from llm_rag.indexing.article import vectorstore
9 | from rich import print as rprint
10 | from rich.markdown import Markdown
11 | from rich.pretty import Pretty
12 |
# System instruction for the step-back question rewriting prompt.
step_back_prompt_template = "You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. Here are a few examples:"
# Few-shot pairs: an original question and its more generic step-back form.
examples = [
    {
        "input": "Could the members of The Police perform lawful arrests?",
        "output": "what can the members of The Police do?",
    },
    {
        "input": "Jan Sindel’s was born in what country?",
        "output": "what is Jan Sindel’s personal history?",
    },
]
# Each example is rendered as a human turn followed by an AI turn.
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("ai", "{output}"),
    ]
)
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)
# Full prompt: system instruction, few-shot turns, then the user's question.
step_back_prompt = ChatPromptTemplate.from_messages(
    [("system", step_back_prompt_template), few_shot_prompt, ("human", "{question}")]
)


# Final answering prompt; generate_answer fills in both retrieved contexts
# and the original question.
final_answer_prompt_template = """You are an expert of world knowledge. I am going to ask you a question. Your response should be comprehensive and not contradicted with the following context if they are relevant. Otherwise, ignore them if they are not relevant.

{context}
{step_back_context}

Original Question: {question}
Answer:"""
46 |
47 |
class State(TypedDict):
    """Graph state shared by the step-back retrieval and answering nodes."""

    # Original user question supplied when the graph is invoked.
    question: str
    # Documents found for the original question (written by retrieve_docs).
    context: list[Document]
    # Generic rewrite of the question (written by generate_step_back_question).
    step_back_question: str
    # Documents found for the step-back question (written by retrieve_step_back_docs).
    step_back_context: list[Document]
    # Text content of the final LLM response (written by generate_answer).
    answer: str
54 |
55 |
def retrieve_docs(state: State):
    """Similarity-search the vector store with the original question."""
    return {"context": vectorstore.similarity_search(state["question"])}
60 |
61 |
def generate_step_back_question(state: State):
    """Ask the LLM for a more generic "step-back" version of the question.

    The prompt is rendered with ``format_messages`` so the system
    instruction, the few-shot human/AI turns and the final human question
    reach the model as separate chat messages. ``format`` would collapse
    them into a single string instead.
    """
    step_back_prompt_messages = step_back_prompt.format_messages(
        question=state["question"]
    )
    step_back_question = llm.invoke(step_back_prompt_messages)
    return {"step_back_question": step_back_question.content}
66 |
67 |
def retrieve_step_back_docs(state: State):
    """Similarity-search the vector store with the step-back question."""
    matches = vectorstore.similarity_search(state["step_back_question"])
    return {"step_back_context": matches}
72 |
73 |
def generate_answer(state: State):
    """Answer the original question using both retrieved contexts."""
    prompt_text = final_answer_prompt_template.format(
        question=state["question"],
        context=state["context"],
        step_back_context=state["step_back_context"],
    )
    reply = llm.invoke([HumanMessage(content=prompt_text)])
    return {"answer": reply.content}
82 |
83 |
# Linear pipeline: retrieve for the original question, derive the step-back
# question, retrieve for it, then generate the final answer.
graph_builder = StateGraph(State)

graph_builder.add_node("retrieve_docs", retrieve_docs)
graph_builder.add_node("generate_step_back_question", generate_step_back_question)
graph_builder.add_node("retrieve_step_back_docs", retrieve_step_back_docs)
graph_builder.add_node("generate_answer", generate_answer)

graph_builder.add_edge(START, "retrieve_docs")
graph_builder.add_edge("retrieve_docs", "generate_step_back_question")
graph_builder.add_edge("generate_step_back_question", "retrieve_step_back_docs")
graph_builder.add_edge("retrieve_step_back_docs", "generate_answer")
graph_builder.add_edge("generate_answer", END)

# Compiled graph object exposed by this module.
graph = graph_builder.compile()
98 |
99 |
if __name__ == "__main__":
    # Demo run with a single sample question.
    query = "What is task decomposition for LLM agents?"
    response = graph.invoke(
        {"question": query},
    )

    # Print the final state (nesting limited to depth 2), then the answer
    # rendered as Markdown.
    rprint(Pretty(response, max_depth=2))
    rprint(Markdown(response["answer"]))
108 |
--------------------------------------------------------------------------------
/src/llm_rag/graphs/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/graphs/utils.py
--------------------------------------------------------------------------------
/src/llm_rag/indexing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/indexing/__init__.py
--------------------------------------------------------------------------------
/src/llm_rag/indexing/article.py:
--------------------------------------------------------------------------------
1 | import bs4
2 | from langchain.text_splitter import RecursiveCharacterTextSplitter
3 | from langchain_community.document_loaders import WebBaseLoader
4 | from langchain_core.vectorstores import InMemoryVectorStore
5 | from llm_rag import embeddings
6 |
7 |
def load_documents():
    """Load the agent blog post, keeping only its title, header and content."""
    strainer = bs4.SoupStrainer(
        class_=("post-content", "post-title", "post-header")
    )
    loader = WebBaseLoader(
        web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
        bs_kwargs={"parse_only": strainer},
    )
    return loader.load()
19 |
20 |
def prepare_vectorstore(docs, embeddings):
    """Split *docs* into overlapping chunks and index them in memory.

    Chunks are 1000 characters with a 200-character overlap; the returned
    store embeds them with *embeddings*.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(docs)
    store = InMemoryVectorStore(embeddings)
    store.add_documents(documents=chunks)
    return store
27 |
28 |
# Module-level setup: importing this module loads the documents and builds
# the vector store and retriever that other modules import.
docs = load_documents()
vectorstore = prepare_vectorstore(docs, embeddings)
retriever = vectorstore.as_retriever()
32 |
--------------------------------------------------------------------------------
/src/llm_rag/indexing/colbert_model.py:
--------------------------------------------------------------------------------
1 | import bs4
2 | from langchain.text_splitter import RecursiveCharacterTextSplitter
3 | from langchain_community.document_loaders import WebBaseLoader
4 | from ragatouille import RAGPretrainedModel
5 |
6 |
def load_documents():
    """Load two blog posts, keeping only their title, header and content."""
    strainer = bs4.SoupStrainer(
        class_=("post-content", "post-title", "post-header")
    )
    loader = WebBaseLoader(
        web_paths=[
            "https://lilianweng.github.io/posts/2023-06-23-agent/",
            "https://lilianweng.github.io/posts/2024-02-05-human-data-quality/",
        ],
        bs_kwargs={"parse_only": strainer},
    )
    return loader.load()
23 |
24 |
def prepare_model(docs):
    """Chunk *docs* and index the chunks with a pretrained ColBERT model.

    Documents are pre-split into 1024-character chunks without overlap, so
    the model is told not to split them again. The index is named "blog".
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=0)
    chunks = splitter.split_documents(docs)

    texts = []
    metadatas = []
    for chunk in chunks:
        texts.append(chunk.page_content)
        metadatas.append(chunk.metadata)

    model = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
    model.index(
        collection=texts,
        document_metadatas=metadatas,
        index_name="blog",
        split_documents=False,
    )

    return model
41 |
42 |
# Module-level setup: importing this module loads the documents, builds the
# index and exposes a LangChain retriever configured with k=10.
docs = load_documents()
model = prepare_model(docs)
retriever = model.as_langchain_retriever(k=10)
46 |
--------------------------------------------------------------------------------
/src/llm_rag/indexing/multi_vector/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/indexing/multi_vector/__init__.py
--------------------------------------------------------------------------------
/src/llm_rag/indexing/multi_vector/chunks.py:
--------------------------------------------------------------------------------
1 | import uuid
2 |
3 | import bs4
4 | from langchain.retrievers.multi_vector import MultiVectorRetriever
5 | from langchain.text_splitter import RecursiveCharacterTextSplitter
6 | from langchain_community.document_loaders import WebBaseLoader
7 | from langchain_core.stores import InMemoryByteStore
8 | from langchain_core.vectorstores import InMemoryVectorStore
9 | from llm_rag import embeddings
10 |
# NOTE(review): not referenced anywhere else in this module — confirm whether
# it is still needed here.
summarization_prompt_template = "Summarize the following document:\n\n{doc}"
12 |
13 |
def load_documents():
    """Load two blog posts, keeping only their title, header and content."""
    strainer = bs4.SoupStrainer(
        class_=("post-content", "post-title", "post-header")
    )
    loader = WebBaseLoader(
        web_paths=[
            "https://lilianweng.github.io/posts/2023-06-23-agent/",
            "https://lilianweng.github.io/posts/2024-02-05-human-data-quality/",
        ],
        bs_kwargs={"parse_only": strainer},
    )
    return loader.load()
29 |
30 |
def prepare_retriever(docs, embeddings):
    """Build a multi-vector retriever that searches small chunks.

    *docs* are split into 10000-character parent splits, each of which is
    split again into 1000-character children. Children carry their parent's
    id under the "split_id" metadata key and go into the vector store; the
    parents are stored in the docstore under that same id.
    """
    id_key = "split_id"

    parent_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=0)
    splits = parent_splitter.split_documents(docs)

    retriever = MultiVectorRetriever(
        vectorstore=InMemoryVectorStore(embeddings),
        byte_store=InMemoryByteStore(),
        id_key=id_key,
    )

    split_ids = [str(uuid.uuid4()) for _ in splits]

    child_text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=0
    )

    all_sub_splits = []
    for split_id, split in zip(split_ids, splits):
        children = child_text_splitter.split_documents([split])
        # Tag every child with the id of the parent split it came from.
        for child in children:
            child.metadata[id_key] = split_id
        all_sub_splits.extend(children)

    retriever.vectorstore.add_documents(all_sub_splits)
    retriever.docstore.mset(list(zip(split_ids, splits)))

    return retriever
66 |
67 |
# Module-level setup: importing this module loads the documents and builds
# the retriever.
docs = load_documents()
retriever = prepare_retriever(docs, embeddings)
70 |
--------------------------------------------------------------------------------
/src/llm_rag/indexing/multi_vector/hypothetical_questions.py:
--------------------------------------------------------------------------------
1 | import uuid
2 |
3 | import bs4
4 | from langchain.retrievers.multi_vector import MultiVectorRetriever
5 | from langchain_community.document_loaders import WebBaseLoader
6 | from langchain_core.documents import Document
7 | from langchain_core.messages import HumanMessage
8 | from langchain_core.runnables import chain
9 | from langchain_core.stores import InMemoryByteStore
10 | from langchain_core.vectorstores import InMemoryVectorStore
11 | from llm_rag import embeddings, llm
12 | from pydantic import BaseModel, Field
13 |
# Prompt filled by generate_hypothetical_questions with the requested number
# of questions and the document's page content.
hypothetical_questions_prompt_template = "Generate a list of exactly {hypothetical_questions_count} hypothetical questions that the below document could be used to answer:\n\n{doc}"
15 |
16 |
def load_documents():
    """Load two blog posts, keeping only their title, header and content."""
    strainer = bs4.SoupStrainer(
        class_=("post-content", "post-title", "post-header")
    )
    loader = WebBaseLoader(
        web_paths=[
            "https://lilianweng.github.io/posts/2023-06-23-agent/",
            "https://lilianweng.github.io/posts/2024-02-05-human-data-quality/",
        ],
        bs_kwargs={"parse_only": strainer},
    )
    return loader.load()
32 |
33 |
class HypotheticalQuestions(BaseModel):
    """Generate hypothetical questions."""

    # Structured-output schema; generate_hypothetical_questions returns this
    # field's value.
    questions: list[str] = Field(..., description="List of questions")
38 |
39 |
@chain
def generate_hypothetical_questions(doc, hypothetical_questions_count=3):
    """Ask the LLM for questions that *doc* could be used to answer.

    Returns the ``questions`` list of the structured LLM response.
    """
    prompt_text = hypothetical_questions_prompt_template.format(
        doc=doc.page_content,
        hypothetical_questions_count=hypothetical_questions_count,
    )
    question_llm = llm.with_structured_output(HypotheticalQuestions)
    result = question_llm.invoke([HumanMessage(content=prompt_text)])
    return result.questions
50 |
51 |
def prepare_retriever(docs, embeddings):
    """Build a multi-vector retriever that searches generated questions.

    Hypothetical questions are generated for every document. Each question
    is indexed in the vector store with its source document's id under the
    "doc_id" metadata key; the documents are stored in the docstore under
    that same id.
    """
    id_key = "doc_id"
    retriever = MultiVectorRetriever(
        vectorstore=InMemoryVectorStore(embeddings),
        byte_store=InMemoryByteStore(),
        id_key=id_key,
    )

    # One batch call, with concurrency equal to the number of documents.
    hypothetical_questions = generate_hypothetical_questions.batch(
        docs, {"max_concurrency": len(docs)}
    )
    doc_ids = [str(uuid.uuid4()) for _ in docs]

    question_docs = []
    for position, questions in enumerate(hypothetical_questions):
        source_id = doc_ids[position]
        for question in questions:
            question_docs.append(
                Document(page_content=question, metadata={id_key: source_id})
            )

    retriever.vectorstore.add_documents(question_docs)
    retriever.docstore.mset(list(zip(doc_ids, docs)))

    return retriever
82 |
83 |
# Module-level setup: importing this module loads the documents and builds
# the retriever (which calls the LLM to generate the questions).
docs = load_documents()
retriever = prepare_retriever(docs, embeddings)
86 |
--------------------------------------------------------------------------------
/src/llm_rag/indexing/multi_vector/summary.py:
--------------------------------------------------------------------------------
1 | import uuid
2 |
3 | import bs4
4 | from langchain.retrievers.multi_vector import MultiVectorRetriever
5 | from langchain_community.document_loaders import WebBaseLoader
6 | from langchain_core.documents import Document
7 | from langchain_core.messages import HumanMessage
8 | from langchain_core.runnables import chain
9 | from langchain_core.stores import InMemoryByteStore
10 | from langchain_core.vectorstores import InMemoryVectorStore
11 | from llm_rag import embeddings, llm
12 |
# Prompt filled by summarize_document with the document's page content.
summarization_prompt_template = "Summarize the following document:\n\n{doc}"
14 |
15 |
def load_documents():
    """Load two blog posts, keeping only their title, header and content."""
    strainer = bs4.SoupStrainer(
        class_=("post-content", "post-title", "post-header")
    )
    loader = WebBaseLoader(
        web_paths=[
            "https://lilianweng.github.io/posts/2023-06-23-agent/",
            "https://lilianweng.github.io/posts/2024-02-05-human-data-quality/",
        ],
        bs_kwargs={"parse_only": strainer},
    )
    return loader.load()
31 |
32 |
@chain
def summarize_document(doc):
    """Return the LLM's summary of *doc*'s page content."""
    prompt_text = summarization_prompt_template.format(doc=doc.page_content)
    reply = llm.invoke([HumanMessage(content=prompt_text)])
    return reply.content
38 |
39 |
def prepare_retriever(docs, embeddings):
    """Build a multi-vector retriever that searches document summaries.

    Every document is summarised. Each summary is indexed in the vector
    store with its source document's id under the "doc_id" metadata key;
    the documents are stored in the docstore under that same id.
    """
    id_key = "doc_id"
    retriever = MultiVectorRetriever(
        vectorstore=InMemoryVectorStore(embeddings),
        byte_store=InMemoryByteStore(),
        id_key=id_key,
    )

    # One batch call, with concurrency equal to the number of documents.
    summaries = summarize_document.batch(docs, {"max_concurrency": len(docs)})
    doc_ids = [str(uuid.uuid4()) for _ in docs]

    summary_docs = []
    for position, summary in enumerate(summaries):
        summary_docs.append(
            Document(page_content=summary, metadata={id_key: doc_ids[position]})
        )

    retriever.vectorstore.add_documents(summary_docs)
    retriever.docstore.mset(list(zip(doc_ids, docs)))

    return retriever
63 |
64 |
# Module-level setup: importing this module loads the documents and builds
# the retriever (which calls the LLM to write the summaries).
docs = load_documents()
retriever = prepare_retriever(docs, embeddings)
67 |
--------------------------------------------------------------------------------
/src/llm_rag/indexing/raptor/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labdmitriy/llm-rag/f2d5e6ea3b1c21c084589667abf6e5c30f0d947f/src/llm_rag/indexing/raptor/__init__.py
--------------------------------------------------------------------------------
/src/llm_rag/indexing/raptor/raptor.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup as Soup
2 | from langchain.text_splitter import RecursiveCharacterTextSplitter
3 | from langchain_community.document_loaders import RecursiveUrlLoader
4 | from langchain_core.documents import Document
5 | from langchain_core.vectorstores import InMemoryVectorStore
6 | from llm_rag import embeddings
7 | from llm_rag.indexing.raptor.utils import recursive_embed_cluster_summarize
8 |
9 |
def load_documents():
    """Crawl the LangGraph introduction and concepts pages as plain text.

    The introduction URL is crawled with max_depth=1 and the concepts URL
    with max_depth=2; introduction documents come first in the result.
    """
    sources = (
        ("https://langchain-ai.github.io/langgraph/tutorials/introduction/", 1),
        ("https://langchain-ai.github.io/langgraph/concepts/", 2),
    )

    docs = []
    for url, depth in sources:
        loader = RecursiveUrlLoader(
            url=url, max_depth=depth, extractor=lambda x: Soup(x, "html.parser").text
        )
        docs = docs + loader.load()
    return docs
25 |
26 |
def prepare_vectorstore(docs):
    """Index leaf chunks and their recursive cluster summaries together.

    *docs* are split into 5000-character chunks tagged level 0. Summaries
    returned by ``recursive_embed_cluster_summarize`` are tagged with the
    level key they are stored under.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=0)
    leaf_texts = [chunk.page_content for chunk in splitter.split_documents(docs)]
    results = recursive_embed_cluster_summarize(leaf_texts, level=1, n_levels=3)

    all_docs = []
    for text in leaf_texts:
        all_docs.append(Document(page_content=text, metadata={"level": 0}))

    for level in sorted(results.keys()):
        # The second element of each per-level entry holds the "summaries".
        for summary in results[level][1]["summaries"]:
            all_docs.append(Document(page_content=summary, metadata={"level": level}))

    vectorstore = InMemoryVectorStore(embeddings)
    vectorstore.add_documents(documents=all_docs)
    return vectorstore
49 |
50 |
# Module-level setup: importing this module crawls the pages, builds the
# vector store and exposes a retriever configured with k=10.
docs = load_documents()
vectorstore = prepare_vectorstore(docs)
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 10},
)
56 |
--------------------------------------------------------------------------------
/src/llm_rag/indexing/raptor/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import umap
4 | from langchain_core.messages import HumanMessage
5 | from langchain_core.runnables import chain
6 | from llm_rag import embeddings, llm
7 | from sklearn.mixture import GaussianMixture
8 |
9 | RANDOM_SEED = 42 # Fixed seed for reproducibility
10 |
11 |
def global_cluster_embeddings(
    embeddings: np.ndarray,
    dim: int,
    n_neighbors: int | None = None,
    metric: str = "cosine",
) -> np.ndarray:
    """
    Reduce the embeddings to `dim` dimensions with UMAP (global view).

    Parameters:
    - embeddings: The input embeddings as a numpy array.
    - dim: The number of UMAP components to produce.
    - n_neighbors: Optional; UMAP neighborhood size. When omitted it is
      int(sqrt(len(embeddings) - 1)).
    - metric: The distance metric passed to UMAP.

    Returns:
    - The result of UMAP's fit_transform on the embeddings.
    """
    neighbor_count = n_neighbors
    if neighbor_count is None:
        neighbor_count = int((len(embeddings) - 1) ** 0.5)
    reducer = umap.UMAP(n_neighbors=neighbor_count, n_components=dim, metric=metric)
    return reducer.fit_transform(embeddings)
36 |
37 |
def local_cluster_embeddings(
    embeddings: np.ndarray, dim: int, num_neighbors: int = 10, metric: str = "cosine"
) -> np.ndarray:
    """
    Reduce the embeddings to `dim` dimensions with UMAP (local view).

    Parameters:
    - embeddings: The input embeddings as a numpy array.
    - dim: The number of UMAP components to produce.
    - num_neighbors: UMAP neighborhood size.
    - metric: The distance metric passed to UMAP.

    Returns:
    - The result of UMAP's fit_transform on the embeddings.
    """
    reducer = umap.UMAP(n_neighbors=num_neighbors, n_components=dim, metric=metric)
    return reducer.fit_transform(embeddings)
56 |
57 |
def get_optimal_clusters(
    embeddings: np.ndarray, max_clusters: int = 50, random_state: int = RANDOM_SEED
) -> int:
    """
    Determine the optimal number of clusters using the Bayesian Information Criterion (BIC) with a Gaussian Mixture Model.

    One model is fitted per candidate component count in
    `1 .. min(max_clusters, len(embeddings)) - 1` and the count with the
    lowest BIC wins.

    Parameters:
    - embeddings: The input embeddings as a numpy array.
    - max_clusters: Exclusive upper bound on the number of clusters to consider
                    (additionally capped by the number of embeddings).
    - random_state: Seed for reproducibility.

    Returns:
    - An integer representing the optimal number of clusters found. When there is
      no candidate to compare (at most one embedding, or `max_clusters <= 1`)
      the result is 1.
    """
    upper_bound = min(max_clusters, len(embeddings))
    candidate_counts = np.arange(1, upper_bound)

    if candidate_counts.size == 0:
        # Without this guard np.argmin() below would be called on an empty
        # list and raise; a single cluster is the only sensible answer here.
        return 1

    bics = []
    for n in candidate_counts:
        gm = GaussianMixture(n_components=n, random_state=random_state)
        gm.fit(embeddings)
        bics.append(gm.bic(embeddings))

    # Convert the numpy scalar so the declared `int` return type holds.
    return int(candidate_counts[np.argmin(bics)])
82 |
83 |
def GMM_cluster(
    embeddings: np.ndarray, threshold: float, random_state: int = RANDOM_SEED
) -> tuple[list[np.ndarray], int]:
    """
    Cluster embeddings using a Gaussian Mixture Model (GMM) based on a probability threshold.

    The number of components is chosen by `get_optimal_clusters`. Assignment is
    soft: an embedding belongs to every component whose predicted probability
    is strictly greater than `threshold`, so it may end up in several clusters
    or in none.

    Parameters:
    - embeddings: The input embeddings as a numpy array.
    - threshold: The probability threshold for assigning an embedding to a cluster.
    - random_state: Seed for reproducibility; used both for selecting the number
                    of components and for the final fit.

    Returns:
    - A tuple containing the cluster labels (one array of component indices per
      embedding) and the number of clusters determined.
    """
    # Forward the seed so a caller-supplied random_state also governs the
    # model-selection fits, not only the final one.
    n_clusters = get_optimal_clusters(embeddings, random_state=random_state)
    gm = GaussianMixture(n_components=n_clusters, random_state=random_state)
    gm.fit(embeddings)
    probs = gm.predict_proba(embeddings)
    labels = [np.where(prob > threshold)[0] for prob in probs]
    return labels, n_clusters
104 |
105 |
def perform_clustering(
    embeddings: np.ndarray,
    dim: int,
    threshold: float,
) -> list[np.ndarray]:
    """
    Perform clustering on the embeddings by first reducing their dimensionality globally, then clustering
    using a Gaussian Mixture Model, and finally performing local clustering within each global cluster.

    Parameters:
    - embeddings: The input embeddings as a numpy array.
    - dim: The target dimensionality for UMAP reduction.
    - threshold: The probability threshold for assigning an embedding to a cluster in GMM.

    Returns:
    - A list of numpy arrays, where each array contains the cluster IDs for each embedding.
      An embedding can carry several IDs (soft assignment) or none when no
      membership probability exceeds `threshold`. With `dim + 1` or fewer
      embeddings every embedding is placed in cluster 0 without any fitting.
    """
    if len(embeddings) <= dim + 1:
        # Avoid clustering when there's insufficient data
        return [np.array([0]) for _ in range(len(embeddings))]

    # Global dimensionality reduction
    reduced_embeddings_global = global_cluster_embeddings(embeddings, dim)
    # Global clustering
    global_clusters, n_global_clusters = GMM_cluster(
        reduced_embeddings_global, threshold
    )

    all_local_clusters = [np.array([]) for _ in range(len(embeddings))]
    total_clusters = 0

    # Iterate through each global cluster to perform local clustering
    for i in range(n_global_clusters):
        # Row positions (in `embeddings`) of the members of this global cluster.
        # Tracking positions instead of re-finding rows by value keeps
        # duplicate embeddings from receiving the same cluster ID repeatedly
        # and avoids materializing an (m, n, d) comparison array.
        global_member_indices = np.flatnonzero(
            [i in gc for gc in global_clusters]
        )
        if len(global_member_indices) == 0:
            continue

        global_cluster_embeddings_ = embeddings[global_member_indices]

        if len(global_cluster_embeddings_) <= dim + 1:
            # Handle small clusters with direct assignment
            local_clusters = [np.array([0]) for _ in global_cluster_embeddings_]
            n_local_clusters = 1
        else:
            # Local dimensionality reduction and clustering
            reduced_embeddings_local = local_cluster_embeddings(
                global_cluster_embeddings_, dim
            )
            local_clusters, n_local_clusters = GMM_cluster(
                reduced_embeddings_local, threshold
            )

        # Assign local cluster IDs, adjusting for total clusters already processed
        for j in range(n_local_clusters):
            local_member_mask = np.array(
                [j in lc for lc in local_clusters], dtype=bool
            )
            for idx in global_member_indices[local_member_mask]:
                all_local_clusters[idx] = np.append(
                    all_local_clusters[idx], j + total_clusters
                )

        total_clusters += n_local_clusters

    return all_local_clusters
175 |
176 |
177 | ### --- Our code below --- ###
178 |
179 |
def embed(texts):
    """
    Generate embeddings for a list of text documents.

    The work is delegated to `embed_documents` of the `embeddings` object
    imported from `llm_rag`.

    Parameters:
    - texts: List[str], a list of text documents to be embedded.

    Returns:
    - numpy.ndarray: The embeddings returned by the model, wrapped in an array.
    """
    return np.array(embeddings.embed_documents(texts))
196 |
197 |
def embed_cluster_texts(texts):
    """
    Embed the texts, cluster the embeddings, and collect everything in one DataFrame.

    Embedding is done by `embed`; clustering by `perform_clustering` with a
    reduction dimensionality of 10 and a probability threshold of 0.1.

    Parameters:
    - texts: List[str], a list of text documents to be processed.

    Returns:
    - pandas.DataFrame: One row per text with the columns `text` (the original
      text), `embd` (its embedding) and `cluster` (its cluster labels).
    """
    vectors = embed(texts)
    assignments = perform_clustering(vectors, 10, 0.1)
    return pd.DataFrame(
        {
            "text": texts,
            "embd": list(vectors),  # one embedding per row
            "cluster": assignments,
        }
    )
220 |
221 |
def format_texts(texts: list[str]) -> str:
    """
    Concatenate the given texts into one string, separated by a fixed delimiter.

    Parameters:
    - texts: List of texts to format.

    Returns:
    - A single string with the delimiter placed between consecutive texts
      (an empty list yields an empty string).
    """
    delimiter = "--- --- \n --- --- "
    return delimiter.join(texts)
233 |
234 |
def embed_cluster_summarize_texts(
    texts: list[str], level: int
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Embeds, clusters, and summarizes a list of texts. This function first generates embeddings for the texts,
    clusters them based on similarity, expands the cluster assignments for easier processing, and then summarizes
    the content within each cluster.

    Parameters:
    - texts: A list of text documents to be processed.
    - level: An integer that is only recorded in the `level` column of the summary DataFrame.

    Returns:
    - Tuple containing two DataFrames:
      1. The first DataFrame (`df_clusters`) includes the original texts, their embeddings, and cluster assignments.
      2. The second DataFrame (`df_summary`) contains one row per cluster with the columns
         `summaries`, `level` and `cluster`. It is empty when no text was assigned to any cluster.
    """
    # Summarization
    prompt_template = """Here is a subset of LangGraph docs.

    LangGraph is a low-level orchestration framework for building controllable agents.

    Give a detailed summary of the documentation provided.

    Documentation:
    {context}
    """

    @chain
    def summarize_cluster(texts):
        formatted_txt = format_texts(texts)
        prompt = prompt_template.format(context=formatted_txt)
        response = llm.invoke([HumanMessage(content=prompt)])
        return response.content

    # Embed and cluster the texts, resulting in a DataFrame with 'text', 'embd', and 'cluster' columns
    df_clusters = embed_cluster_texts(texts)

    # Expand DataFrame entries to document-cluster pairings for straightforward processing
    expanded_list = []
    for _, row in df_clusters.iterrows():
        for cluster in row["cluster"]:
            expanded_list.append(
                {"text": row["text"], "embd": row["embd"], "cluster": cluster}
            )

    # Naming the columns explicitly keeps the 'cluster' lookup below valid even
    # when there is not a single document-cluster pairing.
    expanded_df = pd.DataFrame(expanded_list, columns=["text", "embd", "cluster"])

    # Retrieve unique cluster identifiers for processing
    all_clusters = expanded_df["cluster"].unique().tolist()

    print(f"--Generated {len(all_clusters)} clusters--")

    # One summarization call per cluster, at most 10 running concurrently
    summaries = summarize_cluster.batch(
        [
            expanded_df.loc[expanded_df["cluster"] == cluster_idx, "text"].tolist()
            for cluster_idx in all_clusters
        ],
        config={"max_concurrency": 10},
    )

    # Create a DataFrame to store summaries with their corresponding cluster and level
    df_summary = pd.DataFrame(
        {
            "summaries": summaries,
            "level": [level] * len(summaries),
            "cluster": list(all_clusters),
        }
    )

    return df_clusters, df_summary
310 |
311 |
def recursive_embed_cluster_summarize(
    texts: list[str], level: int = 1, n_levels: int = 3
) -> dict[int, tuple[pd.DataFrame, pd.DataFrame]]:
    """
    Build successive levels of cluster summaries, feeding each level's summaries
    into the next one.

    Processing stops once `n_levels` has been reached or a level produces at
    most one unique cluster.

    Parameters:
    - texts: List[str], texts to be processed.
    - level: int, the level number assigned to the first pass (starts at 1).
    - n_levels: int, the last level that may be processed.

    Returns:
    - Dict[int, Tuple[pd.DataFrame, pd.DataFrame]], a dictionary where keys are the
      levels and values are tuples containing the clusters DataFrame and summaries DataFrame at that level.
    """
    results = {}
    current_texts = texts
    current_level = level

    while True:
        # Embed, cluster and summarize the texts of this level and record the outcome
        df_clusters, df_summary = embed_cluster_summarize_texts(
            current_texts, current_level
        )
        results[current_level] = (df_clusters, df_summary)

        # Going deeper only makes sense below the level limit and with several clusters
        if current_level >= n_levels or df_summary["cluster"].nunique() <= 1:
            break

        # The summaries of this level are the input texts of the next one
        current_texts = df_summary["summaries"].tolist()
        current_level += 1

    return results
350 |
--------------------------------------------------------------------------------
/src/llm_rag/indexing/reflection.py:
--------------------------------------------------------------------------------
1 | import bs4
2 | from langchain.text_splitter import RecursiveCharacterTextSplitter
3 | from langchain_community.document_loaders import WebBaseLoader
4 | from langchain_core.vectorstores import InMemoryVectorStore
5 |
6 | from llm_rag import embeddings
7 |
8 |
def load_documents():
    """Load the three hard-coded blog posts with `WebBaseLoader` and return the documents."""
    # Restrict BeautifulSoup parsing to the post title, header and content classes
    strainer = bs4.SoupStrainer(
        class_=("post-content", "post-title", "post-header")
    )
    loader = WebBaseLoader(
        web_paths=[
            "https://lilianweng.github.io/posts/2023-06-23-agent/",
            "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
            "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
        ],
        bs_kwargs={"parse_only": strainer},
    )
    return loader.load()
25 |
26 |
def prepare_vectorstore(docs, embeddings):
    """Split `docs` into 1000-character chunks without overlap and index them in an in-memory vector store."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(docs)

    store = InMemoryVectorStore(embeddings)
    store.add_documents(documents=chunks)
    return store
33 |
34 |
# Executed at import time: load the articles, index them in an in-memory
# vector store, and expose a retriever over that store.
docs = load_documents()
vectorstore = prepare_vectorstore(docs, embeddings)
retriever = vectorstore.as_retriever()
38 |
--------------------------------------------------------------------------------
/src/llm_rag/indexing/self_query.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import re
3 | from datetime import datetime
4 |
5 | import chromadb
6 | from chromadb.config import Settings
7 | from langchain.chains.query_constructor.base import (
8 | StructuredQueryOutputParser,
9 | get_query_constructor_prompt,
10 | )
11 | from langchain.chains.query_constructor.schema import AttributeInfo
12 | from langchain.retrievers.self_query.base import SelfQueryRetriever
13 | from langchain.text_splitter import RecursiveCharacterTextSplitter
14 | from langchain_chroma import Chroma
15 | from langchain_community.document_loaders import YoutubeLoader
16 | from langchain_community.query_constructors.chroma import ChromaTranslator
17 | from langchain_core.messages import HumanMessage
18 | from langchain_core.runnables import chain
19 | from llm_rag import embeddings, llm, project_path
20 | from pytube import Playlist
21 | from pytube.innertube import _default_clients
22 |
# Directory passed to `chromadb.PersistentClient` in `prepare_vectorstore`.
VECTORSTORE_PATH = project_path / "data/vectorstore/chroma"
24 |
25 |
async def load_youtube_video_transcript(video_url):
    """Load one YouTube video (with video info) and return the first document produced by the loader."""
    loader = YoutubeLoader.from_youtube_url(video_url, add_video_info=True)
    documents = await loader.aload()
    return documents[0]
29 |
30 |
async def load_documents():
    """Load the transcripts of every video in the hard-coded playlist concurrently."""
    # Override pytube's ANDROID client version, see
    # https://github.com/pytube/pytube/issues/1894#issue-2180600881
    android_client = _default_clients["ANDROID"]["context"]["client"]
    android_client["clientVersion"] = "19.08.35"

    playlist = Playlist(
        "https://www.youtube.com/playlist?list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x"
    )
    # One coroutine per video; gather returns the results in playlist order
    transcripts = await asyncio.gather(
        *[load_youtube_video_transcript(url) for url in playlist.video_urls]
    )
    return transcripts
43 |
44 |
def load_data():
    """Synchronous wrapper: run `load_documents` to completion via `asyncio.run` and return its result."""
    transcripts = asyncio.run(load_documents())
    return transcripts
47 |
48 |
def generate_chunk_content(chunk):
    """
    Render a chunk as text made of three sections: title, description and transcript.

    The title and description are read from `chunk.metadata`, the transcript
    from `chunk.page_content`; sections are separated by a blank line.
    """
    title_section = f"Title:\n{chunk.metadata['title']}"
    description_section = f"Description:\n{chunk.metadata['description']}"
    transcript_section = f"Transcript:\n{chunk.page_content}"
    return "\n\n".join((title_section, description_section, transcript_section))
57 |
58 |
def prepare_vectorstore(docs, embeddings):
    """
    Chunk the documents, normalize their metadata and store them in a fresh Chroma collection.

    For every chunk the `publish_date` metadata string ("%Y-%m-%d %H:%M:%S") is
    replaced by an integer in YYYYMMDD form, and the page content is replaced by
    the text produced by `generate_chunk_content`. The collection
    "youtube-rag-from-scratch" under `VECTORSTORE_PATH` is deleted and re-created
    before the chunks are added.
    """
    source_format = "%Y-%m-%d %H:%M:%S"
    compact_format = "%Y%m%d"

    splitter = RecursiveCharacterTextSplitter(chunk_size=8000, chunk_overlap=200)

    chunks = []
    for piece in splitter.split_documents(docs):
        chunk = piece.copy()
        published_at = datetime.strptime(
            chunk.metadata["publish_date"], source_format
        )
        chunk.metadata["publish_date"] = int(published_at.strftime(compact_format))
        chunk.page_content = generate_chunk_content(chunk)
        chunks.append(chunk)

    collection_name = "youtube-rag-from-scratch"
    client = chromadb.PersistentClient(
        path=str(VECTORSTORE_PATH),
        settings=Settings(anonymized_telemetry=False),
    )

    # Drop whatever an earlier run left behind before indexing again
    previous_store = Chroma(collection_name=collection_name, client=client)
    previous_store.delete_collection()

    vectorstore = Chroma(
        collection_name=collection_name, embedding_function=embeddings, client=client
    )
    vectorstore.add_documents(documents=chunks)
    return vectorstore
89 |
90 |
def generate_query_constructor_prompt():
    """
    Build the query-constructor prompt for the video collection.

    The prompt describes three filterable metadata fields (`view_count`,
    `publish_date`, `length`), carries four worked examples, enables the
    `limit` feature, and is restricted to the comparators and operators that
    `ChromaTranslator` allows.
    """
    attribute_specs = [
        ("view_count", "Video views count", "integer"),
        ("publish_date", "Video publish date in format YYYYMMDD", "int"),
        ("length", "Video length (seconds)", "float"),
    ]
    metadata_field_info = [
        AttributeInfo(name=name, description=description, type=type_name)
        for name, description, type_name in attribute_specs
    ]

    # (user question, expected structured query) pairs shown to the model
    examples = [
        (
            "Find videos under 5 minutes",
            {
                "query": "Videos with length less than 300 seconds",
                "filter": 'lt("length", 300.0)',
            },
        ),
        (
            "Find videos published in 2024",
            {
                "query": "Videos with date greater or equal than 2024-01-01 and less than 2025-01-01",
                "filter": 'and(gte("publish_date", 20240101), lt("publish_date", 20250101))',
            },
        ),
        (
            "Find videos about indexing",
            {
                "query": "Videos about indexing",
                "filter": "NO_FILTER",
            },
        ),
        (
            "Find 3 videos about indexing",
            {
                "query": "3 videos about indexing",
                "filter": "NO_FILTER",
                "limit": 3,
            },
        ),
    ]

    translator = ChromaTranslator()
    return get_query_constructor_prompt(
        "Tutorial videos about RAG",
        metadata_field_info,
        examples=examples,
        allowed_comparators=translator.allowed_comparators,
        allowed_operators=translator.allowed_operators,
        enable_limit=True,
    )
151 |
152 |
def clean_json_string(message):
    """
    Unwrap ```json fenced blocks found in `message.content`.

    Each fenced block, together with any text preceding it, is replaced by
    the block's inner content; text after the last fence is kept. The result
    is stripped of surrounding whitespace. Content without a fence is only
    stripped.
    """
    fenced_json = re.compile(
        r".*?```json\s*(.*?)\s*```", flags=re.DOTALL | re.IGNORECASE
    )
    unwrapped = fenced_json.sub(r"\1", message.content)
    return unwrapped.strip()
159 |
160 |
@chain
def query_constructor(query):
    """
    Turn a natural-language query into a structured query.

    The query-constructor prompt is filled with `query` and sent to the LLM;
    the reply is cleaned with `clean_json_string` and parsed by a
    `StructuredQueryOutputParser` limited to the comparators and operators of
    the module-level `translator`.
    """
    prompt_text = generate_query_constructor_prompt().format(query=query)
    llm_reply = llm.invoke([HumanMessage(content=prompt_text)])
    cleaned_reply = clean_json_string(llm_reply)

    parser = StructuredQueryOutputParser.from_components(
        allowed_comparators=translator.allowed_comparators,
        allowed_operators=translator.allowed_operators,
    )
    return parser.invoke(cleaned_reply)
175 |
176 |
def get_collection_size(vectorstore):
    """
    Return the number of ids reported by `vectorstore.get()`.

    Any exception raised while fetching or counting the ids results in 0.
    """
    try:
        return len(vectorstore.get()["ids"])
    except Exception:
        return 0
184 |
185 |
# Executed at import time: load the playlist transcripts, rebuild the Chroma
# collection, and wire up the self-query retriever.
docs = load_data()
vectorstore = prepare_vectorstore(docs, embeddings)
# `query_constructor` reads this module-level translator when it is invoked.
translator = ChromaTranslator()
retriever = SelfQueryRetriever(
    query_constructor=query_constructor,
    vectorstore=vectorstore,
    structured_query_translator=translator,
    verbose=True,
    # k is the number of stored ids (0 if that lookup fails)
    search_kwargs={"k": get_collection_size(vectorstore)},
)
196 |
--------------------------------------------------------------------------------